ff8c39f2e51611efac92c4447d93fe45/0000775000175400017540000000000014751343160015441 5ustar jenkinsjenkinsff8c39f2e51611efac92c4447d93fe45/MindSpore_Gitee_Gate_377046_ff8c39f2e51611efac92c4447d93fe45.csv0000644000175400017540000000724214751343160027531 0ustar jenkinsjenkinsEnvironment IP,Env Type,Device ID,Case Name,Module,Sequence,Result,TimeCost,Start Time,End time,Case Level 8_92_9_73,ASCEND_ARM_EULEROS_overall_networks,Single,test_compile_cache,test_compile_cache_pipeline_parallel_and_recompute,0,pass,139.466,2025-02-07 13:49:59,2025-02-07 13:49:59,level0 8_92_9_73,ASCEND_ARM_EULEROS_overall_networks,Single,test_all,test_hccl_send_receive,1,pass,76.865,2025-02-07 13:52:20,2025-02-07 13:52:20,level0 8_92_9_73,ASCEND_ARM_EULEROS_overall_networks,Single,test_parameter_broadcast,test_parameter_broadcast,2,pass,70.785,2025-02-07 13:53:37,2025-02-07 13:53:37,level0 8_92_9_73,ASCEND_ARM_EULEROS_overall_networks,Single,test_dump_hccl,test_dump_hccl,3,pass,49.012,2025-02-07 13:54:49,2025-02-07 13:54:49,level0 8_92_9_73,ASCEND_ARM_EULEROS_overall_networks,Single,test_pynative_resnet50_ascend_8p,test_pynative_resnet50_ascend_8p_mpi,4,pass,50.903,2025-02-07 13:55:39,2025-02-07 13:55:39,level0 8_92_9_73,ASCEND_ARM_EULEROS_overall_networks,Single,test_remove_redundancy,test_load_remove_redundancy_error,5,pass,23.895,2025-02-07 13:56:31,2025-02-07 13:56:31,level0 8_92_9_85,ASCEND_ARM_EULEROS_overall_networks,Single,test_remove_redundancy,test_no_init_parameters_without_load_param,0,pass,140.422,2025-02-07 15:51:02,2025-02-07 15:51:02,level0 8_92_9_85,ASCEND_ARM_EULEROS_overall_networks,Single,test_remove_redundancy,test_remove_redundancy_1_1,1,pass,94.872,2025-02-07 15:53:24,2025-02-07 15:53:24,level0 8_92_9_85,ASCEND_ARM_EULEROS_overall_networks,Single,test_all,test_hccl_all_to_all_v,2,pass,83.199,2025-02-07 15:55:00,2025-02-07 15:55:00,level0 8_92_9_85,ASCEND_ARM_EULEROS_overall_networks,Single,test_all,test_hccl_broadcast,3,pass,78.509,2025-02-07 15:56:24,2025-02-07 15:56:24,level0 8_92_9_85,ASCEND_ARM_EULEROS_overall_networks,Single,test_parallel_complex_input,test_graph_mode_parallel_complex_input,4,pass,50.743,2025-02-07 15:57:44,2025-02-07 15:57:44,level0 8_92_9_85,ASCEND_ARM_EULEROS_overall_networks,Single,test_entry_msrun,test_msrun_with_correct_hostname,5,pass,52.044,2025-02-07 15:58:36,2025-02-07 15:58:36,level0 8_92_9_85,ASCEND_ARM_EULEROS_overall_networks,Single,test_all,test_hccl_reduce,6,pass,43.000,2025-02-07 15:59:29,2025-02-07 15:59:29,level0 8_92_9_86,ASCEND_ARM_EULEROS_overall_networks,Single,test_remove_redundancy,test_get_strategy_redundancy,0,pass,147.865,2025-02-07 15:47:18,2025-02-07 15:47:18,level0 8_92_9_86,ASCEND_ARM_EULEROS_overall_networks,Single,test_remove_redundancy,test_remove_redundancy_1_1_dp,1,pass,90.521,2025-02-07 15:49:47,2025-02-07 15:49:47,level0 8_92_9_86,ASCEND_ARM_EULEROS_overall_networks,Single,test_entry_full_ps_lenet,test_full_ps_lenet_ascend,2,pass,79.066,2025-02-07 15:51:19,2025-02-07 15:51:19,level0 8_92_9_86,ASCEND_ARM_EULEROS_overall_networks,Single,test_all,test_hccl_gather_into_tensor,3,pass,78.795,2025-02-07 15:52:39,2025-02-07 15:52:39,level0 8_92_9_86,ASCEND_ARM_EULEROS_overall_networks,Single,test_entry_msrun,test_msrun,4,pass,68.135,2025-02-07 15:53:59,2025-02-07 15:53:59,level0 8_92_9_86,ASCEND_ARM_EULEROS_overall_networks,Single,test_deterministic,test_deterministic_allreduce,5,pass,49.360,2025-02-07 15:55:08,2025-02-07 15:55:08,level0 8_92_9_86,ASCEND_ARM_EULEROS_overall_networks,Single,test_mc2_all2all,test_mc2_alltoall_allgather_batchmatmul_withoutsilu,6,pass,6.622,2025-02-07 15:55:58,2025-02-07 15:55:58,level0 8_92_9_86,ASCEND_ARM_EULEROS_overall_networks,Single,test_mc2_all2all,test_mc2_alltoall_allgather_batchmatmul_withsilu,7,pass,6.614,2025-02-07 15:56:06,2025-02-07 15:56:06,level0 8_92_9_86,ASCEND_ARM_EULEROS_overall_networks,Single,test_compile_cache,test_compile_cache_pipeline_parallel_and_recompute_o2,8,fail,26.538,2025-02-07 15:56:14,2025-02-07 15:56:14,level0 ff8c39f2e51611efac92c4447d93fe45/failed/0000775000175400017540000000000014751343160016665 5ustar jenkinsjenkins././@LongLink0000644000000000000000000000016500000000000011605 Lustar rootrootff8c39f2e51611efac92c4447d93fe45/failed/test_compile_cache_test_compile_cache_pipeline_parallel_and_recompute_o2.logff8c39f2e51611efac92c4447d93fe45/failed/test_compile_cache_test_compile_cache_pipeline_parallel_and_0000644000175400017540000003123314751343160033036 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/compiler/compile_cache, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collected 1 item test_compile_cache.py [WARNING] ME(64147:281472911641616,MainProcess):2025-02-07-15:56:25.118.855 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:0, log file:worker_0.log. Environment variable [RANK_ID] is exported. [WARNING] ME(64147:281472911641616,MainProcess):2025-02-07-15:56:25.212.272 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:1, log file:worker_1.log. Environment variable [RANK_ID] is exported. [WARNING] ME(64147:281472911641616,MainProcess):2025-02-07-15:56:25.312.104 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:2, log file:worker_2.log. Environment variable [RANK_ID] is exported. [WARNING] ME(64147:281472911641616,MainProcess):2025-02-07-15:56:25.418.136 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:3, log file:worker_3.log. Environment variable [RANK_ID] is exported. [WARNING] ME(64147:281472911641616,MainProcess):2025-02-07-15:56:25.527.188 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:4, log file:worker_4.log. Environment variable [RANK_ID] is exported. [WARNING] ME(64147:281472911641616,MainProcess):2025-02-07-15:56:25.637.414 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:5, log file:worker_5.log. Environment variable [RANK_ID] is exported. [WARNING] ME(64147:281472911641616,MainProcess):2025-02-07-15:56:25.748.536 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:6, log file:worker_6.log. Environment variable [RANK_ID] is exported. [WARNING] ME(64147:281472911641616,MainProcess):2025-02-07-15:56:25.866.679 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:7, log file:worker_7.log. Environment variable [RANK_ID] is exported. check first train. check cache file. F =================================== FAILURES =================================== ____________ test_compile_cache_pipeline_parallel_and_recompute_o2 _____________ @arg_mark(plat_marks=['platform_ascend'], level_mark='level0', card_mark='allcards', essential_mark='essential') def test_compile_cache_pipeline_parallel_and_recompute_o2(): """ Feature: Compile cache. Description: Test whether pipeline parallel and recompute can successfullty with compile cache. Expectation: success. """ run_compile_cache_mp("run_compile_cache_mp_o2.py", "./pp_recompute_o2", "pp_recompute_o2_first", > "pp_recompute_o2_second") test_compile_cache.py:487: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ file_name = 'run_compile_cache_mp_o2.py', cache_path = './pp_recompute_o2' log_file_name_first = 'pp_recompute_o2_first' log_file_name_second = 'pp_recompute_o2_second' def run_compile_cache_mp(file_name, cache_path, log_file_name_first, log_file_name_second): # Clear compile cache folder and log files if os.path.exists(cache_path): shutil.rmtree(cache_path) assert not os.path.exists(cache_path) # First run without compile cache cmd = "bash run_compile_cache_mp.sh {} {} {}".format(file_name, cache_path, log_file_name_first) os.system(cmd) check_cmd = "ps -ef | grep python | grep {} | grep -v grep".format(file_name) # wait for net train finish ret = utils.process_check(150, check_cmd) print("check first train.", flush=True) assert ret print("check cache file.", flush=True) > assert os.path.exists(cache_path) E AssertionError: assert False E + where False = ('./pp_recompute_o2') E + where = .exists E + where = os.path test_compile_cache.py:154: AssertionError =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html =========================== short test summary info ============================ FAILED test_compile_cache.py::test_compile_cache_pipeline_parallel_and_recompute_o2 ======================= 1 failed, 18 warnings in 18.50s ======================== ff8c39f2e51611efac92c4447d93fe45/pass/0000775000175400017540000000000014751343160016407 5ustar jenkinsjenkinsff8c39f2e51611efac92c4447d93fe45/pass/test_all_test_hccl_reduce.log0000644000175400017540000027761414751343157024325 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collected 1 item test_all.py ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collecting ... rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collecting ... ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collecting ... rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collecting ... ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collecting ... ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collecting ... rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collecting ... rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collecting ... [WARNING] ME(195024:281472828857360,MainProcess):2025-02-07-15:59:40.898.331 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] ME(195023:281473161264144,MainProcess):2025-02-07-15:59:40.922.994 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] ME(195025:281473688149008,MainProcess):2025-02-07-15:59:40.965.420 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] ME(195027:281473196641296,MainProcess):2025-02-07-15:59:40.973.800 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] ME(195026:281473222810640,MainProcess):2025-02-07-15:59:41.129.61 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] ME(195029:281472984869904,MainProcess):2025-02-07-15:59:41.314.41 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] ME(195028:281473713429520,MainProcess):2025-02-07-15:59:41.149.060 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] ME(195030:281473065098256,MainProcess):2025-02-07-15:59:41.179.013 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] DISTRIBUTED(195024,ffff7ffa5c10,python3.7):2025-02-07-15:59:46.851.698 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(195023,ffff93ca7c10,python3.7):2025-02-07-15:59:47.703.460 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(195028,ffffb4b3dc10,python3.7):2025-02-07-15:59:47.728.609 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(195027,ffff95e64c10,python3.7):2025-02-07-15:59:47.752.489 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(195026,ffff97759c10,python3.7):2025-02-07-15:59:47.754.721 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(195029,ffff8946ec10,python3.7):2025-02-07-15:59:47.764.147 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(195025,ffffb3321c10,python3.7):2025-02-07-15:59:47.771.329 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(195030,ffff8e0f1c10,python3.7):2025-02-07-15:59:47.834.510 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(195024,fffe85ffb0f0,python3.7):2025-02-07-15:59:47.840.994 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(195023,fffe99ffb0f0,python3.7):2025-02-07-15:59:47.841.014 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(195025,fffec0ff90f0,python3.7):2025-02-07-15:59:47.841.044 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(195030,fffe8b7fe0f0,python3.7):2025-02-07-15:59:47.841.117 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(195028,fffeba7fc0f0,python3.7):2025-02-07-15:59:47.841.103 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(195029,fffe86ffd0f0,python3.7):2025-02-07-15:59:47.841.096 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(195026,fffea57fa0f0,python3.7):2025-02-07-15:59:47.841.117 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(195027,fffe92ffd0f0,python3.7):2025-02-07-15:59:47.841.127 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(195024,fffe857fa0f0,python3.7):2025-02-07-15:59:47.841.239 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(195023,fffe777fe0f0,python3.7):2025-02-07-15:59:47.841.239 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(195025,fffe9ffff0f0,python3.7):2025-02-07-15:59:47.841.389 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(195030,fffe8affd0f0,python3.7):2025-02-07-15:59:47.841.431 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(195029,fffe867fc0f0,python3.7):2025-02-07-15:59:47.841.437 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(195027,fffe927fc0f0,python3.7):2025-02-07-15:59:47.841.481 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(195028,fffeb9ffb0f0,python3.7):2025-02-07-15:59:47.841.469 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(195026,fffea4ff90f0,python3.7):2025-02-07-15:59:47.841.516 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(195023,fffe777fe0f0,python3.7):2025-02-07-15:59:48.137.373 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(195023,fffe99ffb0f0,python3.7):2025-02-07-15:59:48.137.550 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group  collecting 3 items  collected 3 items  test_reduce.py [WARNING] PROFILER(195023,fffe767fc0f0,python3.7):2025-02-07-15:59:48.171.657 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] DEVICE(195026,fffea4ff90f0,python3.7):2025-02-07-15:59:48.238.663 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(195026,fffea57fa0f0,python3.7):2025-02-07-15:59:48.238.905 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group  collecting 3 items  collected 3 items  test_reduce.py [WARNING] DEVICE(195027,fffe927fc0f0,python3.7):2025-02-07-15:59:48.265.664 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(195027,fffe92ffd0f0,python3.7):2025-02-07-15:59:48.265.900 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] PROFILER(195026,fffe83fff0f0,python3.7):2025-02-07-15:59:48.272.393 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] DEVICE(195025,fffe9ffff0f0,python3.7):2025-02-07-15:59:48.273.434 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group  collecting 3 items  collected 3 items  [WARNING] DISTRIBUTED(195025,fffec0ff90f0,python3.7):2025-02-07-15:59:48.274.893 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group test_reduce.py  collecting 3 items [WARNING] DEVICE(195028,fffeb9ffb0f0,python3.7):2025-02-07-15:59:48.283.007 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(195028,fffeba7fc0f0,python3.7):2025-02-07-15:59:48.283.256 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group  collected 3 items   collecting 3 items  collected 3 items  test_reduce.py [WARNING] PROFILER(195027,fffe91ffb0f0,python3.7):2025-02-07-15:59:48.297.461 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory test_reduce.py [WARNING] DEVICE(195029,fffe867fc0f0,python3.7):2025-02-07-15:59:48.301.419 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(195029,fffe86ffd0f0,python3.7):2025-02-07-15:59:48.301.665 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(195024,fffe857fa0f0,python3.7):2025-02-07-15:59:48.307.042 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(195024,fffe85ffb0f0,python3.7):2025-02-07-15:59:48.307.594 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group  collecting 3 items  collected 3 items  [WARNING] PROFILER(195025,fffe9f7fe0f0,python3.7):2025-02-07-15:59:48.312.939 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory  collecting 3 items  test_reduce.py  collected 3 items  [WARNING] PROFILER(195028,fffeb97fa0f0,python3.7):2025-02-07-15:59:48.319.265 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] DEVICE(195030,fffe8affd0f0,python3.7):2025-02-07-15:59:48.319.650 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(195030,fffe8b7fe0f0,python3.7):2025-02-07-15:59:48.319.908 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group test_reduce.py  collecting 3 items  collected 3 items  test_reduce.py [WARNING] PROFILER(195029,fffe85ffb0f0,python3.7):2025-02-07-15:59:48.335.388 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(195024,fffe84ff90f0,python3.7):2025-02-07-15:59:48.340.156 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(195030,fffe8a7fc0f0,python3.7):2025-02-07-15:59:48.353.606 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory outputs are [0.] [0.] outputs are [0.] [0.] outputs are [0.] [0.] outputs are [0.] [0.] outputs are [0.] [0.] .outputs are [0.] [0.] ....outputs are [ 0. 8. 16. 24.] [0.] outputs are [0.] [ 0. 8. 16. 24.] ...outputs are [0.] [0.] outputs are [0.] [0.] outputs are [0.] [0.] outputs are [0.] [0.] outputs are [0.] [0.] outputs are [0.] [0.] .outputs are [ 0. 8. 16. 24.] [0.] .....outputs are [0.] [ 0. 8. 16. 24.] ..outputs are [0.] [0.] outputs are [0.] [0.] outputs are [0.] [0.] outputs are [0.] [0.] outputs are [0.] [0.] outputs are [0.] [0.] .outputs are [ 0. 8. 16. 24.] [0.] ....outputs are [0.] [ 0. 8. 16. 24.] ... =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") =============================== warnings summary =============================== =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") ======================= 3 passed, 18 warnings in 12.96s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 -- Docs: https://docs.pytest.org/en/latest/warnings.html -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 ======================= 3 passed, 18 warnings in 12.96s ======================== -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") ======================= 3 passed, 18 warnings in 12.94s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 3 passed, 18 warnings in 12.95s ======================== ======================= 3 passed, 18 warnings in 12.96s ======================== ======================= 3 passed, 18 warnings in 12.95s ======================== ======================= 3 passed, 18 warnings in 12.94s ======================== ======================= 3 passed, 18 warnings in 12.96s ======================== [WARNING] DEVICE(195029,ffff8946ec10,python3.7):2025-02-07-15:59:53.052.216 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x298096e0 is not exist. [WARNING] DEVICE(195024,ffff7ffa5c10,python3.7):2025-02-07-15:59:53.056.656 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3fef4b00 is not exist. [WARNING] DEVICE(195023,ffff93ca7c10,python3.7):2025-02-07-15:59:55.297.387 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x428149b0 is not exist. [WARNING] DEVICE(195025,ffffb3321c10,python3.7):2025-02-07-15:59:55.321.705 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2cfb6690 is not exist. [WARNING] DEVICE(195027,ffff95e64c10,python3.7):2025-02-07-15:59:55.413.647 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2b9393b0 is not exist. [WARNING] DEVICE(195026,ffff97759c10,python3.7):2025-02-07-15:59:55.435.125 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x475108f0 is not exist. [WARNING] DEVICE(195030,ffff8e0f1c10,python3.7):2025-02-07-15:59:55.486.653 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x45bd5210 is not exist. [WARNING] DEVICE(195028,ffffb4b3dc10,python3.7):2025-02-07-15:59:55.505.933 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3725cb00 is not exist. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 1 passed, 18 warnings in 34.71s ======================== ff8c39f2e51611efac92c4447d93fe45/pass/test_dump_hccl_test_dump_hccl.log0000644000175400017540000102225614751343157025200 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/dump, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collected 1 item test_dump_hccl.py [WARNING] ME(172884:281473254292496,MainProcess):2025-02-07-13:54:55.694.235 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:0, log file:worker_0.log. Environment variable [RANK_ID] is exported. [WARNING] ME(172884:281473254292496,MainProcess):2025-02-07-13:54:55.829.005 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:1, log file:worker_1.log. Environment variable [RANK_ID] is exported. [WARNING] ME(172884:281473254292496,MainProcess):2025-02-07-13:54:55.965.992 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:2, log file:worker_2.log. Environment variable [RANK_ID] is exported. [WARNING] ME(172884:281473254292496,MainProcess):2025-02-07-13:54:56.121.090 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:3, log file:worker_3.log. Environment variable [RANK_ID] is exported. [WARNING] ME(172884:281473254292496,MainProcess):2025-02-07-13:54:56.286.543 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:4, log file:worker_4.log. Environment variable [RANK_ID] is exported. [WARNING] ME(172884:281473254292496,MainProcess):2025-02-07-13:54:56.453.879 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:5, log file:worker_5.log. Environment variable [RANK_ID] is exported. [WARNING] ME(172884:281473254292496,MainProcess):2025-02-07-13:54:56.612.714 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:6, log file:worker_6.log. Environment variable [RANK_ID] is exported. [WARNING] ME(172884:281473254292496,MainProcess):2025-02-07-13:54:56.775.792 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:7, log file:worker_7.log. Environment variable [RANK_ID] is exported. [WARNING] ME(172884:281473254292496,MainProcess):2025-02-07-13:54:56.936.774 [mindspore/parallel/cluster/process_entity/_api.py:223] Distributed job is spawned. Waiting all processes to exit... [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:01.086.857 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:56326, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:01.086.961 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:76] Connect] Failed to connect to the tcp server : 127.0.0.1:8118, retry to reconnect(1/1)... [WARNING] DISTRIBUTED(173024,ffff9891fc10,python):2025-02-07-13:55:01.310.971 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:56328, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173024,ffff1881b0f0,python):2025-02-07-13:55:01.310.978 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56328 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173024,ffff9891fc10,python):2025-02-07-13:55:01.311.068 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(173036,ffffa9ff1c10,python):2025-02-07-13:55:01.359.478 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:56330, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173036,ffff29eda0f0,python):2025-02-07-13:55:01.359.483 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56330 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173036,ffffa9ff1c10,python):2025-02-07-13:55:01.359.568 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:01.587.074 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:173] Register] Failed to connect to the meta server node url: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:01.587.116 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:363] ReconnectWithTimeoutWindow] Failed to register and try to reconnect to the meta server. [WARNING] DISTRIBUTED(173073,ffff069820f0,python):2025-02-07-13:55:01.759.035 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56332 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173073,ffff86a8fc10,python):2025-02-07-13:55:01.759.035 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:56332, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173073,ffff86a8fc10,python):2025-02-07-13:55:01.759.216 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:56334, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173073,ffff079840f0,python):2025-02-07-13:55:01.759.245 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56334 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173073,ffff86a8fc10,python):2025-02-07-13:55:01.759.251 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(173053,ffff98e3bc10,python):2025-02-07-13:55:01.793.233 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:56336, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173053,ffff18d1b0f0,python):2025-02-07-13:55:01.793.244 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56336 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173053,ffff98e3bc10,python):2025-02-07-13:55:01.793.362 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(173024,ffff9891fc10,python):2025-02-07-13:55:01.811.428 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:56338, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173024,ffff9891fc10,python):2025-02-07-13:55:01.811.474 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(173024,ffff1981d0f0,python):2025-02-07-13:55:01.811.476 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56338 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173087,ffff258b80f0,python):2025-02-07-13:55:01.843.183 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56340 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173087,ffffa5985c10,python):2025-02-07-13:55:01.843.181 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:56340, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173087,ffffa5985c10,python):2025-02-07-13:55:01.843.340 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:56342, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173087,ffff268ba0f0,python):2025-02-07-13:55:01.843.369 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56342 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173087,ffffa5985c10,python):2025-02-07-13:55:01.843.374 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(173036,ffffa9ff1c10,python):2025-02-07-13:55:01.859.790 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:56344, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173036,ffff2aedc0f0,python):2025-02-07-13:55:01.859.814 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56344 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173036,ffffa9ff1c10,python):2025-02-07-13:55:01.859.825 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:02.087.390 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:56346, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173011,ffff3585d0f0,python):2025-02-07-13:55:02.087.402 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56346 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:02.087.442 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(173118,ffffb8c65c10,python):2025-02-07-13:55:02.239.608 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:56348, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173118,ffff38b5b0f0,python):2025-02-07-13:55:02.239.612 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56348 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173118,ffffb8c65c10,python):2025-02-07-13:55:02.239.689 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(173073,ffff86a8fc10,python):2025-02-07-13:55:02.259.883 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(173053,ffff98e3bc10,python):2025-02-07-13:55:02.293.527 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:56352, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173053,ffff98e3bc10,python):2025-02-07-13:55:02.293.565 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(173053,ffff19d1d0f0,python):2025-02-07-13:55:02.293.564 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56352 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173024,ffff9891fc10,python):2025-02-07-13:55:02.311.883 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(173087,ffffa5985c10,python):2025-02-07-13:55:02.343.685 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(173104,ffff1e8790f0,python):2025-02-07-13:55:02.349.549 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56354 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173104,ffff9e959c10,python):2025-02-07-13:55:02.349.551 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:56354, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173104,ffff9e959c10,python):2025-02-07-13:55:02.349.722 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:56356, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173104,ffff1f87b0f0,python):2025-02-07-13:55:02.349.749 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56356 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173104,ffff9e959c10,python):2025-02-07-13:55:02.349.765 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(173036,ffffa9ff1c10,python):2025-02-07-13:55:02.360.234 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:02.587.689 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:56358, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173011,ffff3485b0f0,python):2025-02-07-13:55:02.587.707 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56358 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:02.587.806 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(173118,ffffb8c65c10,python):2025-02-07-13:55:02.739.847 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:56360, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(173118,ffffb8c65c10,python):2025-02-07-13:55:02.739.870 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(173118,ffff39b5d0f0,python):2025-02-07-13:55:02.739.880 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:56360 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(173073,ffff86a8fc10,python):2025-02-07-13:55:02.759.981 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(173053,ffff98e3bc10,python):2025-02-07-13:55:02.793.942 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(173024,ffff9891fc10,python):2025-02-07-13:55:02.811.990 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(173087,ffffa5985c10,python):2025-02-07-13:55:02.843.772 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(173104,ffff9e959c10,python):2025-02-07-13:55:02.850.164 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(173036,ffffa9ff1c10,python):2025-02-07-13:55:02.860.344 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:03.088.265 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(173118,ffffb8c65c10,python):2025-02-07-13:55:03.240.211 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(173073,ffff86a8fc10,python):2025-02-07-13:55:03.260.075 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(173053,ffff98e3bc10,python):2025-02-07-13:55:03.294.030 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(173024,ffff9891fc10,python):2025-02-07-13:55:03.312.084 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(173087,ffffa5985c10,python):2025-02-07-13:55:03.343.861 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(173104,ffff9e959c10,python):2025-02-07-13:55:03.350.264 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(173036,ffffa9ff1c10,python):2025-02-07-13:55:03.360.432 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:03.588.355 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(173118,ffffb8c65c10,python):2025-02-07-13:55:03.740.311 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(173118,ffffb8c65c10,python):2025-02-07-13:55:03.740.335 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 7 rank id: 7 [WARNING] DISTRIBUTED(173073,ffff86a8fc10,python):2025-02-07-13:55:03.760.183 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(173073,ffff86a8fc10,python):2025-02-07-13:55:03.760.214 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 4 rank id: 4 [WARNING] DISTRIBUTED(173053,ffff98e3bc10,python):2025-02-07-13:55:03.794.162 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(173053,ffff98e3bc10,python):2025-02-07-13:55:03.794.194 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 3 rank id: 3 [WARNING] DISTRIBUTED(173024,ffff9891fc10,python):2025-02-07-13:55:03.812.189 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(173024,ffff9891fc10,python):2025-02-07-13:55:03.812.217 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 1 rank id: 1 [WARNING] DISTRIBUTED(173087,ffffa5985c10,python):2025-02-07-13:55:03.843.961 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(173087,ffffa5985c10,python):2025-02-07-13:55:03.843.989 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 5 rank id: 5 [WARNING] DISTRIBUTED(173104,ffff9e959c10,python):2025-02-07-13:55:03.850.383 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(173104,ffff9e959c10,python):2025-02-07-13:55:03.850.415 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 6 rank id: 6 [WARNING] DISTRIBUTED(173036,ffffa9ff1c10,python):2025-02-07-13:55:03.860.526 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(173036,ffffa9ff1c10,python):2025-02-07-13:55:03.860.550 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 2 rank id: 2 [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:04.088.504 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:04.088.560 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 0 rank id: 0 [WARNING] DISTRIBUTED(173053,ffff98e3bc10,python):2025-02-07-13:55:08.204.317 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(173053,ffff98e3bc10,python):2025-02-07-13:55:08.204.512 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(173053,fffe9d7fa0f0,python):2025-02-07-13:55:08.205.144 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 [WARNING] DISTRIBUTED(173024,ffff9891fc10,python):2025-02-07-13:55:08.305.157 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(173024,ffff9891fc10,python):2025-02-07-13:55:08.305.326 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(173024,fffe9d7fa0f0,python):2025-02-07-13:55:08.305.857 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 [WARNING] DISTRIBUTED(173087,ffffa5985c10,python):2025-02-07-13:55:08.311.952 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(173087,ffffa5985c10,python):2025-02-07-13:55:08.312.160 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(173087,fffea1ffb0f0,python):2025-02-07-13:55:08.312.849 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 [WARNING] DISTRIBUTED(173118,ffffb8c65c10,python):2025-02-07-13:55:08.352.483 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(173118,ffffb8c65c10,python):2025-02-07-13:55:08.352.684 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(173118,fffebd7fa0f0,python):2025-02-07-13:55:08.353.244 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 [WARNING] DISTRIBUTED(173073,ffff86a8fc10,python):2025-02-07-13:55:08.444.400 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(173073,ffff86a8fc10,python):2025-02-07-13:55:08.444.598 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(173073,fffe7affd0f0,python):2025-02-07-13:55:08.445.278 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 [WARNING] DISTRIBUTED(173036,ffffa9ff1c10,python):2025-02-07-13:55:08.632.188 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(173036,ffffa9ff1c10,python):2025-02-07-13:55:08.632.404 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(173036,fffea67fc0f0,python):2025-02-07-13:55:08.632.975 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 [WARNING] DEVICE(173053,fffe9d7fa0f0,python):2025-02-07-13:55:08.705.684 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 [WARNING] DEVICE(173024,fffe9d7fa0f0,python):2025-02-07-13:55:08.806.405 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 [WARNING] DEVICE(173087,fffea1ffb0f0,python):2025-02-07-13:55:08.813.302 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 [WARNING] DEVICE(173118,fffebd7fa0f0,python):2025-02-07-13:55:08.853.646 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 [WARNING] DEVICE(173073,fffe7affd0f0,python):2025-02-07-13:55:08.945.705 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 [WARNING] DEVICE(173036,fffea67fc0f0,python):2025-02-07-13:55:09.133.351 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 2 [WARNING] DEVICE(173053,fffe9d7fa0f0,python):2025-02-07-13:55:09.206.072 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 397/400, sleep 1 [WARNING] DISTRIBUTED(173104,ffff9e959c10,python):2025-02-07-13:55:09.216.370 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(173104,ffff9e959c10,python):2025-02-07-13:55:09.216.584 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(173104,fffe937fe0f0,python):2025-02-07-13:55:09.217.131 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 [WARNING] DEVICE(173024,fffe9d7fa0f0,python):2025-02-07-13:55:09.306.805 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 397/400, sleep 2 [WARNING] DEVICE(173087,fffea1ffb0f0,python):2025-02-07-13:55:09.313.706 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 397/400, sleep 1 [WARNING] DEVICE(173118,fffebd7fa0f0,python):2025-02-07-13:55:09.353.931 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 397/400, sleep 1 [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:09.445.924 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DEVICE(173073,fffe7affd0f0,python):2025-02-07-13:55:09.446.046 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 397/400, sleep 1 [WARNING] DISTRIBUTED(173011,ffffb4920c10,python):2025-02-07-13:55:09.446.154 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(173011,fffeb97fa0f0,python):2025-02-07-13:55:09.451.435 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173011,fffe96ffd0f0,python):2025-02-07-13:55:09.451.709 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(173036,fffea67fc0f0,python):2025-02-07-13:55:09.633.770 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173036,fffe03fff0f0,python):2025-02-07-13:55:09.634.083 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(173053,fffe9d7fa0f0,python):2025-02-07-13:55:09.706.503 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173053,fffdfaffd0f0,python):2025-02-07-13:55:09.706.826 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(173104,fffe937fe0f0,python):2025-02-07-13:55:09.717.576 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173104,fffe10ff90f0,python):2025-02-07-13:55:09.717.919 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(173024,fffe9d7fa0f0,python):2025-02-07-13:55:09.807.240 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173024,fffdfaffd0f0,python):2025-02-07-13:55:09.807.574 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(173087,fffea1ffb0f0,python):2025-02-07-13:55:09.814.157 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173087,fffe037fe0f0,python):2025-02-07-13:55:09.814.508 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(173118,fffebd7fa0f0,python):2025-02-07-13:55:09.854.358 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173118,fffe1affd0f0,python):2025-02-07-13:55:09.854.713 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(173073,fffe7affd0f0,python):2025-02-07-13:55:09.946.507 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173073,fffe00ff90f0,python):2025-02-07-13:55:09.946.843 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(173011,fffe96ffd0f0,python):2025-02-07-13:55:10.143.893 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(173011,fffeb97fa0f0,python):2025-02-07-13:55:10.144.133 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173024,fffdfaffd0f0,python):2025-02-07-13:55:10.186.086 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(173024,fffe9d7fa0f0,python):2025-02-07-13:55:10.186.313 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173087,fffe037fe0f0,python):2025-02-07-13:55:10.209.896 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(173087,fffea1ffb0f0,python):2025-02-07-13:55:10.210.124 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173036,fffe03fff0f0,python):2025-02-07-13:55:10.223.953 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(173036,fffea67fc0f0,python):2025-02-07-13:55:10.224.179 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173118,fffe1affd0f0,python):2025-02-07-13:55:10.226.682 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(173118,fffebd7fa0f0,python):2025-02-07-13:55:10.227.486 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173104,fffe10ff90f0,python):2025-02-07-13:55:10.288.559 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(173104,fffe937fe0f0,python):2025-02-07-13:55:10.288.802 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173053,fffdfaffd0f0,python):2025-02-07-13:55:10.294.931 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(173053,fffe9d7fa0f0,python):2025-02-07-13:55:10.295.112 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(173073,fffe00ff90f0,python):2025-02-07-13:55:10.329.541 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(173073,fffe7affd0f0,python):2025-02-07-13:55:10.329.776 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group TotalTime = 7.66601, [21] [bootstrap]: 0.00165056 [type_inference]: 0.0154116 [auto_monad]: 9.497e-05 [graph_reusing]: 2.94999e-06 [inline]: 2.84e-06 [parallel-infer-symbol]: 1.118e-05 [pre_auto_parallel]: 4.801e-05 [insert-virtual-dataset]: 3.84e-06 [parallel-infer-symbol-second]: 4.29995e-07 [dataset_repeat_opt]: 1.33e-06 [pipeline_split]: 1.60999e-06 [optimize]: 0.0130908, [52] [py_interpret_to_execute]: 2.496e-05 [rewriter_before_opt_a]: 3.96e-05 [opt_a]: 0.0112135, [2] [Cycle 1]: 0.00166778, [43] [expand_dump_flag]: 4.86e-06 [switch_simplify]: 3.975e-05 [loop_unroll]: 9.77001e-06 [a_1]: 0.00036763 [recompute_prepare]: 6.84999e-06 [updatestate_depend_eliminate]: 1.161e-05 [updatestate_assign_eliminate]: 3.88001e-06 [updatestate_loads_eliminate]: 3.18e-06 [parameter_eliminate]: 8.12e-06 [a_2]: 8.085e-05 [accelerated_algorithm]: 5.7e-06 [shard]: 2.65001e-06 [meta_shard_fg_expand]: 3.85e-06 [shard_inline]: 5.10999e-06 [auto_parallel]: 2.14e-05 [parallel]: 1.029e-05 [flash_sp]: 1.271e-05 [merge_comm]: 6.08001e-06 [allreduce_fusion]: 4.07999e-06 [matmul_add_comm_reduction]: 1.025e-05 [allreduce_slice_to_reducescatter]: 4.60001e-07 [virtual_shard_identity]: 5.97999e-06 [virtual_dataset]: 4.89e-06 [get_grad_eliminate_]: 4.50001e-06 [virtual_output]: 4.47e-06 [merge_forward]: 4.21e-06 [cell_reuse_recompute_pass]: 2.01e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.461e-05 [before_grad]: 8.48e-06 [inplace_validation]: 3.37001e-06 [meta_fg_expand]: 3.39e-06 [inplace_validation_after_expand]: 4.59999e-06 [flash_sp_send_recv_attached]: 4.61e-06 [receive_attached]: 9.16999e-06 [after_resolve]: 9.38e-06 [a_after_grad]: 7.45e-06 [special_op_eliminate]: 4.54999e-06 [renormalize]: 0.00059795 [add_forward_monad_depend]: 5.14e-06 [auto_monad_grad]: 3.53e-06 [auto_monad_eliminator]: 1.577e-05 [cse]: 3.285e-05 [a_3]: 3.782e-05 [Cycle 2]: 0.00053612, [43] [expand_dump_flag]: 2.32999e-06 [switch_simplify]: 5.91e-06 [loop_unroll]: 5.27001e-06 [a_1]: 9.888e-05 [recompute_prepare]: 4.37e-06 [updatestate_depend_eliminate]: 3.76001e-06 [updatestate_assign_eliminate]: 2.83e-06 [updatestate_loads_eliminate]: 2.66e-06 [parameter_eliminate]: 1.54e-06 [a_2]: 5.939e-05 [accelerated_algorithm]: 5.2e-06 [shard]: 2.43e-06 [meta_shard_fg_expand]: 1.84e-06 [shard_inline]: 4.77e-06 [auto_parallel]: 1.429e-05 [parallel]: 6.58999e-06 [flash_sp]: 6.17999e-06 [merge_comm]: 4.88e-06 [allreduce_fusion]: 3.39e-06 [matmul_add_comm_reduction]: 7.43e-06 [allreduce_slice_to_reducescatter]: 5.19998e-07 [virtual_shard_identity]: 5.53e-06 [virtual_dataset]: 4.70999e-06 [get_grad_eliminate_]: 4.29001e-06 [virtual_output]: 4.05e-06 [merge_forward]: 3.62999e-06 [cell_reuse_recompute_pass]: 3.63e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.031e-05 [before_grad]: 7.77e-06 [inplace_validation]: 2.69001e-06 [meta_fg_expand]: 2.59999e-06 [inplace_validation_after_expand]: 3.14999e-06 [flash_sp_send_recv_attached]: 2.2e-06 [receive_attached]: 1.65001e-06 [after_resolve]: 7.64e-06 [a_after_grad]: 7.37001e-06 [special_op_eliminate]: 4.25e-06 [renormalize]: 7.00093e-08 [add_forward_monad_depend]: 1.17e-06 [auto_monad_grad]: 1.23e-06 [auto_monad_eliminator]: 5.56e-06 [cse]: 1.567e-05 [a_3]: 2.597e-05 [py_interpret_to_execute_after_opt_a]: 1.637e-05 [slice_cell_reuse_recomputed_activation]: 2.78e-06 [rewriter_after_opt_a]: 6.363e-05 [convert_after_rewriter]: 6.58001e-06 [order_py_execute_after_rewriter]: 4.48e-06 [opt_b]: 0.00014581, [1] [Cycle 1]: 0.00013806, [7] [b_1]: 8.289e-05 [b_2]: 6.83e-06 [updatestate_depend_eliminate]: 2.51e-06 [updatestate_assign_eliminate]: 2.11e-06 [updatestate_loads_eliminate]: 1.9e-06 [renormalize]: 2.2001e-07 [cse]: 1.185e-05 [optimize_parallel_all_gather_comm]: 6.82e-06 [overlap_param_gather]: 3.9e-06 [cconv]: 2.934e-05 [loop_unroll]: 0.00078021 [opt_after_cconv]: 0.00011412, [1] [Cycle 1]: 0.00010623, [7] [c_1]: 2.578e-05 [parameter_eliminate]: 5.20001e-06 [updatestate_depend_eliminate]: 1.016e-05 [updatestate_assign_eliminate]: 2.92e-06 [updatestate_loads_eliminate]: 2.34001e-06 [cse]: 2.69e-05 [renormalize]: 6.60002e-07 [remove_dup_value]: 1.395e-05 [tuple_transform]: 5.506e-05, [1] [Cycle 1]: 5.022e-05, [2] [d_1]: 4.067e-05 [renormalize]: 1.80007e-07 [partial_unused_args_eliminate]: 2.44001e-06 [add_cache_embedding]: 1.585e-05 [add_recomputation]: 6.87e-05 [cse_after_recomputation]: 2.233e-05, [1] [Cycle 1]: 1.709e-05, [1] [cse]: 1.195e-05 [environ_conv]: 2.876e-05 [swap_dp_allreduce_reducescatter]: 5.92e-06 [bias_add_comm_swap]: 3.42001e-06 [label_micro_interleaved_index]: 2.72e-06 [label_fine_grained_interleaved_index]: 2.26e-06 [merge_cast_opt]: 1.45001e-06 [slice_recompute_activation]: 2.04e-06 [micro_interleaved_order_control]: 1.89e-06 [assign_add_opt]: 1.728e-05 [ForceFp32Comm]: 1.04001e-06 [remove_cast_before_assign_add]: 1.19001e-06 [full_micro_interleaved_order_control]: 2.51e-06 [reorder_send_recv_between_fp_bp]: 2.53999e-06 [comm_op_add_attrs]: 1.04999e-06 [add_comm_op_reuse_tag]: 1.17e-06 [interleave_split_concat_branches]: 9.4001e-07 [interleave_parallel_branches]: 9.60004e-07 [overlap_opt_shard_in_pipeline]: 8.69e-06 [overlap_opt_shard_grad_in_pipeline]: 2.37e-06 [control_data_broadcast_order]: 1.51001e-06 [grouped_pairwise_exchange_alltoall]: 1.45e-06 [offloading_packed_experts]: 1.04e-06 [overlap_recompute_and_grad_model_parallel]: 2.26e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.32e-06 [overlap_recompute_allgather_and_fa_grad]: 1.22e-06 [overlap_grad_ring_attention]: 1.281e-05 [overlap_grad_flash_sp]: 1.834e-05 [begin_end_overlap_inline]: 8.00006e-07 [split_matmul_comm_elemetwise]: 2.32999e-06 [split_layernorm_comm]: 2.1e-06 [handle_group_info]: 1.07e-06 [symbol_engine_optimizer]: 7.112e-05, [1] [Cycle 1]: 6.634e-05, [6] [build]: 4.27999e-06 [elim_shapecalc]: 8.84999e-06 [elim_not_effective]: 1.39e-05 [opt_reshape]: 5.66001e-06 [fold_const_symbol]: 8.44001e-06 [renormalize]: 5.10001e-07 [pipeline_parallel_scheduler]: 2.27999e-06 [auto_monad_reorder]: 2.52e-05 [get_jit_bprop_graph]: 4.39992e-07 [rewriter_after_jit_bprop_graph]: 5.10001e-07 [eliminate_special_op_node]: 0.00073475 [distribtued_split]: 6.719e-05 [validate]: 6.276e-05 [task_emit]: 7.63443 [execute]: 1.144e-05 Sums bootstrap : 0.001651s : 0.02% type_inference : 0.015412s : 0.20% auto_monad : 0.000095s : 0.00% graph_reusing : 0.000003s : 0.00% inline : 0.000003s : 0.00% parallel-infer-symbol : 0.000011s : 0.00% pre_auto_parallel : 0.000048s : 0.00% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000025s : 0.00% optimize.rewriter_before_opt_a : 0.000040s : 0.00% optimize.opt_a.expand_dump_flag : 0.000007s : 0.00% optimize.opt_a.switch_simplify : 0.000046s : 0.00% optimize.opt_a.loop_unroll : 0.000015s : 0.00% optimize.opt_a.a_1 : 0.000467s : 0.01% optimize.opt_a.recompute_prepare : 0.000011s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000007s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000006s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.000140s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000011s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.00% optimize.opt_a.shard_inline : 0.000010s : 0.00% optimize.opt_a.auto_parallel : 0.000036s : 0.00% optimize.opt_a.parallel : 0.000017s : 0.00% optimize.opt_a.flash_sp : 0.000019s : 0.00% optimize.opt_a.merge_comm : 0.000011s : 0.00% optimize.opt_a.allreduce_fusion : 0.000007s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000012s : 0.00% optimize.opt_a.virtual_dataset : 0.000010s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000009s : 0.00% optimize.opt_a.virtual_output : 0.000009s : 0.00% optimize.opt_a.merge_forward : 0.000008s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000006s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000025s : 0.00% optimize.opt_a.before_grad : 0.000016s : 0.00% optimize.opt_a.inplace_validation : 0.000006s : 0.00% optimize.opt_a.meta_fg_expand : 0.000006s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000008s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.00% optimize.opt_a.receive_attached : 0.000011s : 0.00% optimize.opt_a.after_resolve : 0.000017s : 0.00% optimize.opt_a.a_after_grad : 0.000015s : 0.00% optimize.opt_a.special_op_eliminate : 0.000009s : 0.00% optimize.opt_a.renormalize : 0.000598s : 0.01% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000005s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000021s : 0.00% optimize.opt_a.cse : 0.000049s : 0.00% optimize.opt_a.a_3 : 0.000064s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000016s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000064s : 0.00% optimize.convert_after_rewriter : 0.000007s : 0.00% optimize.order_py_execute_after_rewriter : 0.000004s : 0.00% optimize.opt_b.b_1 : 0.000083s : 0.00% optimize.opt_b.b_2 : 0.000007s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000003s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000012s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000007s : 0.00% optimize.overlap_param_gather : 0.000004s : 0.00% optimize.cconv : 0.000029s : 0.00% optimize.loop_unroll : 0.000780s : 0.01% optimize.opt_after_cconv.c_1 : 0.000026s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000010s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000027s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000014s : 0.00% optimize.tuple_transform.d_1 : 0.000041s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000016s : 0.00% optimize.add_recomputation : 0.000069s : 0.00% optimize.cse_after_recomputation.cse : 0.000012s : 0.00% optimize.environ_conv : 0.000029s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000006s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000003s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000017s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000009s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000002s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000013s : 0.00% optimize.overlap_grad_flash_sp : 0.000018s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000009s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000014s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000006s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000008s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000025s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000735s : 0.01% distribtued_split : 0.000067s : 0.00% validate : 0.000063s : 0.00% task_emit : 7.634428s : 99.72% execute : 0.000011s : 0.00% Time group info: ------[substitution.] 0.000215 20 1.01% : 0.000002s : 2: substitution.elim_not_effective 0.80% : 0.000002s : 2: substitution.fold_const_symbol 3.19% : 0.000007s : 3: substitution.graph_param_transform 80.35% : 0.000173s : 1: substitution.inline 2.02% : 0.000004s : 4: substitution.j_node_and_user_rematch 6.99% : 0.000015s : 2: substitution.reduce_all_const_elim 3.55% : 0.000008s : 4: substitution.remove_not_recompute_node 2.09% : 0.000004s : 2: substitution.replace_old_param ------[type_inference.] 0.015357 2 97.50% : 0.014972s : 1: type_inference.infer 2.50% : 0.000384s : 1: type_inference.specialize ------[replace.] 0.000019 1 100.00% : 0.000019s : 1: replace.inline ------[match.] 0.000172 1 100.00% : 0.000172s : 1: match.inline ------[predicate.] 0.000150 740 0.70% : 0.000001s : 7: predicate.accumulaten_eliminater 2.56% : 0.000004s : 3: predicate.ad_related_special_op_eliminate 0.54% : 0.000001s : 6: predicate.addn_check_dump 0.75% : 0.000001s : 7: predicate.addn_zero_filter 0.59% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.20% : 0.000003s : 13: predicate.arithmetic_simplify 0.77% : 0.000001s : 7: predicate.cast_eliminate 0.66% : 0.000001s : 6: predicate.check_bprop_eliminate 0.67% : 0.000001s : 6: predicate.compare_switch_simplify 0.17% : 0.000000s : 3: predicate.const_output_eliminate 0.51% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.81% : 0.000003s : 7: predicate.convert_tensor_eliminate 0.86% : 0.000001s : 6: predicate.depend_value_elim 0.66% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.80% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.68% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.24% : 0.000000s : 3: predicate.elim_not_effective 0.63% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000002s : 10: predicate.environ_add_const_eliminate 0.90% : 0.000001s : 10: predicate.environ_get_add_eliminate 0.90% : 0.000001s : 10: predicate.environ_get_depend_swap 1.48% : 0.000002s : 16: predicate.environ_get_eliminate 1.06% : 0.000002s : 10: predicate.environ_get_set_eliminate 0.67% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.77% : 0.000003s : 8: predicate.float_depend_g_call 0.55% : 0.000001s : 6: predicate.float_environ_get_switch 0.94% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.20% : 0.000000s : 3: predicate.fold_const_symbol 0.67% : 0.000001s : 6: predicate.get_grad_eliminate 0.60% : 0.000001s : 3: predicate.graph_param_transform 0.62% : 0.000001s : 6: predicate.incorporate_call 0.54% : 0.000001s : 6: predicate.incorporate_call_switch 6.04% : 0.000009s : 33: predicate.inline 1.16% : 0.000002s : 6: predicate.inline_without_move 0.35% : 0.000001s : 6: predicate.j_node_and_user_rematch 1.04% : 0.000002s : 6: predicate.less_batch_normalization 3.15% : 0.000005s : 13: predicate.list_to_tuple_eliminator_ 1.83% : 0.000003s : 20: predicate.load_eliminater 3.37% : 0.000005s : 3: predicate.loop_unroll_after_grad 1.54% : 0.000002s : 10: predicate.loop_unroll_before_grad 2.23% : 0.000003s : 13: predicate.make_slice_get_slice_eliminator 0.67% : 0.000001s : 6: predicate.merge_addn 0.62% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.63% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.88% : 0.000001s : 7: predicate.minmaximum_grad 1.19% : 0.000002s : 3: predicate.mutable_eliminate 0.55% : 0.000001s : 3: predicate.opt_reshape 0.47% : 0.000001s : 3: predicate.parallel_virtual_node 1.18% : 0.000002s : 8: predicate.partial_defer_inline 1.04% : 0.000002s : 10: predicate.partial_eliminate 0.71% : 0.000001s : 7: predicate.print_const_string_wrapper 1.28% : 0.000002s : 6: predicate.reduce_all_const_elim 1.06% : 0.000002s : 7: predicate.reduce_eliminate 0.64% : 0.000001s : 6: predicate.remove_not_recompute_node 1.03% : 0.000002s : 13: predicate.replace_applicator 0.70% : 0.000001s : 6: predicate.replace_old_param 0.21% : 0.000000s : 3: predicate.reset_defer_inline 0.77% : 0.000001s : 7: predicate.reshape_eliminate 0.71% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 3: predicate.row_tensor_eliminate 1.15% : 0.000002s : 6: predicate.same_eliminate 0.43% : 0.000001s : 6: predicate.set_cell_output_no_recompute 0.85% : 0.000001s : 6: predicate.shard_identity_eliminate 1.10% : 0.000002s : 9: predicate.special_op_eliminate 0.82% : 0.000001s : 6: predicate.specialize_transform 1.18% : 0.000002s : 6: predicate.split_environ_get_set_with_tuple_value 1.06% : 0.000002s : 6: predicate.stack_unstack_eliminate 2.25% : 0.000003s : 20: predicate.stopgrad_eliminater 0.35% : 0.000001s : 3: predicate.switch_call_monad_eliminater 0.75% : 0.000001s : 8: predicate.switch_defer_inline 1.32% : 0.000002s : 14: predicate.switch_layer_defer_inline 6.29% : 0.000009s : 24: predicate.switch_simplify 0.75% : 0.000001s : 7: predicate.tile_eliminate 0.67% : 0.000001s : 7: predicate.transpose_eliminate 1.68% : 0.000003s : 13: predicate.tuple_list_convert_item_index_to_positive 1.36% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.18% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.30% : 0.000003s : 19: predicate.tuple_list_get_item_eliminator 1.18% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.14% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.38% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 1.66% : 0.000002s : 20: predicate.updatestate_pure_node_eliminater 2.38% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.45% : 0.000001s : 3: predicate.value_based_eliminate 0.77% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.68% : 0.000001s : 6: predicate.virtual_output_eliminate 0.49% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000221 4 8.36% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.64% : 0.000203s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 7.680619 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000019s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000074s : 1: add_recomputation 0.00% : 0.000022s : 1: assign_add_opt 0.00% : 0.000107s : 1: auto_monad 0.00% : 0.000031s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.02% : 0.001717s : 1: bootstrap 0.00% : 0.000033s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000011s : 1: convert_after_rewriter 0.00% : 0.000026s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.00% : 0.000079s : 1: distribtued_split 0.01% : 0.000758s : 1: eliminate_special_op_node 0.00% : 0.000033s : 1: environ_conv 0.00% : 0.000021s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000006s : 1: label_micro_interleaved_index 0.01% : 0.000795s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000020s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000809s : 80: opt.transform.opt_a 0.00% : 0.000024s : 1: opt.transform.opt_after_cconv 0.00% : 0.000071s : 27: opt.transform.opt_b 0.00% : 0.000039s : 1: opt.transform.opt_trans_graph 0.00% : 0.000029s : 3: opt.transform.special_op_eliminate 0.00% : 0.000034s : 4: opt.transform.symbol_engine_opt 0.15% : 0.011219s : 1: opt_a 0.00% : 0.000119s : 1: opt_after_cconv 0.00% : 0.000149s : 1: opt_b 0.17% : 0.013106s : 1: optimize 0.00% : 0.000010s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000008s : 1: order_py_execute_after_rewriter 0.00% : 0.000022s : 1: overlap_grad_flash_sp 0.00% : 0.000005s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000016s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000012s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000016s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000054s : 1: pre_auto_parallel 0.00% : 0.000030s : 1: py_interpret_to_execute 0.00% : 0.000021s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000018s : 1: remove_dup_value 0.00% : 0.000345s : 1: renormalize.infer 0.00% : 0.000245s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000069s : 1: rewriter_after_opt_a 0.00% : 0.000044s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000009s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000074s : 1: symbol_engine_optimizer 99.40% : 7.634465s : 1: task_emit 0.00% : 0.000058s : 1: tuple_transform 0.20% : 0.015450s : 1: type_inference 0.00% : 0.000118s : 1: validate TotalTime = 7.75003, [21] [bootstrap]: 0.00194856 [type_inference]: 0.0148436 [auto_monad]: 0.00017339 [graph_reusing]: 2.73e-06 [inline]: 2.47e-06 [parallel-infer-symbol]: 3.14e-06 [pre_auto_parallel]: 4.078e-05 [insert-virtual-dataset]: 4.597e-05 [parallel-infer-symbol-second]: 6.30011e-07 [dataset_repeat_opt]: 1.60999e-06 [pipeline_split]: 2e-06 [optimize]: 0.0120051, [52] [py_interpret_to_execute]: 1.52e-05 [rewriter_before_opt_a]: 4.218e-05 [opt_a]: 0.010422, [2] [Cycle 1]: 0.00141241, [43] [expand_dump_flag]: 4.02001e-06 [switch_simplify]: 3.102e-05 [loop_unroll]: 9.33e-06 [a_1]: 0.00026974 [recompute_prepare]: 5.58e-06 [updatestate_depend_eliminate]: 1.068e-05 [updatestate_assign_eliminate]: 3.2e-06 [updatestate_loads_eliminate]: 3.00001e-06 [parameter_eliminate]: 6.31e-06 [a_2]: 7.626e-05 [accelerated_algorithm]: 5.48e-06 [shard]: 2.51e-06 [meta_shard_fg_expand]: 3.24e-06 [shard_inline]: 4.99e-06 [auto_parallel]: 1.482e-05 [parallel]: 1.55e-05 [flash_sp]: 1.635e-05 [merge_comm]: 1.224e-05 [allreduce_fusion]: 3.19001e-06 [matmul_add_comm_reduction]: 9.24e-06 [allreduce_slice_to_reducescatter]: 5.60001e-07 [virtual_shard_identity]: 5.81e-06 [virtual_dataset]: 4.68e-06 [get_grad_eliminate_]: 4.68e-06 [virtual_output]: 4.62e-06 [merge_forward]: 3.92999e-06 [cell_reuse_recompute_pass]: 1.57001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.022e-05 [before_grad]: 8.33999e-06 [inplace_validation]: 3.74e-06 [meta_fg_expand]: 3.61999e-06 [inplace_validation_after_expand]: 4.42e-06 [flash_sp_send_recv_attached]: 3.91999e-06 [receive_attached]: 8.26e-06 [after_resolve]: 1.034e-05 [a_after_grad]: 7.25e-06 [special_op_eliminate]: 4.43e-06 [renormalize]: 0.00049838 [add_forward_monad_depend]: 3.69e-06 [auto_monad_grad]: 2.83e-06 [auto_monad_eliminator]: 1.164e-05 [cse]: 2.622e-05 [a_3]: 3.203e-05 [Cycle 2]: 0.00048849, [43] [expand_dump_flag]: 9.89996e-07 [switch_simplify]: 5.28e-06 [loop_unroll]: 4.48e-06 [a_1]: 9.275e-05 [recompute_prepare]: 4.53999e-06 [updatestate_depend_eliminate]: 3.19e-06 [updatestate_assign_eliminate]: 2.26e-06 [updatestate_loads_eliminate]: 2.17999e-06 [parameter_eliminate]: 9.89996e-07 [a_2]: 5.461e-05 [accelerated_algorithm]: 4.94e-06 [shard]: 1.24e-06 [meta_shard_fg_expand]: 1.71e-06 [shard_inline]: 4.97e-06 [auto_parallel]: 7.82999e-06 [parallel]: 3.82999e-06 [flash_sp]: 5.33e-06 [merge_comm]: 4.07e-06 [allreduce_fusion]: 2.73999e-06 [matmul_add_comm_reduction]: 5.04999e-06 [allreduce_slice_to_reducescatter]: 4.40006e-07 [virtual_shard_identity]: 4.89e-06 [virtual_dataset]: 4.34001e-06 [get_grad_eliminate_]: 4.2e-06 [virtual_output]: 4.06e-06 [merge_forward]: 2.79001e-06 [cell_reuse_recompute_pass]: 1.71001e-06 [cell_reuse_handle_not_recompute_node_pass]: 8.99e-06 [before_grad]: 7.05e-06 [inplace_validation]: 2.71e-06 [meta_fg_expand]: 2.56e-06 [inplace_validation_after_expand]: 2.63999e-06 [flash_sp_send_recv_attached]: 1.07e-06 [receive_attached]: 6.50005e-07 [after_resolve]: 6.49001e-06 [a_after_grad]: 6.43999e-06 [special_op_eliminate]: 4.25e-06 [renormalize]: 8.00064e-08 [add_forward_monad_depend]: 6.89994e-07 [auto_monad_grad]: 1.05001e-06 [auto_monad_eliminator]: 4.55999e-06 [cse]: 1.069e-05 [a_3]: 2.497e-05 [py_interpret_to_execute_after_opt_a]: 1.129e-05 [slice_cell_reuse_recomputed_activation]: 2.42001e-06 [rewriter_after_opt_a]: 5.04e-05 [convert_after_rewriter]: 1.405e-05 [order_py_execute_after_rewriter]: 4.25e-06 [opt_b]: 0.00013478, [1] [Cycle 1]: 0.00012911, [7] [b_1]: 8.054e-05 [b_2]: 5.6e-06 [updatestate_depend_eliminate]: 2.44001e-06 [updatestate_assign_eliminate]: 1.96e-06 [updatestate_loads_eliminate]: 1.94e-06 [renormalize]: 2.59999e-07 [cse]: 8.65001e-06 [optimize_parallel_all_gather_comm]: 5.19e-06 [overlap_param_gather]: 2.71e-06 [cconv]: 2.209e-05 [loop_unroll]: 0.00052381 [opt_after_cconv]: 8.342e-05, [1] [Cycle 1]: 7.763e-05, [7] [c_1]: 2.266e-05 [parameter_eliminate]: 2.41e-06 [updatestate_depend_eliminate]: 5.07e-06 [updatestate_assign_eliminate]: 2.27999e-06 [updatestate_loads_eliminate]: 2.2e-06 [cse]: 1.324e-05 [renormalize]: 3.09999e-07 [remove_dup_value]: 1.059e-05 [tuple_transform]: 4.645e-05, [1] [Cycle 1]: 4.217e-05, [2] [d_1]: 3.333e-05 [renormalize]: 1.69995e-07 [partial_unused_args_eliminate]: 2.37e-06 [add_cache_embedding]: 1.048e-05 [add_recomputation]: 5.983e-05 [cse_after_recomputation]: 1.851e-05, [1] [Cycle 1]: 1.419e-05, [1] [cse]: 9.22001e-06 [environ_conv]: 2.058e-05 [swap_dp_allreduce_reducescatter]: 5.40999e-06 [bias_add_comm_swap]: 2.43e-06 [label_micro_interleaved_index]: 1.87999e-06 [label_fine_grained_interleaved_index]: 2.17999e-06 [merge_cast_opt]: 1.69e-06 [slice_recompute_activation]: 1.86e-06 [micro_interleaved_order_control]: 1.74e-06 [assign_add_opt]: 1.226e-05 [ForceFp32Comm]: 1.36001e-06 [remove_cast_before_assign_add]: 1.09e-06 [full_micro_interleaved_order_control]: 1.99e-06 [reorder_send_recv_between_fp_bp]: 2.17999e-06 [comm_op_add_attrs]: 9.89996e-07 [add_comm_op_reuse_tag]: 1.24e-06 [interleave_split_concat_branches]: 1.18e-06 [interleave_parallel_branches]: 8.30012e-07 [overlap_opt_shard_in_pipeline]: 9.49e-06 [overlap_opt_shard_grad_in_pipeline]: 2.22e-06 [control_data_broadcast_order]: 1.35999e-06 [grouped_pairwise_exchange_alltoall]: 1.54e-06 [offloading_packed_experts]: 1.6e-06 [overlap_recompute_and_grad_model_parallel]: 2.12e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.60004e-07 [overlap_recompute_allgather_and_fa_grad]: 1.24e-06 [overlap_grad_ring_attention]: 1.67999e-06 [overlap_grad_flash_sp]: 3.064e-05 [begin_end_overlap_inline]: 7.59988e-07 [split_matmul_comm_elemetwise]: 2.47e-06 [split_layernorm_comm]: 1.91001e-06 [handle_group_info]: 1.19e-06 [symbol_engine_optimizer]: 0.00011219, [1] [Cycle 1]: 0.00010768, [6] [build]: 2.84e-06 [elim_shapecalc]: 7.98999e-06 [elim_not_effective]: 1.136e-05 [opt_reshape]: 4.759e-05 [fold_const_symbol]: 9.03e-06 [renormalize]: 3.19997e-07 [pipeline_parallel_scheduler]: 2.12e-06 [auto_monad_reorder]: 2.506e-05 [get_jit_bprop_graph]: 4.39992e-07 [rewriter_after_jit_bprop_graph]: 5.00004e-07 [eliminate_special_op_node]: 0.00048103 [distribtued_split]: 5.223e-05 [validate]: 5.535e-05 [task_emit]: 7.72001 [execute]: 1.149e-05 Sums bootstrap : 0.001949s : 0.03% type_inference : 0.014844s : 0.19% auto_monad : 0.000173s : 0.00% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000041s : 0.00% insert-virtual-dataset : 0.000046s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.00% optimize.rewriter_before_opt_a : 0.000042s : 0.00% optimize.opt_a.expand_dump_flag : 0.000005s : 0.00% optimize.opt_a.switch_simplify : 0.000036s : 0.00% optimize.opt_a.loop_unroll : 0.000014s : 0.00% optimize.opt_a.a_1 : 0.000362s : 0.00% optimize.opt_a.recompute_prepare : 0.000010s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_a.parameter_eliminate : 0.000007s : 0.00% optimize.opt_a.a_2 : 0.000131s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000010s : 0.00% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.00% optimize.opt_a.shard_inline : 0.000010s : 0.00% optimize.opt_a.auto_parallel : 0.000023s : 0.00% optimize.opt_a.parallel : 0.000019s : 0.00% optimize.opt_a.flash_sp : 0.000022s : 0.00% optimize.opt_a.merge_comm : 0.000016s : 0.00% optimize.opt_a.allreduce_fusion : 0.000006s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000014s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000011s : 0.00% optimize.opt_a.virtual_dataset : 0.000009s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000009s : 0.00% optimize.opt_a.virtual_output : 0.000009s : 0.00% optimize.opt_a.merge_forward : 0.000007s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000019s : 0.00% optimize.opt_a.before_grad : 0.000015s : 0.00% optimize.opt_a.inplace_validation : 0.000006s : 0.00% optimize.opt_a.meta_fg_expand : 0.000006s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000007s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000009s : 0.00% optimize.opt_a.after_resolve : 0.000017s : 0.00% optimize.opt_a.a_after_grad : 0.000014s : 0.00% optimize.opt_a.special_op_eliminate : 0.000009s : 0.00% optimize.opt_a.renormalize : 0.000498s : 0.01% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000016s : 0.00% optimize.opt_a.cse : 0.000037s : 0.00% optimize.opt_a.a_3 : 0.000057s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000011s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000050s : 0.00% optimize.convert_after_rewriter : 0.000014s : 0.00% optimize.order_py_execute_after_rewriter : 0.000004s : 0.00% optimize.opt_b.b_1 : 0.000081s : 0.00% optimize.opt_b.b_2 : 0.000006s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000009s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000005s : 0.00% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000022s : 0.00% optimize.loop_unroll : 0.000524s : 0.01% optimize.opt_after_cconv.c_1 : 0.000023s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000013s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.00% optimize.tuple_transform.d_1 : 0.000033s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000010s : 0.00% optimize.add_recomputation : 0.000060s : 0.00% optimize.cse_after_recomputation.cse : 0.000009s : 0.00% optimize.environ_conv : 0.000021s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000005s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000012s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000009s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000031s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000008s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000011s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000048s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000009s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000025s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000481s : 0.01% distribtued_split : 0.000052s : 0.00% validate : 0.000055s : 0.00% task_emit : 7.720014s : 99.74% execute : 0.000011s : 0.00% Time group info: ------[substitution.] 0.000102 20 1.86% : 0.000002s : 2: substitution.elim_not_effective 1.55% : 0.000002s : 2: substitution.fold_const_symbol 5.43% : 0.000006s : 3: substitution.graph_param_transform 68.08% : 0.000069s : 1: substitution.inline 3.51% : 0.000004s : 4: substitution.j_node_and_user_rematch 12.07% : 0.000012s : 2: substitution.reduce_all_const_elim 3.99% : 0.000004s : 4: substitution.remove_not_recompute_node 3.49% : 0.000004s : 2: substitution.replace_old_param ------[type_inference.] 0.014796 2 97.58% : 0.014439s : 1: type_inference.infer 2.42% : 0.000358s : 1: type_inference.specialize ------[replace.] 0.000015 1 100.00% : 0.000015s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000183 740 0.55% : 0.000001s : 7: predicate.accumulaten_eliminater 0.77% : 0.000001s : 3: predicate.ad_related_special_op_eliminate 0.45% : 0.000001s : 6: predicate.addn_check_dump 0.47% : 0.000001s : 7: predicate.addn_zero_filter 0.43% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 1.46% : 0.000003s : 13: predicate.arithmetic_simplify 0.55% : 0.000001s : 7: predicate.cast_eliminate 0.49% : 0.000001s : 6: predicate.check_bprop_eliminate 0.45% : 0.000001s : 6: predicate.compare_switch_simplify 0.16% : 0.000000s : 3: predicate.const_output_eliminate 0.34% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.36% : 0.000002s : 7: predicate.convert_tensor_eliminate 0.68% : 0.000001s : 6: predicate.depend_value_elim 0.57% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.59% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.60% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.24% : 0.000000s : 3: predicate.elim_not_effective 0.42% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 0.75% : 0.000001s : 10: predicate.environ_add_const_eliminate 0.66% : 0.000001s : 10: predicate.environ_get_add_eliminate 0.65% : 0.000001s : 10: predicate.environ_get_depend_swap 1.20% : 0.000002s : 16: predicate.environ_get_eliminate 0.65% : 0.000001s : 10: predicate.environ_get_set_eliminate 0.54% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.18% : 0.000002s : 8: predicate.float_depend_g_call 0.41% : 0.000001s : 6: predicate.float_environ_get_switch 0.62% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.20% : 0.000000s : 3: predicate.fold_const_symbol 0.59% : 0.000001s : 6: predicate.get_grad_eliminate 0.26% : 0.000000s : 3: predicate.graph_param_transform 0.51% : 0.000001s : 6: predicate.incorporate_call 0.43% : 0.000001s : 6: predicate.incorporate_call_switch 4.21% : 0.000008s : 33: predicate.inline 0.79% : 0.000001s : 6: predicate.inline_without_move 0.30% : 0.000001s : 6: predicate.j_node_and_user_rematch 0.71% : 0.000001s : 6: predicate.less_batch_normalization 11.95% : 0.000022s : 13: predicate.list_to_tuple_eliminator_ 1.37% : 0.000003s : 20: predicate.load_eliminater 0.96% : 0.000002s : 3: predicate.loop_unroll_after_grad 1.03% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.28% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.49% : 0.000001s : 6: predicate.merge_addn 0.46% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.48% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.45% : 0.000001s : 7: predicate.minmaximum_grad 0.66% : 0.000001s : 3: predicate.mutable_eliminate 22.53% : 0.000041s : 3: predicate.opt_reshape 0.43% : 0.000001s : 3: predicate.parallel_virtual_node 0.94% : 0.000002s : 8: predicate.partial_defer_inline 0.87% : 0.000002s : 10: predicate.partial_eliminate 0.56% : 0.000001s : 7: predicate.print_const_string_wrapper 0.60% : 0.000001s : 6: predicate.reduce_all_const_elim 0.66% : 0.000001s : 7: predicate.reduce_eliminate 0.50% : 0.000001s : 6: predicate.remove_not_recompute_node 0.82% : 0.000002s : 13: predicate.replace_applicator 0.34% : 0.000001s : 6: predicate.replace_old_param 0.13% : 0.000000s : 3: predicate.reset_defer_inline 0.51% : 0.000001s : 7: predicate.reshape_eliminate 0.55% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.39% : 0.000001s : 3: predicate.row_tensor_eliminate 0.63% : 0.000001s : 6: predicate.same_eliminate 0.32% : 0.000001s : 6: predicate.set_cell_output_no_recompute 0.57% : 0.000001s : 6: predicate.shard_identity_eliminate 0.93% : 0.000002s : 9: predicate.special_op_eliminate 0.66% : 0.000001s : 6: predicate.specialize_transform 0.76% : 0.000001s : 6: predicate.split_environ_get_set_with_tuple_value 0.62% : 0.000001s : 6: predicate.stack_unstack_eliminate 1.48% : 0.000003s : 20: predicate.stopgrad_eliminater 0.27% : 0.000000s : 3: predicate.switch_call_monad_eliminater 0.59% : 0.000001s : 8: predicate.switch_defer_inline 1.07% : 0.000002s : 14: predicate.switch_layer_defer_inline 3.55% : 0.000006s : 24: predicate.switch_simplify 0.50% : 0.000001s : 7: predicate.tile_eliminate 0.55% : 0.000001s : 7: predicate.transpose_eliminate 1.06% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.05% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 1.82% : 0.000003s : 19: predicate.tuple_list_get_item_eliminator 0.89% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 1.49% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.12% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 1.33% : 0.000002s : 20: predicate.updatestate_pure_node_eliminater 1.88% : 0.000003s : 26: predicate.updatestate_useless_node_eliminater 0.33% : 0.000001s : 3: predicate.value_based_eliminate 0.56% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.50% : 0.000001s : 6: predicate.virtual_output_eliminate 0.33% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000189 4 8.23% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.77% : 0.000173s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 7.763312 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.00% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.00% : 0.000065s : 1: add_recomputation 0.00% : 0.000016s : 1: assign_add_opt 0.00% : 0.000185s : 1: auto_monad 0.00% : 0.000032s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.03% : 0.002002s : 1: bootstrap 0.00% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.00% : 0.000018s : 1: convert_after_rewriter 0.00% : 0.000022s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000061s : 1: distribtued_split 0.01% : 0.000493s : 1: eliminate_special_op_node 0.00% : 0.000025s : 1: environ_conv 0.00% : 0.000020s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000009s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000052s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000533s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.00% : 0.000011s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000677s : 80: opt.transform.opt_a 0.00% : 0.000021s : 1: opt.transform.opt_after_cconv 0.00% : 0.000069s : 27: opt.transform.opt_b 0.00% : 0.000032s : 1: opt.transform.opt_trans_graph 0.00% : 0.000020s : 3: opt.transform.special_op_eliminate 0.00% : 0.000072s : 4: opt.transform.symbol_engine_opt 0.13% : 0.010427s : 1: opt_a 0.00% : 0.000087s : 1: opt_after_cconv 0.00% : 0.000138s : 1: opt_b 0.15% : 0.012016s : 1: optimize 0.00% : 0.000009s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000008s : 1: order_py_execute_after_rewriter 0.00% : 0.000035s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000013s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000008s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000011s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.00% : 0.000047s : 1: pre_auto_parallel 0.00% : 0.000022s : 1: py_interpret_to_execute 0.00% : 0.000015s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000005s : 1: remove_cast_before_assign_add 0.00% : 0.000015s : 1: remove_dup_value 0.00% : 0.000290s : 1: renormalize.infer 0.00% : 0.000202s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000055s : 1: rewriter_after_opt_a 0.00% : 0.000047s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000009s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000116s : 1: symbol_engine_optimizer 99.44% : 7.720054s : 1: task_emit 0.00% : 0.000050s : 1: tuple_transform 0.19% : 0.014877s : 1: type_inference 0.00% : 0.000100s : 1: validate TotalTime = 7.72398, [21] [bootstrap]: 0.00157726 [type_inference]: 0.0148046 [auto_monad]: 0.00017063 [graph_reusing]: 2.44001e-06 [inline]: 2.2e-06 [parallel-infer-symbol]: 1.063e-05 [pre_auto_parallel]: 4.687e-05 [insert-virtual-dataset]: 4.02999e-06 [parallel-infer-symbol-second]: 5.29995e-07 [dataset_repeat_opt]: 1.25999e-06 [pipeline_split]: 1.69e-06 [optimize]: 0.012994, [52] [py_interpret_to_execute]: 2.299e-05 [rewriter_before_opt_a]: 6.296e-05 [opt_a]: 0.0110929, [2] [Cycle 1]: 0.00151288, [43] [expand_dump_flag]: 3.746e-05 [switch_simplify]: 3.608e-05 [loop_unroll]: 9.31999e-06 [a_1]: 0.00026688 [recompute_prepare]: 6.25e-06 [updatestate_depend_eliminate]: 1.214e-05 [updatestate_assign_eliminate]: 3.31999e-06 [updatestate_loads_eliminate]: 2.92e-06 [parameter_eliminate]: 8.47e-06 [a_2]: 7.716e-05 [accelerated_algorithm]: 5.61999e-06 [shard]: 3.07e-06 [meta_shard_fg_expand]: 3.24e-06 [shard_inline]: 5.43e-06 [auto_parallel]: 1.772e-05 [parallel]: 9.05999e-06 [flash_sp]: 1.112e-05 [merge_comm]: 5.97001e-06 [allreduce_fusion]: 6.19999e-06 [matmul_add_comm_reduction]: 9.6e-06 [allreduce_slice_to_reducescatter]: 5.60001e-07 [virtual_shard_identity]: 5.99e-06 [virtual_dataset]: 4.68999e-06 [get_grad_eliminate_]: 4.80999e-06 [virtual_output]: 4.62e-06 [merge_forward]: 4.11e-06 [cell_reuse_recompute_pass]: 1.84e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.084e-05 [before_grad]: 8.34001e-06 [inplace_validation]: 3.39e-06 [meta_fg_expand]: 3.27e-06 [inplace_validation_after_expand]: 4.47e-06 [flash_sp_send_recv_attached]: 3.92001e-06 [receive_attached]: 9.47999e-06 [after_resolve]: 9.17999e-06 [a_after_grad]: 9.36e-06 [special_op_eliminate]: 4.76e-06 [renormalize]: 0.00054801 [add_forward_monad_depend]: 4.37e-06 [auto_monad_grad]: 3.25e-06 [auto_monad_eliminator]: 1.287e-05 [cse]: 2.989e-05 [a_3]: 3.512e-05 [Cycle 2]: 0.00050352, [43] [expand_dump_flag]: 1.27e-06 [switch_simplify]: 5.44e-06 [loop_unroll]: 4.54001e-06 [a_1]: 9.308e-05 [recompute_prepare]: 4.02e-06 [updatestate_depend_eliminate]: 3.98999e-06 [updatestate_assign_eliminate]: 2.26e-06 [updatestate_loads_eliminate]: 2.4e-06 [parameter_eliminate]: 1.58e-06 [a_2]: 5.563e-05 [accelerated_algorithm]: 5.95e-06 [shard]: 1.47001e-06 [meta_shard_fg_expand]: 1.84e-06 [shard_inline]: 4.86e-06 [auto_parallel]: 1.026e-05 [parallel]: 5.46001e-06 [flash_sp]: 6.07e-06 [merge_comm]: 4.94001e-06 [allreduce_fusion]: 2.78e-06 [matmul_add_comm_reduction]: 6.13001e-06 [allreduce_slice_to_reducescatter]: 3.29994e-07 [virtual_shard_identity]: 5.09e-06 [virtual_dataset]: 4.43e-06 [get_grad_eliminate_]: 4.33e-06 [virtual_output]: 4.48001e-06 [merge_forward]: 3.32e-06 [cell_reuse_recompute_pass]: 1.74e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.003e-05 [before_grad]: 7.18e-06 [inplace_validation]: 2.43999e-06 [meta_fg_expand]: 2.61e-06 [inplace_validation_after_expand]: 2.69001e-06 [flash_sp_send_recv_attached]: 1.21001e-06 [receive_attached]: 1.57001e-06 [after_resolve]: 6.74e-06 [a_after_grad]: 7.09e-06 [special_op_eliminate]: 4.47e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 1.38e-06 [auto_monad_grad]: 1.35e-06 [auto_monad_eliminator]: 5.34e-06 [cse]: 1.167e-05 [a_3]: 2.549e-05 [py_interpret_to_execute_after_opt_a]: 1.304e-05 [slice_cell_reuse_recomputed_activation]: 2.28999e-06 [rewriter_after_opt_a]: 9.563e-05 [convert_after_rewriter]: 7.14001e-06 [order_py_execute_after_rewriter]: 4.27e-06 [opt_b]: 0.00014397, [1] [Cycle 1]: 0.00013782, [7] [b_1]: 8.404e-05 [b_2]: 6.02e-06 [updatestate_depend_eliminate]: 2.84999e-06 [updatestate_assign_eliminate]: 1.93999e-06 [updatestate_loads_eliminate]: 2e-06 [renormalize]: 2.50002e-07 [cse]: 1.107e-05 [optimize_parallel_all_gather_comm]: 6.02999e-06 [overlap_param_gather]: 1.958e-05 [cconv]: 2.854e-05 [loop_unroll]: 0.00066937 [opt_after_cconv]: 9.503e-05, [1] [Cycle 1]: 8.843e-05, [7] [c_1]: 2.362e-05 [parameter_eliminate]: 3.91001e-06 [updatestate_depend_eliminate]: 8.05e-06 [updatestate_assign_eliminate]: 2.36e-06 [updatestate_loads_eliminate]: 2.31e-06 [cse]: 1.749e-05 [renormalize]: 5.60001e-07 [remove_dup_value]: 1.133e-05 [tuple_transform]: 4.838e-05, [1] [Cycle 1]: 4.413e-05, [2] [d_1]: 3.521e-05 [renormalize]: 1.69995e-07 [partial_unused_args_eliminate]: 2.02e-06 [add_cache_embedding]: 1.533e-05 [add_recomputation]: 5.449e-05 [cse_after_recomputation]: 2.107e-05, [1] [Cycle 1]: 1.634e-05, [1] [cse]: 1.055e-05 [environ_conv]: 2.134e-05 [swap_dp_allreduce_reducescatter]: 5.51e-06 [bias_add_comm_swap]: 2.43e-06 [label_micro_interleaved_index]: 2.32e-06 [label_fine_grained_interleaved_index]: 2.21e-06 [merge_cast_opt]: 1.71e-06 [slice_recompute_activation]: 2.07001e-06 [micro_interleaved_order_control]: 2.06e-06 [assign_add_opt]: 1.15e-05 [ForceFp32Comm]: 1.14e-06 [remove_cast_before_assign_add]: 1.06001e-06 [full_micro_interleaved_order_control]: 2.32e-06 [reorder_send_recv_between_fp_bp]: 2.22999e-06 [comm_op_add_attrs]: 1.00999e-06 [add_comm_op_reuse_tag]: 1.09e-06 [interleave_split_concat_branches]: 8.29998e-07 [interleave_parallel_branches]: 7.99992e-07 [overlap_opt_shard_in_pipeline]: 2.622e-05 [overlap_opt_shard_grad_in_pipeline]: 2.2e-06 [control_data_broadcast_order]: 1.23e-06 [grouped_pairwise_exchange_alltoall]: 1.31001e-06 [offloading_packed_experts]: 1.484e-05 [overlap_recompute_and_grad_model_parallel]: 1.91001e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.29997e-07 [overlap_recompute_allgather_and_fa_grad]: 1.24001e-06 [overlap_grad_ring_attention]: 1.087e-05 [overlap_grad_flash_sp]: 3.209e-05 [begin_end_overlap_inline]: 8.70001e-07 [split_matmul_comm_elemetwise]: 1.86999e-06 [split_layernorm_comm]: 2.32e-06 [handle_group_info]: 1.02e-06 [symbol_engine_optimizer]: 6.518e-05, [1] [Cycle 1]: 6.103e-05, [6] [build]: 3.40999e-06 [elim_shapecalc]: 8.13001e-06 [elim_not_effective]: 1.079e-05 [opt_reshape]: 5.37001e-06 [fold_const_symbol]: 7.91e-06 [renormalize]: 1.79993e-07 [pipeline_parallel_scheduler]: 1.95e-06 [auto_monad_reorder]: 2.3e-05 [get_jit_bprop_graph]: 4.49989e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.00058436 [distribtued_split]: 8.597e-05 [validate]: 5.702e-05 [task_emit]: 7.69324 [execute]: 1.131e-05 Sums bootstrap : 0.001577s : 0.02% type_inference : 0.014805s : 0.19% auto_monad : 0.000171s : 0.00% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000011s : 0.00% pre_auto_parallel : 0.000047s : 0.00% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000023s : 0.00% optimize.rewriter_before_opt_a : 0.000063s : 0.00% optimize.opt_a.expand_dump_flag : 0.000039s : 0.00% optimize.opt_a.switch_simplify : 0.000042s : 0.00% optimize.opt_a.loop_unroll : 0.000014s : 0.00% optimize.opt_a.a_1 : 0.000360s : 0.00% optimize.opt_a.recompute_prepare : 0.000010s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.000133s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000012s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.00% optimize.opt_a.shard_inline : 0.000010s : 0.00% optimize.opt_a.auto_parallel : 0.000028s : 0.00% optimize.opt_a.parallel : 0.000015s : 0.00% optimize.opt_a.flash_sp : 0.000017s : 0.00% optimize.opt_a.merge_comm : 0.000011s : 0.00% optimize.opt_a.allreduce_fusion : 0.000009s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000011s : 0.00% optimize.opt_a.virtual_dataset : 0.000009s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000009s : 0.00% optimize.opt_a.virtual_output : 0.000009s : 0.00% optimize.opt_a.merge_forward : 0.000007s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000021s : 0.00% optimize.opt_a.before_grad : 0.000016s : 0.00% optimize.opt_a.inplace_validation : 0.000006s : 0.00% optimize.opt_a.meta_fg_expand : 0.000006s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000007s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000011s : 0.00% optimize.opt_a.after_resolve : 0.000016s : 0.00% optimize.opt_a.a_after_grad : 0.000016s : 0.00% optimize.opt_a.special_op_eliminate : 0.000009s : 0.00% optimize.opt_a.renormalize : 0.000548s : 0.01% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000005s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000018s : 0.00% optimize.opt_a.cse : 0.000042s : 0.00% optimize.opt_a.a_3 : 0.000061s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000013s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000096s : 0.00% optimize.convert_after_rewriter : 0.000007s : 0.00% optimize.order_py_execute_after_rewriter : 0.000004s : 0.00% optimize.opt_b.b_1 : 0.000084s : 0.00% optimize.opt_b.b_2 : 0.000006s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000003s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000011s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000006s : 0.00% optimize.overlap_param_gather : 0.000020s : 0.00% optimize.cconv : 0.000029s : 0.00% optimize.loop_unroll : 0.000669s : 0.01% optimize.opt_after_cconv.c_1 : 0.000024s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000004s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000017s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000011s : 0.00% optimize.tuple_transform.d_1 : 0.000035s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.00% optimize.add_recomputation : 0.000054s : 0.00% optimize.cse_after_recomputation.cse : 0.000011s : 0.00% optimize.environ_conv : 0.000021s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000006s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000012s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000026s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000015s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000011s : 0.00% optimize.overlap_grad_flash_sp : 0.000032s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000008s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000011s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000005s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000008s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000023s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000584s : 0.01% distribtued_split : 0.000086s : 0.00% validate : 0.000057s : 0.00% task_emit : 7.693242s : 99.74% execute : 0.000011s : 0.00% Time group info: ------[substitution.] 0.000118 20 1.47% : 0.000002s : 2: substitution.elim_not_effective 1.32% : 0.000002s : 2: substitution.fold_const_symbol 5.01% : 0.000006s : 3: substitution.graph_param_transform 70.69% : 0.000083s : 1: substitution.inline 2.99% : 0.000004s : 4: substitution.j_node_and_user_rematch 11.47% : 0.000014s : 2: substitution.reduce_all_const_elim 3.65% : 0.000004s : 4: substitution.remove_not_recompute_node 3.40% : 0.000004s : 2: substitution.replace_old_param ------[type_inference.] 0.014758 2 97.54% : 0.014395s : 1: type_inference.infer 2.46% : 0.000363s : 1: type_inference.specialize ------[replace.] 0.000016 1 100.00% : 0.000016s : 1: replace.inline ------[match.] 0.000082 1 100.00% : 0.000082s : 1: match.inline ------[predicate.] 0.000133 740 0.90% : 0.000001s : 7: predicate.accumulaten_eliminater 1.33% : 0.000002s : 3: predicate.ad_related_special_op_eliminate 0.63% : 0.000001s : 6: predicate.addn_check_dump 1.01% : 0.000001s : 7: predicate.addn_zero_filter 0.71% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.36% : 0.000003s : 13: predicate.arithmetic_simplify 0.76% : 0.000001s : 7: predicate.cast_eliminate 0.73% : 0.000001s : 6: predicate.check_bprop_eliminate 0.66% : 0.000001s : 6: predicate.compare_switch_simplify 0.20% : 0.000000s : 3: predicate.const_output_eliminate 0.50% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.95% : 0.000003s : 7: predicate.convert_tensor_eliminate 0.69% : 0.000001s : 6: predicate.depend_value_elim 0.75% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.91% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.83% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.35% : 0.000000s : 3: predicate.elim_not_effective 0.59% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.11% : 0.000001s : 10: predicate.environ_add_const_eliminate 0.90% : 0.000001s : 10: predicate.environ_get_add_eliminate 0.97% : 0.000001s : 10: predicate.environ_get_depend_swap 1.64% : 0.000002s : 16: predicate.environ_get_eliminate 1.02% : 0.000001s : 10: predicate.environ_get_set_eliminate 0.81% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.48% : 0.000002s : 8: predicate.float_depend_g_call 0.61% : 0.000001s : 6: predicate.float_environ_get_switch 0.91% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.23% : 0.000000s : 3: predicate.fold_const_symbol 0.82% : 0.000001s : 6: predicate.get_grad_eliminate 0.41% : 0.000001s : 3: predicate.graph_param_transform 0.72% : 0.000001s : 6: predicate.incorporate_call 0.58% : 0.000001s : 6: predicate.incorporate_call_switch 5.87% : 0.000008s : 33: predicate.inline 0.90% : 0.000001s : 6: predicate.inline_without_move 0.38% : 0.000001s : 6: predicate.j_node_and_user_rematch 1.27% : 0.000002s : 6: predicate.less_batch_normalization 1.55% : 0.000002s : 13: predicate.list_to_tuple_eliminator_ 1.96% : 0.000003s : 20: predicate.load_eliminater 2.22% : 0.000003s : 3: predicate.loop_unroll_after_grad 1.40% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.74% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.78% : 0.000001s : 6: predicate.merge_addn 0.68% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.69% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.60% : 0.000001s : 7: predicate.minmaximum_grad 0.96% : 0.000001s : 3: predicate.mutable_eliminate 0.43% : 0.000001s : 3: predicate.opt_reshape 0.68% : 0.000001s : 3: predicate.parallel_virtual_node 1.23% : 0.000002s : 8: predicate.partial_defer_inline 1.14% : 0.000002s : 10: predicate.partial_eliminate 0.72% : 0.000001s : 7: predicate.print_const_string_wrapper 1.04% : 0.000001s : 6: predicate.reduce_all_const_elim 1.08% : 0.000001s : 7: predicate.reduce_eliminate 0.73% : 0.000001s : 6: predicate.remove_not_recompute_node 1.17% : 0.000002s : 13: predicate.replace_applicator 0.65% : 0.000001s : 6: predicate.replace_old_param 0.26% : 0.000000s : 3: predicate.reset_defer_inline 0.77% : 0.000001s : 7: predicate.reshape_eliminate 0.86% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.46% : 0.000001s : 3: predicate.row_tensor_eliminate 0.99% : 0.000001s : 6: predicate.same_eliminate 0.61% : 0.000001s : 6: predicate.set_cell_output_no_recompute 1.13% : 0.000001s : 6: predicate.shard_identity_eliminate 1.32% : 0.000002s : 9: predicate.special_op_eliminate 0.90% : 0.000001s : 6: predicate.specialize_transform 1.13% : 0.000002s : 6: predicate.split_environ_get_set_with_tuple_value 1.11% : 0.000001s : 6: predicate.stack_unstack_eliminate 2.17% : 0.000003s : 20: predicate.stopgrad_eliminater 0.41% : 0.000001s : 3: predicate.switch_call_monad_eliminater 0.92% : 0.000001s : 8: predicate.switch_defer_inline 1.51% : 0.000002s : 14: predicate.switch_layer_defer_inline 6.70% : 0.000009s : 24: predicate.switch_simplify 0.77% : 0.000001s : 7: predicate.tile_eliminate 0.70% : 0.000001s : 7: predicate.transpose_eliminate 1.53% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.38% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.28% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.57% : 0.000003s : 19: predicate.tuple_list_get_item_eliminator 1.31% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.47% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.49% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 1.84% : 0.000002s : 20: predicate.updatestate_pure_node_eliminater 2.81% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 3: predicate.value_based_eliminate 0.71% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.73% : 0.000001s : 6: predicate.virtual_output_eliminate 0.72% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000211 4 8.90% : 0.000019s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.10% : 0.000192s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 7.738227 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000019s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000060s : 1: add_recomputation 0.00% : 0.000015s : 1: assign_add_opt 0.00% : 0.000185s : 1: auto_monad 0.00% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.02% : 0.001629s : 1: bootstrap 0.00% : 0.000033s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000011s : 1: convert_after_rewriter 0.00% : 0.000024s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.00% : 0.000096s : 1: distribtued_split 0.01% : 0.000600s : 1: eliminate_special_op_node 0.00% : 0.000025s : 1: environ_conv 0.00% : 0.000020s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000680s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000018s : 1: offloading_packed_experts 0.00% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000690s : 80: opt.transform.opt_a 0.00% : 0.000022s : 1: opt.transform.opt_after_cconv 0.00% : 0.000072s : 27: opt.transform.opt_b 0.00% : 0.000034s : 1: opt.transform.opt_trans_graph 0.00% : 0.000022s : 3: opt.transform.special_op_eliminate 0.00% : 0.000029s : 4: opt.transform.symbol_engine_opt 0.14% : 0.011099s : 1: opt_a 0.00% : 0.000099s : 1: opt_after_cconv 0.00% : 0.000147s : 1: opt_b 0.17% : 0.013008s : 1: optimize 0.00% : 0.000009s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000008s : 1: order_py_execute_after_rewriter 0.00% : 0.000037s : 1: overlap_grad_flash_sp 0.00% : 0.000005s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000014s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000031s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000024s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000015s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000030s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000054s : 1: pre_auto_parallel 0.00% : 0.000029s : 1: py_interpret_to_execute 0.00% : 0.000017s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000015s : 1: remove_dup_value 0.00% : 0.000320s : 1: renormalize.infer 0.00% : 0.000219s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000102s : 1: rewriter_after_opt_a 0.00% : 0.000068s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000008s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000009s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000068s : 1: symbol_engine_optimizer 99.42% : 7.693280s : 1: task_emit 0.00% : 0.000052s : 1: tuple_transform 0.19% : 0.014838s : 1: type_inference 0.00% : 0.000135s : 1: validate TotalTime = 6.96463, [21] [bootstrap]: 0.00145075 [type_inference]: 0.0145813 [auto_monad]: 0.00018539 [graph_reusing]: 2.42e-06 [inline]: 2.22e-06 [parallel-infer-symbol]: 2.76e-06 [pre_auto_parallel]: 3.909e-05 [insert-virtual-dataset]: 3.39e-06 [parallel-infer-symbol-second]: 6.19999e-07 [dataset_repeat_opt]: 1.75e-06 [pipeline_split]: 1.53e-06 [optimize]: 0.0115426, [52] [py_interpret_to_execute]: 1.423e-05 [rewriter_before_opt_a]: 6.151e-05 [opt_a]: 0.00988809, [2] [Cycle 1]: 0.00130466, [43] [expand_dump_flag]: 3.76001e-06 [switch_simplify]: 2.672e-05 [loop_unroll]: 9.25e-06 [a_1]: 0.00024242 [recompute_prepare]: 6.04999e-06 [updatestate_depend_eliminate]: 7.49e-06 [updatestate_assign_eliminate]: 3.33e-06 [updatestate_loads_eliminate]: 2.67e-06 [parameter_eliminate]: 5.05999e-06 [a_2]: 7.551e-05 [accelerated_algorithm]: 5.05999e-06 [shard]: 2.81e-06 [meta_shard_fg_expand]: 3.05999e-06 [shard_inline]: 4.99999e-06 [auto_parallel]: 1.131e-05 [parallel]: 9.05001e-06 [flash_sp]: 1.691e-05 [merge_comm]: 7.017e-05 [allreduce_fusion]: 3.78001e-06 [matmul_add_comm_reduction]: 9.05001e-06 [allreduce_slice_to_reducescatter]: 7.29997e-07 [virtual_shard_identity]: 6.19999e-06 [virtual_dataset]: 5.01001e-06 [get_grad_eliminate_]: 4.63999e-06 [virtual_output]: 4.65001e-06 [merge_forward]: 3.71999e-06 [cell_reuse_recompute_pass]: 1.60999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.126e-05 [before_grad]: 8.66e-06 [inplace_validation]: 3.18e-06 [meta_fg_expand]: 3.22e-06 [inplace_validation_after_expand]: 4.52e-06 [flash_sp_send_recv_attached]: 3.61999e-06 [receive_attached]: 2.72e-06 [after_resolve]: 8.34999e-06 [a_after_grad]: 7.43e-06 [special_op_eliminate]: 7.06999e-06 [renormalize]: 0.0003961 [add_forward_monad_depend]: 3.61e-06 [auto_monad_grad]: 2.53e-06 [auto_monad_eliminator]: 1.167e-05 [cse]: 2.553e-05 [a_3]: 3.205e-05 [Cycle 2]: 0.00048031, [43] [expand_dump_flag]: 9.79999e-07 [switch_simplify]: 5.66e-06 [loop_unroll]: 5.35999e-06 [a_1]: 9.112e-05 [recompute_prepare]: 3.94e-06 [updatestate_depend_eliminate]: 2.99001e-06 [updatestate_assign_eliminate]: 2.28999e-06 [updatestate_loads_eliminate]: 2.17999e-06 [parameter_eliminate]: 1.05001e-06 [a_2]: 5.34e-05 [accelerated_algorithm]: 4.94e-06 [shard]: 1.4e-06 [meta_shard_fg_expand]: 1.61999e-06 [shard_inline]: 4.4e-06 [auto_parallel]: 8.27e-06 [parallel]: 3.63e-06 [flash_sp]: 6.07001e-06 [merge_comm]: 3.54e-06 [allreduce_fusion]: 2.87e-06 [matmul_add_comm_reduction]: 4.68e-06 [allreduce_slice_to_reducescatter]: 4.79995e-07 [virtual_shard_identity]: 5.26999e-06 [virtual_dataset]: 4.63e-06 [get_grad_eliminate_]: 4.43999e-06 [virtual_output]: 4.26e-06 [merge_forward]: 2.4e-06 [cell_reuse_recompute_pass]: 1.91e-06 [cell_reuse_handle_not_recompute_node_pass]: 8.62e-06 [before_grad]: 6.96999e-06 [inplace_validation]: 2.23001e-06 [meta_fg_expand]: 2.58999e-06 [inplace_validation_after_expand]: 2.64999e-06 [flash_sp_send_recv_attached]: 8.60004e-07 [receive_attached]: 6.10002e-07 [after_resolve]: 6.2e-06 [a_after_grad]: 6.58999e-06 [special_op_eliminate]: 4.26e-06 [renormalize]: 7.99919e-08 [add_forward_monad_depend]: 6.19999e-07 [auto_monad_grad]: 9.79999e-07 [auto_monad_eliminator]: 5.03e-06 [cse]: 1.073e-05 [a_3]: 2.555e-05 [py_interpret_to_execute_after_opt_a]: 8.42e-06 [slice_cell_reuse_recomputed_activation]: 2.44001e-06 [rewriter_after_opt_a]: 9.692e-05 [convert_after_rewriter]: 6.28999e-06 [order_py_execute_after_rewriter]: 4.48e-06 [opt_b]: 0.00013416, [1] [Cycle 1]: 0.00012854, [7] [b_1]: 7.988e-05 [b_2]: 5.47001e-06 [updatestate_depend_eliminate]: 2.55999e-06 [updatestate_assign_eliminate]: 2.12e-06 [updatestate_loads_eliminate]: 1.96999e-06 [renormalize]: 2.50002e-07 [cse]: 9.3e-06 [optimize_parallel_all_gather_comm]: 5.54e-06 [overlap_param_gather]: 1.903e-05 [cconv]: 2.281e-05 [loop_unroll]: 0.00050513 [opt_after_cconv]: 8.449e-05, [1] [Cycle 1]: 7.89e-05, [7] [c_1]: 2.321e-05 [parameter_eliminate]: 2.21e-06 [updatestate_depend_eliminate]: 5.36001e-06 [updatestate_assign_eliminate]: 2.45e-06 [updatestate_loads_eliminate]: 2.11e-06 [cse]: 1.437e-05 [renormalize]: 4.20012e-07 [remove_dup_value]: 1.048e-05 [tuple_transform]: 4.516e-05, [1] [Cycle 1]: 4.108e-05, [2] [d_1]: 3.269e-05 [renormalize]: 1.69995e-07 [partial_unused_args_eliminate]: 2.06e-06 [add_cache_embedding]: 1.865e-05 [add_recomputation]: 5.682e-05 [cse_after_recomputation]: 2.005e-05, [1] [Cycle 1]: 1.55e-05, [1] [cse]: 1.016e-05 [environ_conv]: 1.596e-05 [swap_dp_allreduce_reducescatter]: 5.92e-06 [bias_add_comm_swap]: 2.2e-06 [label_micro_interleaved_index]: 2.01e-06 [label_fine_grained_interleaved_index]: 2.02001e-06 [merge_cast_opt]: 2.13001e-06 [slice_recompute_activation]: 1.9e-06 [micro_interleaved_order_control]: 1.76e-06 [assign_add_opt]: 1.034e-05 [ForceFp32Comm]: 8.9001e-07 [remove_cast_before_assign_add]: 1.11999e-06 [full_micro_interleaved_order_control]: 2.25e-06 [reorder_send_recv_between_fp_bp]: 2.05e-06 [comm_op_add_attrs]: 9.9001e-07 [add_comm_op_reuse_tag]: 1.05999e-06 [interleave_split_concat_branches]: 1.13e-06 [interleave_parallel_branches]: 1.02e-06 [overlap_opt_shard_in_pipeline]: 2.719e-05 [overlap_opt_shard_grad_in_pipeline]: 2.17e-06 [control_data_broadcast_order]: 1.13e-06 [grouped_pairwise_exchange_alltoall]: 1.3e-06 [offloading_packed_experts]: 1.495e-05 [overlap_recompute_and_grad_model_parallel]: 2.92e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.50007e-07 [overlap_recompute_allgather_and_fa_grad]: 1.10999e-06 [overlap_grad_ring_attention]: 1.76001e-06 [overlap_grad_flash_sp]: 2.763e-05 [begin_end_overlap_inline]: 7.7e-07 [split_matmul_comm_elemetwise]: 2.14e-06 [split_layernorm_comm]: 2.03001e-06 [handle_group_info]: 1.27e-06 [symbol_engine_optimizer]: 6.227e-05, [1] [Cycle 1]: 5.793e-05, [6] [build]: 2.80001e-06 [elim_shapecalc]: 8.08999e-06 [elim_not_effective]: 1.017e-05 [opt_reshape]: 5.26001e-06 [fold_const_symbol]: 7.92e-06 [renormalize]: 2.50002e-07 [pipeline_parallel_scheduler]: 2.06e-06 [auto_monad_reorder]: 2.363e-05 [get_jit_bprop_graph]: 5.10001e-07 [rewriter_after_jit_bprop_graph]: 4.29995e-07 [eliminate_special_op_node]: 0.00047598 [distribtued_split]: 9.323e-05 [validate]: 5.506e-05 [task_emit]: 6.9358 [execute]: 1.365e-05 Sums bootstrap : 0.001451s : 0.02% type_inference : 0.014581s : 0.21% auto_monad : 0.000185s : 0.00% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000039s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.00% optimize.rewriter_before_opt_a : 0.000062s : 0.00% optimize.opt_a.expand_dump_flag : 0.000005s : 0.00% optimize.opt_a.switch_simplify : 0.000032s : 0.00% optimize.opt_a.loop_unroll : 0.000015s : 0.00% optimize.opt_a.a_1 : 0.000334s : 0.00% optimize.opt_a.recompute_prepare : 0.000010s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000010s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.000129s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000010s : 0.00% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.00% optimize.opt_a.shard_inline : 0.000009s : 0.00% optimize.opt_a.auto_parallel : 0.000020s : 0.00% optimize.opt_a.parallel : 0.000013s : 0.00% optimize.opt_a.flash_sp : 0.000023s : 0.00% optimize.opt_a.merge_comm : 0.000074s : 0.00% optimize.opt_a.allreduce_fusion : 0.000007s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000014s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000011s : 0.00% optimize.opt_a.virtual_dataset : 0.000010s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000009s : 0.00% optimize.opt_a.virtual_output : 0.000009s : 0.00% optimize.opt_a.merge_forward : 0.000006s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000020s : 0.00% optimize.opt_a.before_grad : 0.000016s : 0.00% optimize.opt_a.inplace_validation : 0.000005s : 0.00% optimize.opt_a.meta_fg_expand : 0.000006s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000007s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000015s : 0.00% optimize.opt_a.a_after_grad : 0.000014s : 0.00% optimize.opt_a.special_op_eliminate : 0.000011s : 0.00% optimize.opt_a.renormalize : 0.000396s : 0.01% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000017s : 0.00% optimize.opt_a.cse : 0.000036s : 0.00% optimize.opt_a.a_3 : 0.000058s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000097s : 0.00% optimize.convert_after_rewriter : 0.000006s : 0.00% optimize.order_py_execute_after_rewriter : 0.000004s : 0.00% optimize.opt_b.b_1 : 0.000080s : 0.00% optimize.opt_b.b_2 : 0.000005s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000003s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000009s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000006s : 0.00% optimize.overlap_param_gather : 0.000019s : 0.00% optimize.cconv : 0.000023s : 0.00% optimize.loop_unroll : 0.000505s : 0.01% optimize.opt_after_cconv.c_1 : 0.000023s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000014s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.00% optimize.tuple_transform.d_1 : 0.000033s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000019s : 0.00% optimize.add_recomputation : 0.000057s : 0.00% optimize.cse_after_recomputation.cse : 0.000010s : 0.00% optimize.environ_conv : 0.000016s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000006s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000010s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000027s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000015s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000003s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000028s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000008s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000010s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000005s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000008s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000024s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000476s : 0.01% distribtued_split : 0.000093s : 0.00% validate : 0.000055s : 0.00% task_emit : 6.935803s : 99.72% execute : 0.000014s : 0.00% Time group info: ------[substitution.] 0.000096 20 1.73% : 0.000002s : 2: substitution.elim_not_effective 1.59% : 0.000002s : 2: substitution.fold_const_symbol 5.14% : 0.000005s : 3: substitution.graph_param_transform 67.75% : 0.000065s : 1: substitution.inline 3.61% : 0.000003s : 4: substitution.j_node_and_user_rematch 11.96% : 0.000012s : 2: substitution.reduce_all_const_elim 4.61% : 0.000004s : 4: substitution.remove_not_recompute_node 3.60% : 0.000003s : 2: substitution.replace_old_param ------[type_inference.] 0.014543 2 97.90% : 0.014237s : 1: type_inference.infer 2.10% : 0.000306s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000124 740 0.81% : 0.000001s : 7: predicate.accumulaten_eliminater 1.26% : 0.000002s : 3: predicate.ad_related_special_op_eliminate 0.64% : 0.000001s : 6: predicate.addn_check_dump 0.77% : 0.000001s : 7: predicate.addn_zero_filter 0.68% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.11% : 0.000003s : 13: predicate.arithmetic_simplify 0.74% : 0.000001s : 7: predicate.cast_eliminate 0.74% : 0.000001s : 6: predicate.check_bprop_eliminate 0.68% : 0.000001s : 6: predicate.compare_switch_simplify 0.20% : 0.000000s : 3: predicate.const_output_eliminate 0.44% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.90% : 0.000002s : 7: predicate.convert_tensor_eliminate 0.74% : 0.000001s : 6: predicate.depend_value_elim 0.75% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.82% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.86% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.27% : 0.000000s : 3: predicate.elim_not_effective 0.61% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.09% : 0.000001s : 10: predicate.environ_add_const_eliminate 0.98% : 0.000001s : 10: predicate.environ_get_add_eliminate 0.99% : 0.000001s : 10: predicate.environ_get_depend_swap 1.87% : 0.000002s : 16: predicate.environ_get_eliminate 1.10% : 0.000001s : 10: predicate.environ_get_set_eliminate 0.84% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.63% : 0.000002s : 8: predicate.float_depend_g_call 0.68% : 0.000001s : 6: predicate.float_environ_get_switch 1.03% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.26% : 0.000000s : 3: predicate.fold_const_symbol 0.85% : 0.000001s : 6: predicate.get_grad_eliminate 0.42% : 0.000001s : 3: predicate.graph_param_transform 0.69% : 0.000001s : 6: predicate.incorporate_call 0.58% : 0.000001s : 6: predicate.incorporate_call_switch 6.10% : 0.000008s : 33: predicate.inline 1.06% : 0.000001s : 6: predicate.inline_without_move 0.41% : 0.000001s : 6: predicate.j_node_and_user_rematch 1.11% : 0.000001s : 6: predicate.less_batch_normalization 1.71% : 0.000002s : 13: predicate.list_to_tuple_eliminator_ 2.12% : 0.000003s : 20: predicate.load_eliminater 1.91% : 0.000002s : 3: predicate.loop_unroll_after_grad 1.66% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.70% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.73% : 0.000001s : 6: predicate.merge_addn 0.73% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.71% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.99% : 0.000001s : 7: predicate.minmaximum_grad 1.04% : 0.000001s : 3: predicate.mutable_eliminate 0.48% : 0.000001s : 3: predicate.opt_reshape 0.48% : 0.000001s : 3: predicate.parallel_virtual_node 1.27% : 0.000002s : 8: predicate.partial_defer_inline 1.16% : 0.000001s : 10: predicate.partial_eliminate 0.73% : 0.000001s : 7: predicate.print_const_string_wrapper 0.84% : 0.000001s : 6: predicate.reduce_all_const_elim 1.14% : 0.000001s : 7: predicate.reduce_eliminate 0.73% : 0.000001s : 6: predicate.remove_not_recompute_node 1.19% : 0.000001s : 13: predicate.replace_applicator 0.49% : 0.000001s : 6: predicate.replace_old_param 0.22% : 0.000000s : 3: predicate.reset_defer_inline 0.85% : 0.000001s : 7: predicate.reshape_eliminate 0.72% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 3: predicate.row_tensor_eliminate 1.10% : 0.000001s : 6: predicate.same_eliminate 0.51% : 0.000001s : 6: predicate.set_cell_output_no_recompute 1.06% : 0.000001s : 6: predicate.shard_identity_eliminate 1.30% : 0.000002s : 9: predicate.special_op_eliminate 0.97% : 0.000001s : 6: predicate.specialize_transform 1.04% : 0.000001s : 6: predicate.split_environ_get_set_with_tuple_value 1.27% : 0.000002s : 6: predicate.stack_unstack_eliminate 2.11% : 0.000003s : 20: predicate.stopgrad_eliminater 0.42% : 0.000001s : 3: predicate.switch_call_monad_eliminater 0.93% : 0.000001s : 8: predicate.switch_defer_inline 1.60% : 0.000002s : 14: predicate.switch_layer_defer_inline 5.27% : 0.000007s : 24: predicate.switch_simplify 0.89% : 0.000001s : 7: predicate.tile_eliminate 0.76% : 0.000001s : 7: predicate.transpose_eliminate 1.55% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.47% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.39% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.38% : 0.000003s : 19: predicate.tuple_list_get_item_eliminator 1.42% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.51% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.48% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 2.12% : 0.000003s : 20: predicate.updatestate_pure_node_eliminater 2.86% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.46% : 0.000001s : 3: predicate.value_based_eliminate 0.94% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.91% : 0.000001s : 6: predicate.virtual_output_eliminate 0.47% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000172 4 9.49% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.51% : 0.000155s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 6.977237 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000023s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000061s : 1: add_recomputation 0.00% : 0.000014s : 1: assign_add_opt 0.00% : 0.000199s : 1: auto_monad 0.00% : 0.000030s : 1: auto_monad_reorder 0.00% : 0.000006s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.02% : 0.001508s : 1: bootstrap 0.00% : 0.000027s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000010s : 1: convert_after_rewriter 0.00% : 0.000023s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000103s : 1: distribtued_split 0.01% : 0.000489s : 1: eliminate_special_op_node 0.00% : 0.000020s : 1: environ_conv 0.00% : 0.000023s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000514s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000018s : 1: offloading_packed_experts 0.00% : 0.000011s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000646s : 80: opt.transform.opt_a 0.00% : 0.000022s : 1: opt.transform.opt_after_cconv 0.00% : 0.000069s : 27: opt.transform.opt_b 0.00% : 0.000031s : 1: opt.transform.opt_trans_graph 0.00% : 0.000020s : 3: opt.transform.special_op_eliminate 0.00% : 0.000028s : 4: opt.transform.symbol_engine_opt 0.14% : 0.009892s : 1: opt_a 0.00% : 0.000088s : 1: opt_after_cconv 0.00% : 0.000137s : 1: opt_b 0.17% : 0.011551s : 1: optimize 0.00% : 0.000009s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000008s : 1: order_py_execute_after_rewriter 0.00% : 0.000032s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000031s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000023s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000029s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000045s : 1: pre_auto_parallel 0.00% : 0.000019s : 1: py_interpret_to_execute 0.00% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000014s : 1: remove_dup_value 0.00% : 0.000218s : 1: renormalize.infer 0.00% : 0.000172s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000103s : 1: rewriter_after_opt_a 0.00% : 0.000066s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000008s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000009s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000065s : 1: symbol_engine_optimizer 99.41% : 6.935846s : 1: task_emit 0.00% : 0.000048s : 1: tuple_transform 0.21% : 0.014608s : 1: type_inference 0.00% : 0.000136s : 1: validate TotalTime = 7.58555, [21] [bootstrap]: 0.00138008 [type_inference]: 0.0146374 [auto_monad]: 9.023e-05 [graph_reusing]: 2.51e-06 [inline]: 2.57e-06 [parallel-infer-symbol]: 9.74e-06 [pre_auto_parallel]: 4.209e-05 [insert-virtual-dataset]: 3.51999e-06 [parallel-infer-symbol-second]: 4.50003e-07 [dataset_repeat_opt]: 1.55001e-06 [pipeline_split]: 1.91999e-06 [optimize]: 0.0121721, [52] [py_interpret_to_execute]: 2.135e-05 [rewriter_before_opt_a]: 6.532e-05 [opt_a]: 0.0103456, [2] [Cycle 1]: 0.00153928, [43] [expand_dump_flag]: 5.13e-06 [switch_simplify]: 3.44e-05 [loop_unroll]: 9.41999e-06 [a_1]: 0.00028254 [recompute_prepare]: 5.84e-06 [updatestate_depend_eliminate]: 1.206e-05 [updatestate_assign_eliminate]: 3.61e-06 [updatestate_loads_eliminate]: 2.84e-06 [parameter_eliminate]: 1.037e-05 [a_2]: 7.068e-05 [accelerated_algorithm]: 5.8e-06 [shard]: 3.09999e-06 [meta_shard_fg_expand]: 3.30999e-06 [shard_inline]: 5.13e-06 [auto_parallel]: 1.888e-05 [parallel]: 9.26999e-06 [flash_sp]: 1.249e-05 [merge_comm]: 5.68001e-06 [allreduce_fusion]: 3.38e-06 [matmul_add_comm_reduction]: 1.038e-05 [allreduce_slice_to_reducescatter]: 5.50004e-07 [virtual_shard_identity]: 5.76e-06 [virtual_dataset]: 4.56e-06 [get_grad_eliminate_]: 4.78e-06 [virtual_output]: 4.53999e-06 [merge_forward]: 5.04999e-06 [cell_reuse_recompute_pass]: 1.82001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.904e-05 [before_grad]: 9.02e-06 [inplace_validation]: 3.94e-06 [meta_fg_expand]: 3.56e-06 [inplace_validation_after_expand]: 4.60999e-06 [flash_sp_send_recv_attached]: 4.07001e-06 [receive_attached]: 1.106e-05 [after_resolve]: 8.89e-06 [a_after_grad]: 7.76001e-06 [special_op_eliminate]: 6.88e-06 [renormalize]: 0.00058284 [add_forward_monad_depend]: 4.99001e-06 [auto_monad_grad]: 3.54e-06 [auto_monad_eliminator]: 1.252e-05 [cse]: 2.942e-05 [a_3]: 3.683e-05 [Cycle 2]: 0.00050931, [43] [expand_dump_flag]: 1.58e-06 [switch_simplify]: 5.76e-06 [loop_unroll]: 4.57e-06 [a_1]: 9.014e-05 [recompute_prepare]: 4.12e-06 [updatestate_depend_eliminate]: 3.81999e-06 [updatestate_assign_eliminate]: 2.68e-06 [updatestate_loads_eliminate]: 2.03001e-06 [parameter_eliminate]: 1.6e-06 [a_2]: 5.503e-05 [accelerated_algorithm]: 5.08e-06 [shard]: 2.09e-06 [meta_shard_fg_expand]: 1.9e-06 [shard_inline]: 4.63999e-06 [auto_parallel]: 1.123e-05 [parallel]: 5.36001e-06 [flash_sp]: 5.83001e-06 [merge_comm]: 3.69e-06 [allreduce_fusion]: 3.2e-06 [matmul_add_comm_reduction]: 7.5e-06 [allreduce_slice_to_reducescatter]: 5.10001e-07 [virtual_shard_identity]: 5.19e-06 [virtual_dataset]: 4.39001e-06 [get_grad_eliminate_]: 4.17001e-06 [virtual_output]: 4.15e-06 [merge_forward]: 2.97e-06 [cell_reuse_recompute_pass]: 2.08001e-06 [cell_reuse_handle_not_recompute_node_pass]: 9.67999e-06 [before_grad]: 7.35999e-06 [inplace_validation]: 2.90001e-06 [meta_fg_expand]: 2.44999e-06 [inplace_validation_after_expand]: 3.18e-06 [flash_sp_send_recv_attached]: 1.15001e-06 [receive_attached]: 1.57001e-06 [after_resolve]: 7.56999e-06 [a_after_grad]: 6.87e-06 [special_op_eliminate]: 4.28999e-06 [renormalize]: 7.99919e-08 [add_forward_monad_depend]: 1.22e-06 [auto_monad_grad]: 1.27e-06 [auto_monad_eliminator]: 6.39999e-06 [cse]: 1.454e-05 [a_3]: 2.6e-05 [py_interpret_to_execute_after_opt_a]: 1.473e-05 [slice_cell_reuse_recomputed_activation]: 2.69001e-06 [rewriter_after_opt_a]: 0.00010556 [convert_after_rewriter]: 6.79999e-06 [order_py_execute_after_rewriter]: 4.65001e-06 [opt_b]: 0.00014592, [1] [Cycle 1]: 0.00013877, [7] [b_1]: 8.212e-05 [b_2]: 6.72e-06 [updatestate_depend_eliminate]: 4.16e-06 [updatestate_assign_eliminate]: 2.31e-06 [updatestate_loads_eliminate]: 2.13001e-06 [renormalize]: 3.29994e-07 [cse]: 1.172e-05 [optimize_parallel_all_gather_comm]: 7.07e-06 [overlap_param_gather]: 2.351e-05 [cconv]: 2.752e-05 [loop_unroll]: 0.00058934 [opt_after_cconv]: 0.00010167, [1] [Cycle 1]: 9.452e-05, [7] [c_1]: 2.464e-05 [parameter_eliminate]: 3.58e-06 [updatestate_depend_eliminate]: 8.31e-06 [updatestate_assign_eliminate]: 2.71e-06 [updatestate_loads_eliminate]: 2.67e-06 [cse]: 2.092e-05 [renormalize]: 7.10002e-07 [remove_dup_value]: 1.148e-05 [tuple_transform]: 5.178e-05, [1] [Cycle 1]: 4.677e-05, [2] [d_1]: 3.705e-05 [renormalize]: 1.90004e-07 [partial_unused_args_eliminate]: 2.33999e-06 [add_cache_embedding]: 1.479e-05 [add_recomputation]: 5.928e-05 [cse_after_recomputation]: 2.278e-05, [1] [Cycle 1]: 1.691e-05, [1] [cse]: 1.048e-05 [environ_conv]: 2.471e-05 [swap_dp_allreduce_reducescatter]: 6.24001e-06 [bias_add_comm_swap]: 3.36001e-06 [label_micro_interleaved_index]: 2.37999e-06 [label_fine_grained_interleaved_index]: 2.84e-06 [merge_cast_opt]: 1.67001e-06 [slice_recompute_activation]: 2.14e-06 [micro_interleaved_order_control]: 2.63999e-06 [assign_add_opt]: 3.272e-05 [ForceFp32Comm]: 1.20999e-06 [remove_cast_before_assign_add]: 1.27e-06 [full_micro_interleaved_order_control]: 2.3e-06 [reorder_send_recv_between_fp_bp]: 2.47001e-06 [comm_op_add_attrs]: 1.22e-06 [add_comm_op_reuse_tag]: 1.16001e-06 [interleave_split_concat_branches]: 8.50006e-07 [interleave_parallel_branches]: 8.60004e-07 [overlap_opt_shard_in_pipeline]: 3.083e-05 [overlap_opt_shard_grad_in_pipeline]: 3.04e-06 [control_data_broadcast_order]: 1.22e-06 [grouped_pairwise_exchange_alltoall]: 1.63e-06 [offloading_packed_experts]: 1.50999e-06 [overlap_recompute_and_grad_model_parallel]: 2.33999e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.7e-07 [overlap_recompute_allgather_and_fa_grad]: 1.26999e-06 [overlap_grad_ring_attention]: 1.91999e-06 [overlap_grad_flash_sp]: 1.754e-05 [begin_end_overlap_inline]: 8.49992e-07 [split_matmul_comm_elemetwise]: 2.64001e-06 [split_layernorm_comm]: 1.96001e-06 [handle_group_info]: 9.19987e-07 [symbol_engine_optimizer]: 7.104e-05, [1] [Cycle 1]: 6.553e-05, [6] [build]: 3.74e-06 [elim_shapecalc]: 1.021e-05 [elim_not_effective]: 1.185e-05 [opt_reshape]: 5.35e-06 [fold_const_symbol]: 8.27e-06 [renormalize]: 4.50003e-07 [pipeline_parallel_scheduler]: 2.53001e-06 [auto_monad_reorder]: 2.3e-05 [get_jit_bprop_graph]: 5.89993e-07 [rewriter_after_jit_bprop_graph]: 4.69998e-07 [eliminate_special_op_node]: 0.00052448 [distribtued_split]: 9.779e-05 [validate]: 4.614e-05 [task_emit]: 7.55614 [execute]: 1.24e-05 Sums bootstrap : 0.001380s : 0.02% type_inference : 0.014637s : 0.19% auto_monad : 0.000090s : 0.00% graph_reusing : 0.000003s : 0.00% inline : 0.000003s : 0.00% parallel-infer-symbol : 0.000010s : 0.00% pre_auto_parallel : 0.000042s : 0.00% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000021s : 0.00% optimize.rewriter_before_opt_a : 0.000065s : 0.00% optimize.opt_a.expand_dump_flag : 0.000007s : 0.00% optimize.opt_a.switch_simplify : 0.000040s : 0.00% optimize.opt_a.loop_unroll : 0.000014s : 0.00% optimize.opt_a.a_1 : 0.000373s : 0.00% optimize.opt_a.recompute_prepare : 0.000010s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_a.parameter_eliminate : 0.000012s : 0.00% optimize.opt_a.a_2 : 0.000126s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000011s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.00% optimize.opt_a.shard_inline : 0.000010s : 0.00% optimize.opt_a.auto_parallel : 0.000030s : 0.00% optimize.opt_a.parallel : 0.000015s : 0.00% optimize.opt_a.flash_sp : 0.000018s : 0.00% optimize.opt_a.merge_comm : 0.000009s : 0.00% optimize.opt_a.allreduce_fusion : 0.000007s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000011s : 0.00% optimize.opt_a.virtual_dataset : 0.000009s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000009s : 0.00% optimize.opt_a.virtual_output : 0.000009s : 0.00% optimize.opt_a.merge_forward : 0.000008s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000029s : 0.00% optimize.opt_a.before_grad : 0.000016s : 0.00% optimize.opt_a.inplace_validation : 0.000007s : 0.00% optimize.opt_a.meta_fg_expand : 0.000006s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000008s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000013s : 0.00% optimize.opt_a.after_resolve : 0.000016s : 0.00% optimize.opt_a.a_after_grad : 0.000015s : 0.00% optimize.opt_a.special_op_eliminate : 0.000011s : 0.00% optimize.opt_a.renormalize : 0.000583s : 0.01% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000005s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000019s : 0.00% optimize.opt_a.cse : 0.000044s : 0.00% optimize.opt_a.a_3 : 0.000063s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000015s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000106s : 0.00% optimize.convert_after_rewriter : 0.000007s : 0.00% optimize.order_py_execute_after_rewriter : 0.000005s : 0.00% optimize.opt_b.b_1 : 0.000082s : 0.00% optimize.opt_b.b_2 : 0.000007s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000004s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000012s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000007s : 0.00% optimize.overlap_param_gather : 0.000024s : 0.00% optimize.cconv : 0.000028s : 0.00% optimize.loop_unroll : 0.000589s : 0.01% optimize.opt_after_cconv.c_1 : 0.000025s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000004s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.cse : 0.000021s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000011s : 0.00% optimize.tuple_transform.d_1 : 0.000037s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.00% optimize.add_recomputation : 0.000059s : 0.00% optimize.cse_after_recomputation.cse : 0.000010s : 0.00% optimize.environ_conv : 0.000025s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000006s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000003s : 0.00% optimize.assign_add_opt : 0.000033s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000031s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000018s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000003s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000010s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000012s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000005s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000008s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000023s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000524s : 0.01% distribtued_split : 0.000098s : 0.00% validate : 0.000046s : 0.00% task_emit : 7.556143s : 99.74% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.000122 20 2.00% : 0.000002s : 2: substitution.elim_not_effective 1.60% : 0.000002s : 2: substitution.fold_const_symbol 4.66% : 0.000006s : 3: substitution.graph_param_transform 68.89% : 0.000084s : 1: substitution.inline 3.55% : 0.000004s : 4: substitution.j_node_and_user_rematch 4.89% : 0.000006s : 2: substitution.reduce_all_const_elim 10.50% : 0.000013s : 4: substitution.remove_not_recompute_node 3.90% : 0.000005s : 2: substitution.replace_old_param ------[type_inference.] 0.014589 2 97.59% : 0.014238s : 1: type_inference.infer 2.41% : 0.000351s : 1: type_inference.specialize ------[replace.] 0.000015 1 100.00% : 0.000015s : 1: replace.inline ------[match.] 0.000083 1 100.00% : 0.000083s : 1: match.inline ------[predicate.] 0.000132 740 0.77% : 0.000001s : 7: predicate.accumulaten_eliminater 1.73% : 0.000002s : 3: predicate.ad_related_special_op_eliminate 0.62% : 0.000001s : 6: predicate.addn_check_dump 0.72% : 0.000001s : 7: predicate.addn_zero_filter 0.62% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.15% : 0.000003s : 13: predicate.arithmetic_simplify 0.64% : 0.000001s : 7: predicate.cast_eliminate 0.75% : 0.000001s : 6: predicate.check_bprop_eliminate 0.62% : 0.000001s : 6: predicate.compare_switch_simplify 0.20% : 0.000000s : 3: predicate.const_output_eliminate 0.53% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.78% : 0.000002s : 7: predicate.convert_tensor_eliminate 0.87% : 0.000001s : 6: predicate.depend_value_elim 0.71% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.77% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.99% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.30% : 0.000000s : 3: predicate.elim_not_effective 0.50% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.01% : 0.000001s : 10: predicate.environ_add_const_eliminate 0.92% : 0.000001s : 10: predicate.environ_get_add_eliminate 0.92% : 0.000001s : 10: predicate.environ_get_depend_swap 1.62% : 0.000002s : 16: predicate.environ_get_eliminate 0.91% : 0.000001s : 10: predicate.environ_get_set_eliminate 0.75% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.92% : 0.000003s : 8: predicate.float_depend_g_call 0.58% : 0.000001s : 6: predicate.float_environ_get_switch 0.92% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.23% : 0.000000s : 3: predicate.fold_const_symbol 0.76% : 0.000001s : 6: predicate.get_grad_eliminate 0.39% : 0.000001s : 3: predicate.graph_param_transform 0.66% : 0.000001s : 6: predicate.incorporate_call 0.57% : 0.000001s : 6: predicate.incorporate_call_switch 6.82% : 0.000009s : 33: predicate.inline 1.17% : 0.000002s : 6: predicate.inline_without_move 0.41% : 0.000001s : 6: predicate.j_node_and_user_rematch 1.09% : 0.000001s : 6: predicate.less_batch_normalization 3.10% : 0.000004s : 13: predicate.list_to_tuple_eliminator_ 1.86% : 0.000002s : 20: predicate.load_eliminater 2.16% : 0.000003s : 3: predicate.loop_unroll_after_grad 1.62% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.73% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.67% : 0.000001s : 6: predicate.merge_addn 0.66% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.65% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.64% : 0.000001s : 7: predicate.minmaximum_grad 1.36% : 0.000002s : 3: predicate.mutable_eliminate 0.45% : 0.000001s : 3: predicate.opt_reshape 0.48% : 0.000001s : 3: predicate.parallel_virtual_node 1.47% : 0.000002s : 8: predicate.partial_defer_inline 1.08% : 0.000001s : 10: predicate.partial_eliminate 0.73% : 0.000001s : 7: predicate.print_const_string_wrapper 0.94% : 0.000001s : 6: predicate.reduce_all_const_elim 1.10% : 0.000001s : 7: predicate.reduce_eliminate 0.76% : 0.000001s : 6: predicate.remove_not_recompute_node 1.23% : 0.000002s : 13: predicate.replace_applicator 0.45% : 0.000001s : 6: predicate.replace_old_param 0.25% : 0.000000s : 3: predicate.reset_defer_inline 0.74% : 0.000001s : 7: predicate.reshape_eliminate 0.68% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.47% : 0.000001s : 3: predicate.row_tensor_eliminate 1.01% : 0.000001s : 6: predicate.same_eliminate 0.45% : 0.000001s : 6: predicate.set_cell_output_no_recompute 0.97% : 0.000001s : 6: predicate.shard_identity_eliminate 1.26% : 0.000002s : 9: predicate.special_op_eliminate 0.86% : 0.000001s : 6: predicate.specialize_transform 0.97% : 0.000001s : 6: predicate.split_environ_get_set_with_tuple_value 1.08% : 0.000001s : 6: predicate.stack_unstack_eliminate 2.31% : 0.000003s : 20: predicate.stopgrad_eliminater 0.38% : 0.000001s : 3: predicate.switch_call_monad_eliminater 0.89% : 0.000001s : 8: predicate.switch_defer_inline 1.51% : 0.000002s : 14: predicate.switch_layer_defer_inline 6.19% : 0.000008s : 24: predicate.switch_simplify 0.73% : 0.000001s : 7: predicate.tile_eliminate 0.62% : 0.000001s : 7: predicate.transpose_eliminate 1.50% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.40% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.19% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.45% : 0.000003s : 19: predicate.tuple_list_get_item_eliminator 1.21% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.20% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.45% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 1.85% : 0.000002s : 20: predicate.updatestate_pure_node_eliminater 2.78% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 3: predicate.value_based_eliminate 0.75% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.74% : 0.000001s : 6: predicate.virtual_output_eliminate 0.50% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000197 4 9.29% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.71% : 0.000178s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 7.599038 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000064s : 1: add_recomputation 0.00% : 0.000038s : 1: assign_add_opt 0.00% : 0.000103s : 1: auto_monad 0.00% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.02% : 0.001431s : 1: bootstrap 0.00% : 0.000032s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000011s : 1: convert_after_rewriter 0.00% : 0.000026s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000109s : 1: distribtued_split 0.01% : 0.000540s : 1: eliminate_special_op_node 0.00% : 0.000029s : 1: environ_conv 0.00% : 0.000022s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000604s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000700s : 80: opt.transform.opt_a 0.00% : 0.000023s : 1: opt.transform.opt_after_cconv 0.00% : 0.000071s : 27: opt.transform.opt_b 0.00% : 0.000036s : 1: opt.transform.opt_trans_graph 0.00% : 0.000025s : 3: opt.transform.special_op_eliminate 0.00% : 0.000031s : 4: opt.transform.symbol_engine_opt 0.14% : 0.010351s : 1: opt_a 0.00% : 0.000106s : 1: opt_after_cconv 0.00% : 0.000149s : 1: opt_b 0.16% : 0.012185s : 1: optimize 0.00% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000008s : 1: order_py_execute_after_rewriter 0.00% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000035s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000028s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000015s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000050s : 1: pre_auto_parallel 0.00% : 0.000028s : 1: py_interpret_to_execute 0.00% : 0.000019s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000015s : 1: remove_dup_value 0.00% : 0.000347s : 1: renormalize.infer 0.00% : 0.000228s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000113s : 1: rewriter_after_opt_a 0.00% : 0.000070s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000074s : 1: symbol_engine_optimizer 99.44% : 7.556182s : 1: task_emit 0.00% : 0.000055s : 1: tuple_transform 0.19% : 0.014669s : 1: type_inference 0.00% : 0.000124s : 1: validate TotalTime = 7.85725, [21] [bootstrap]: 0.00155642 [type_inference]: 0.0152125 [auto_monad]: 0.00010252 [graph_reusing]: 2.43999e-06 [inline]: 2.35e-06 [parallel-infer-symbol]: 3.45999e-06 [pre_auto_parallel]: 4.273e-05 [insert-virtual-dataset]: 3.63e-06 [parallel-infer-symbol-second]: 4.69998e-07 [dataset_repeat_opt]: 1.61999e-06 [pipeline_split]: 1.6e-06 [optimize]: 0.0128286, [52] [py_interpret_to_execute]: 1.563e-05 [rewriter_before_opt_a]: 6.311e-05 [opt_a]: 0.0110333, [2] [Cycle 1]: 0.00154667, [43] [expand_dump_flag]: 5.18e-06 [switch_simplify]: 3.618e-05 [loop_unroll]: 9.31e-06 [a_1]: 0.00028613 [recompute_prepare]: 5.64e-06 [updatestate_depend_eliminate]: 1.105e-05 [updatestate_assign_eliminate]: 3.32e-06 [updatestate_loads_eliminate]: 6.19001e-06 [parameter_eliminate]: 8.62e-06 [a_2]: 7.795e-05 [accelerated_algorithm]: 5.1e-06 [shard]: 3.39e-06 [meta_shard_fg_expand]: 3.24e-06 [shard_inline]: 5.31001e-06 [auto_parallel]: 1.828e-05 [parallel]: 1.606e-05 [flash_sp]: 1.769e-05 [merge_comm]: 1.255e-05 [allreduce_fusion]: 3.72001e-06 [matmul_add_comm_reduction]: 9.86e-06 [allreduce_slice_to_reducescatter]: 7.7e-07 [virtual_shard_identity]: 6.1e-06 [virtual_dataset]: 5.04001e-06 [get_grad_eliminate_]: 4.60001e-06 [virtual_output]: 4.55999e-06 [merge_forward]: 4.05e-06 [cell_reuse_recompute_pass]: 1.97999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.157e-05 [before_grad]: 8.74999e-06 [inplace_validation]: 3.32999e-06 [meta_fg_expand]: 3.66999e-06 [inplace_validation_after_expand]: 4.63e-06 [flash_sp_send_recv_attached]: 4.03001e-06 [receive_attached]: 9.26999e-06 [after_resolve]: 9.46001e-06 [a_after_grad]: 7.83001e-06 [special_op_eliminate]: 4.95001e-06 [renormalize]: 0.00057967 [add_forward_monad_depend]: 3.98999e-06 [auto_monad_grad]: 3.03e-06 [auto_monad_eliminator]: 1.342e-05 [cse]: 3.147e-05 [a_3]: 3.706e-05 [Cycle 2]: 0.00055615, [43] [expand_dump_flag]: 1.61001e-06 [switch_simplify]: 6.68e-06 [loop_unroll]: 4.79e-06 [a_1]: 9.795e-05 [recompute_prepare]: 4.33001e-06 [updatestate_depend_eliminate]: 3.81999e-06 [updatestate_assign_eliminate]: 2.4e-06 [updatestate_loads_eliminate]: 2.45e-06 [parameter_eliminate]: 1.52999e-06 [a_2]: 5.721e-05 [accelerated_algorithm]: 5.25e-06 [shard]: 2e-06 [meta_shard_fg_expand]: 1.81e-06 [shard_inline]: 3.312e-05 [auto_parallel]: 1.292e-05 [parallel]: 4.74001e-06 [flash_sp]: 6.75999e-06 [merge_comm]: 4.46e-06 [allreduce_fusion]: 3.21001e-06 [matmul_add_comm_reduction]: 6.14001e-06 [allreduce_slice_to_reducescatter]: 4.60001e-07 [virtual_shard_identity]: 6e-06 [virtual_dataset]: 4.50999e-06 [get_grad_eliminate_]: 4.49999e-06 [virtual_output]: 4.27e-06 [merge_forward]: 2.73e-06 [cell_reuse_recompute_pass]: 2.34999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1e-05 [before_grad]: 8.36e-06 [inplace_validation]: 2.63e-06 [meta_fg_expand]: 2.86e-06 [inplace_validation_after_expand]: 2.88e-06 [flash_sp_send_recv_attached]: 1.14e-06 [receive_attached]: 1.23e-06 [after_resolve]: 7.87001e-06 [a_after_grad]: 7.29e-06 [special_op_eliminate]: 5.01001e-06 [renormalize]: 5.00004e-08 [add_forward_monad_depend]: 1.03e-06 [auto_monad_grad]: 1.45001e-06 [auto_monad_eliminator]: 6.07999e-06 [cse]: 1.527e-05 [a_3]: 2.706e-05 [py_interpret_to_execute_after_opt_a]: 1.422e-05 [slice_cell_reuse_recomputed_activation]: 2.84001e-06 [rewriter_after_opt_a]: 9.013e-05 [convert_after_rewriter]: 1.32e-05 [order_py_execute_after_rewriter]: 4.78e-06 [opt_b]: 0.00014856, [1] [Cycle 1]: 0.00014191, [7] [b_1]: 8.666e-05 [b_2]: 5.57999e-06 [updatestate_depend_eliminate]: 3.92999e-06 [updatestate_assign_eliminate]: 2.54001e-06 [updatestate_loads_eliminate]: 2.01e-06 [renormalize]: 1.59998e-07 [cse]: 1.177e-05 [optimize_parallel_all_gather_comm]: 6.84999e-06 [overlap_param_gather]: 2.122e-05 [cconv]: 2.867e-05 [loop_unroll]: 0.00061468 [opt_after_cconv]: 9.837e-05, [1] [Cycle 1]: 9.151e-05, [7] [c_1]: 2.486e-05 [parameter_eliminate]: 3.39e-06 [updatestate_depend_eliminate]: 8.11e-06 [updatestate_assign_eliminate]: 2.4e-06 [updatestate_loads_eliminate]: 2.31e-06 [cse]: 1.95e-05 [renormalize]: 7.29997e-07 [remove_dup_value]: 1.143e-05 [tuple_transform]: 4.816e-05, [1] [Cycle 1]: 4.397e-05, [2] [d_1]: 3.492e-05 [renormalize]: 1.80007e-07 [partial_unused_args_eliminate]: 2.70001e-06 [add_cache_embedding]: 1.235e-05 [add_recomputation]: 6.515e-05 [cse_after_recomputation]: 1.903e-05, [1] [Cycle 1]: 1.447e-05, [1] [cse]: 1.015e-05 [environ_conv]: 2.583e-05 [swap_dp_allreduce_reducescatter]: 5.23e-06 [bias_add_comm_swap]: 3.05999e-06 [label_micro_interleaved_index]: 2.32999e-06 [label_fine_grained_interleaved_index]: 2.27e-06 [merge_cast_opt]: 1.52001e-06 [slice_recompute_activation]: 2.02999e-06 [micro_interleaved_order_control]: 1.81001e-06 [assign_add_opt]: 3.213e-05 [ForceFp32Comm]: 1.21999e-06 [remove_cast_before_assign_add]: 1.19e-06 [full_micro_interleaved_order_control]: 2.56e-06 [reorder_send_recv_between_fp_bp]: 2.41e-06 [comm_op_add_attrs]: 1.23e-06 [add_comm_op_reuse_tag]: 1.48e-06 [interleave_split_concat_branches]: 1.31001e-06 [interleave_parallel_branches]: 8.29998e-07 [overlap_opt_shard_in_pipeline]: 2.546e-05 [overlap_opt_shard_grad_in_pipeline]: 2.3e-06 [control_data_broadcast_order]: 1.18e-06 [grouped_pairwise_exchange_alltoall]: 1.47001e-06 [offloading_packed_experts]: 1.34e-06 [overlap_recompute_and_grad_model_parallel]: 2.33001e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.09989e-07 [overlap_recompute_allgather_and_fa_grad]: 1.27e-06 [overlap_grad_ring_attention]: 2.16e-06 [overlap_grad_flash_sp]: 1.571e-05 [begin_end_overlap_inline]: 8.89995e-07 [split_matmul_comm_elemetwise]: 2.19001e-06 [split_layernorm_comm]: 2.38999e-06 [handle_group_info]: 9.79999e-07 [symbol_engine_optimizer]: 6.885e-05, [1] [Cycle 1]: 6.44e-05, [6] [build]: 3.76e-06 [elim_shapecalc]: 9.3e-06 [elim_not_effective]: 1.122e-05 [opt_reshape]: 5.72001e-06 [fold_const_symbol]: 8.21e-06 [renormalize]: 2.59999e-07 [pipeline_parallel_scheduler]: 2.27999e-06 [auto_monad_reorder]: 2.629e-05 [get_jit_bprop_graph]: 4.90007e-07 [rewriter_after_jit_bprop_graph]: 4.39992e-07 [eliminate_special_op_node]: 0.00051903 [distribtued_split]: 9.214e-05 [validate]: 4.537e-05 [task_emit]: 7.82643 [execute]: 8.49999e-06 Sums bootstrap : 0.001556s : 0.02% type_inference : 0.015213s : 0.19% auto_monad : 0.000103s : 0.00% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000043s : 0.00% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.00% optimize.rewriter_before_opt_a : 0.000063s : 0.00% optimize.opt_a.expand_dump_flag : 0.000007s : 0.00% optimize.opt_a.switch_simplify : 0.000043s : 0.00% optimize.opt_a.loop_unroll : 0.000014s : 0.00% optimize.opt_a.a_1 : 0.000384s : 0.00% optimize.opt_a.recompute_prepare : 0.000010s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000009s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.000135s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000010s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.00% optimize.opt_a.shard_inline : 0.000038s : 0.00% optimize.opt_a.auto_parallel : 0.000031s : 0.00% optimize.opt_a.parallel : 0.000021s : 0.00% optimize.opt_a.flash_sp : 0.000024s : 0.00% optimize.opt_a.merge_comm : 0.000017s : 0.00% optimize.opt_a.allreduce_fusion : 0.000007s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000012s : 0.00% optimize.opt_a.virtual_dataset : 0.000010s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000009s : 0.00% optimize.opt_a.virtual_output : 0.000009s : 0.00% optimize.opt_a.merge_forward : 0.000007s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000022s : 0.00% optimize.opt_a.before_grad : 0.000017s : 0.00% optimize.opt_a.inplace_validation : 0.000006s : 0.00% optimize.opt_a.meta_fg_expand : 0.000007s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000008s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000010s : 0.00% optimize.opt_a.after_resolve : 0.000017s : 0.00% optimize.opt_a.a_after_grad : 0.000015s : 0.00% optimize.opt_a.special_op_eliminate : 0.000010s : 0.00% optimize.opt_a.renormalize : 0.000580s : 0.01% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000019s : 0.00% optimize.opt_a.cse : 0.000047s : 0.00% optimize.opt_a.a_3 : 0.000064s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000014s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000090s : 0.00% optimize.convert_after_rewriter : 0.000013s : 0.00% optimize.order_py_execute_after_rewriter : 0.000005s : 0.00% optimize.opt_b.b_1 : 0.000087s : 0.00% optimize.opt_b.b_2 : 0.000006s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000004s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000003s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000012s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000007s : 0.00% optimize.overlap_param_gather : 0.000021s : 0.00% optimize.cconv : 0.000029s : 0.00% optimize.loop_unroll : 0.000615s : 0.01% optimize.opt_after_cconv.c_1 : 0.000025s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000019s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000011s : 0.00% optimize.tuple_transform.d_1 : 0.000035s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.00% optimize.add_recomputation : 0.000065s : 0.00% optimize.cse_after_recomputation.cse : 0.000010s : 0.00% optimize.environ_conv : 0.000026s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000005s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000032s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000025s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000009s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000011s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000006s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000008s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000026s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000519s : 0.01% distribtued_split : 0.000092s : 0.00% validate : 0.000045s : 0.00% task_emit : 7.826434s : 99.74% execute : 0.000008s : 0.00% Time group info: ------[substitution.] 0.000123 20 1.74% : 0.000002s : 2: substitution.elim_not_effective 1.33% : 0.000002s : 2: substitution.fold_const_symbol 4.39% : 0.000005s : 3: substitution.graph_param_transform 71.57% : 0.000088s : 1: substitution.inline 3.39% : 0.000004s : 4: substitution.j_node_and_user_rematch 10.28% : 0.000013s : 2: substitution.reduce_all_const_elim 3.70% : 0.000005s : 4: substitution.remove_not_recompute_node 3.60% : 0.000004s : 2: substitution.replace_old_param ------[type_inference.] 0.015164 2 97.54% : 0.014791s : 1: type_inference.infer 2.46% : 0.000373s : 1: type_inference.specialize ------[replace.] 0.000017 1 100.00% : 0.000017s : 1: replace.inline ------[match.] 0.000087 1 100.00% : 0.000087s : 1: match.inline ------[predicate.] 0.000138 740 0.68% : 0.000001s : 7: predicate.accumulaten_eliminater 1.33% : 0.000002s : 3: predicate.ad_related_special_op_eliminate 0.63% : 0.000001s : 6: predicate.addn_check_dump 0.70% : 0.000001s : 7: predicate.addn_zero_filter 0.59% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.16% : 0.000003s : 13: predicate.arithmetic_simplify 0.78% : 0.000001s : 7: predicate.cast_eliminate 0.68% : 0.000001s : 6: predicate.check_bprop_eliminate 0.64% : 0.000001s : 6: predicate.compare_switch_simplify 0.18% : 0.000000s : 3: predicate.const_output_eliminate 0.41% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.97% : 0.000003s : 7: predicate.convert_tensor_eliminate 0.72% : 0.000001s : 6: predicate.depend_value_elim 0.74% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.76% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.76% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.30% : 0.000000s : 3: predicate.elim_not_effective 0.71% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 0.99% : 0.000001s : 10: predicate.environ_add_const_eliminate 0.87% : 0.000001s : 10: predicate.environ_get_add_eliminate 0.89% : 0.000001s : 10: predicate.environ_get_depend_swap 1.53% : 0.000002s : 16: predicate.environ_get_eliminate 0.89% : 0.000001s : 10: predicate.environ_get_set_eliminate 0.71% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.87% : 0.000003s : 8: predicate.float_depend_g_call 0.64% : 0.000001s : 6: predicate.float_environ_get_switch 0.88% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 3: predicate.fold_const_symbol 0.78% : 0.000001s : 6: predicate.get_grad_eliminate 0.41% : 0.000001s : 3: predicate.graph_param_transform 0.71% : 0.000001s : 6: predicate.incorporate_call 0.60% : 0.000001s : 6: predicate.incorporate_call_switch 6.12% : 0.000008s : 33: predicate.inline 0.95% : 0.000001s : 6: predicate.inline_without_move 0.38% : 0.000001s : 6: predicate.j_node_and_user_rematch 0.97% : 0.000001s : 6: predicate.less_batch_normalization 3.18% : 0.000004s : 13: predicate.list_to_tuple_eliminator_ 1.91% : 0.000003s : 20: predicate.load_eliminater 2.44% : 0.000003s : 3: predicate.loop_unroll_after_grad 1.31% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.89% : 0.000003s : 13: predicate.make_slice_get_slice_eliminator 0.63% : 0.000001s : 6: predicate.merge_addn 0.63% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.69% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.60% : 0.000001s : 7: predicate.minmaximum_grad 1.50% : 0.000002s : 3: predicate.mutable_eliminate 0.64% : 0.000001s : 3: predicate.opt_reshape 0.68% : 0.000001s : 3: predicate.parallel_virtual_node 1.45% : 0.000002s : 8: predicate.partial_defer_inline 1.16% : 0.000002s : 10: predicate.partial_eliminate 0.65% : 0.000001s : 7: predicate.print_const_string_wrapper 0.94% : 0.000001s : 6: predicate.reduce_all_const_elim 1.05% : 0.000001s : 7: predicate.reduce_eliminate 0.89% : 0.000001s : 6: predicate.remove_not_recompute_node 1.08% : 0.000001s : 13: predicate.replace_applicator 0.48% : 0.000001s : 6: predicate.replace_old_param 0.19% : 0.000000s : 3: predicate.reset_defer_inline 0.90% : 0.000001s : 7: predicate.reshape_eliminate 0.76% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.47% : 0.000001s : 3: predicate.row_tensor_eliminate 0.97% : 0.000001s : 6: predicate.same_eliminate 0.42% : 0.000001s : 6: predicate.set_cell_output_no_recompute 0.89% : 0.000001s : 6: predicate.shard_identity_eliminate 1.57% : 0.000002s : 9: predicate.special_op_eliminate 0.90% : 0.000001s : 6: predicate.specialize_transform 0.96% : 0.000001s : 6: predicate.split_environ_get_set_with_tuple_value 1.09% : 0.000001s : 6: predicate.stack_unstack_eliminate 1.79% : 0.000002s : 20: predicate.stopgrad_eliminater 0.44% : 0.000001s : 3: predicate.switch_call_monad_eliminater 0.82% : 0.000001s : 8: predicate.switch_defer_inline 1.52% : 0.000002s : 14: predicate.switch_layer_defer_inline 6.16% : 0.000008s : 24: predicate.switch_simplify 0.97% : 0.000001s : 7: predicate.tile_eliminate 0.77% : 0.000001s : 7: predicate.transpose_eliminate 2.05% : 0.000003s : 13: predicate.tuple_list_convert_item_index_to_positive 1.49% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.27% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.44% : 0.000003s : 19: predicate.tuple_list_get_item_eliminator 1.31% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.19% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.43% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 1.83% : 0.000003s : 20: predicate.updatestate_pure_node_eliminater 2.81% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.54% : 0.000001s : 3: predicate.value_based_eliminate 0.81% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.71% : 0.000001s : 6: predicate.virtual_output_eliminate 0.56% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000201 4 9.83% : 0.000020s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.17% : 0.000181s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 7.871456 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000070s : 1: add_recomputation 0.00% : 0.000036s : 1: assign_add_opt 0.00% : 0.000114s : 1: auto_monad 0.00% : 0.000032s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.02% : 0.001623s : 1: bootstrap 0.00% : 0.000033s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000017s : 1: convert_after_rewriter 0.00% : 0.000022s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000103s : 1: distribtued_split 0.01% : 0.000533s : 1: eliminate_special_op_node 0.00% : 0.000030s : 1: environ_conv 0.00% : 0.000016s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000624s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000749s : 80: opt.transform.opt_a 0.00% : 0.000023s : 1: opt.transform.opt_after_cconv 0.00% : 0.000074s : 27: opt.transform.opt_b 0.00% : 0.000034s : 1: opt.transform.opt_trans_graph 0.00% : 0.000024s : 3: opt.transform.special_op_eliminate 0.00% : 0.000030s : 4: opt.transform.symbol_engine_opt 0.14% : 0.011039s : 1: opt_a 0.00% : 0.000103s : 1: opt_after_cconv 0.00% : 0.000152s : 1: opt_b 0.16% : 0.012841s : 1: optimize 0.00% : 0.000010s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000008s : 1: order_py_execute_after_rewriter 0.00% : 0.000020s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000029s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000025s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000049s : 1: pre_auto_parallel 0.00% : 0.000021s : 1: py_interpret_to_execute 0.00% : 0.000018s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000015s : 1: remove_dup_value 0.00% : 0.000331s : 1: renormalize.infer 0.00% : 0.000241s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000096s : 1: rewriter_after_opt_a 0.00% : 0.000068s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000009s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000071s : 1: symbol_engine_optimizer 99.43% : 7.826482s : 1: task_emit 0.00% : 0.000051s : 1: tuple_transform 0.19% : 0.015246s : 1: type_inference 0.00% : 0.000117s : 1: validate TotalTime = 6.79442, [21] [bootstrap]: 0.0013492 [type_inference]: 0.0142854 [auto_monad]: 0.00015609 [graph_reusing]: 2.49999e-06 [inline]: 1.73e-06 [parallel-infer-symbol]: 8.99e-06 [pre_auto_parallel]: 4.203e-05 [insert-virtual-dataset]: 3.79e-06 [parallel-infer-symbol-second]: 7.10002e-07 [dataset_repeat_opt]: 1.54e-06 [pipeline_split]: 1.63e-06 [optimize]: 0.0114149, [52] [py_interpret_to_execute]: 1.911e-05 [rewriter_before_opt_a]: 6.183e-05 [opt_a]: 0.00971326, [2] [Cycle 1]: 0.0012882, [43] [expand_dump_flag]: 4.16e-06 [switch_simplify]: 2.826e-05 [loop_unroll]: 1.153e-05 [a_1]: 0.00026781 [recompute_prepare]: 7.73001e-06 [updatestate_depend_eliminate]: 6.12001e-06 [updatestate_assign_eliminate]: 3.66e-06 [updatestate_loads_eliminate]: 3.08e-06 [parameter_eliminate]: 4.43001e-06 [a_2]: 8.136e-05 [accelerated_algorithm]: 8.72e-06 [shard]: 2.58e-06 [meta_shard_fg_expand]: 3.98001e-06 [shard_inline]: 6.57e-06 [auto_parallel]: 1.09e-05 [parallel]: 9.19e-06 [flash_sp]: 1.17e-05 [merge_comm]: 6.60999e-06 [allreduce_fusion]: 4.94999e-06 [matmul_add_comm_reduction]: 9.15999e-06 [allreduce_slice_to_reducescatter]: 8.99992e-07 [virtual_shard_identity]: 7.18e-06 [virtual_dataset]: 6.17001e-06 [get_grad_eliminate_]: 5.52001e-06 [virtual_output]: 5.65e-06 [merge_forward]: 4.24001e-06 [cell_reuse_recompute_pass]: 1.9e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.94e-05 [before_grad]: 1.015e-05 [inplace_validation]: 3.40001e-06 [meta_fg_expand]: 3.66e-06 [inplace_validation_after_expand]: 4.27e-06 [flash_sp_send_recv_attached]: 4.39001e-06 [receive_attached]: 9.53001e-06 [after_resolve]: 1.007e-05 [a_after_grad]: 9.08e-06 [special_op_eliminate]: 5.71001e-06 [renormalize]: 0.00036247 [add_forward_monad_depend]: 4.23001e-06 [auto_monad_grad]: 2.32e-06 [auto_monad_eliminator]: 1.197e-05 [cse]: 2.628e-05 [a_3]: 3.854e-05 [Cycle 2]: 0.00056066, [43] [expand_dump_flag]: 1.12e-06 [switch_simplify]: 6.69e-06 [loop_unroll]: 5.48e-06 [a_1]: 0.00011635 [recompute_prepare]: 5.05001e-06 [updatestate_depend_eliminate]: 4.04e-06 [updatestate_assign_eliminate]: 2.56e-06 [updatestate_loads_eliminate]: 2.34001e-06 [parameter_eliminate]: 1.05001e-06 [a_2]: 6.691e-05 [accelerated_algorithm]: 5.76e-06 [shard]: 1.19e-06 [meta_shard_fg_expand]: 1.65001e-06 [shard_inline]: 5.68001e-06 [auto_parallel]: 8.98e-06 [parallel]: 3.91e-06 [flash_sp]: 5.95e-06 [merge_comm]: 4.04999e-06 [allreduce_fusion]: 3.66999e-06 [matmul_add_comm_reduction]: 5.32001e-06 [allreduce_slice_to_reducescatter]: 3.09999e-07 [virtual_shard_identity]: 6.33e-06 [virtual_dataset]: 5.41999e-06 [get_grad_eliminate_]: 5.06999e-06 [virtual_output]: 4.89e-06 [merge_forward]: 2.67e-06 [cell_reuse_recompute_pass]: 1.96001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.113e-05 [before_grad]: 8.84001e-06 [inplace_validation]: 2.47e-06 [meta_fg_expand]: 2.95001e-06 [inplace_validation_after_expand]: 3.14e-06 [flash_sp_send_recv_attached]: 1.05001e-06 [receive_attached]: 8.50006e-07 [after_resolve]: 7.83001e-06 [a_after_grad]: 8e-06 [special_op_eliminate]: 5.10999e-06 [renormalize]: 9.00036e-08 [add_forward_monad_depend]: 8.2e-07 [auto_monad_grad]: 1.07e-06 [auto_monad_eliminator]: 5.71e-06 [cse]: 1.127e-05 [a_3]: 3.096e-05 [py_interpret_to_execute_after_opt_a]: 8.32e-06 [slice_cell_reuse_recomputed_activation]: 2.53999e-06 [rewriter_after_opt_a]: 7.979e-05 [convert_after_rewriter]: 6.93e-06 [order_py_execute_after_rewriter]: 4.76e-06 [opt_b]: 0.00015875, [1] [Cycle 1]: 0.00015311, [7] [b_1]: 9.898e-05 [b_2]: 6.69e-06 [updatestate_depend_eliminate]: 2.77e-06 [updatestate_assign_eliminate]: 2.32001e-06 [updatestate_loads_eliminate]: 2.11e-06 [renormalize]: 3.40005e-07 [cse]: 1.03e-05 [optimize_parallel_all_gather_comm]: 6.02001e-06 [overlap_param_gather]: 1.674e-05 [cconv]: 2.323e-05 [loop_unroll]: 0.00052141 [opt_after_cconv]: 9.422e-05, [1] [Cycle 1]: 8.788e-05, [7] [c_1]: 2.792e-05 [parameter_eliminate]: 2.61e-06 [updatestate_depend_eliminate]: 5.68001e-06 [updatestate_assign_eliminate]: 2.61e-06 [updatestate_loads_eliminate]: 2.35e-06 [cse]: 1.467e-05 [renormalize]: 4.50003e-07 [remove_dup_value]: 1.145e-05 [tuple_transform]: 5.385e-05, [1] [Cycle 1]: 4.944e-05, [2] [d_1]: 3.987e-05 [renormalize]: 2.19996e-07 [partial_unused_args_eliminate]: 2.63001e-06 [add_cache_embedding]: 1.231e-05 [add_recomputation]: 5.318e-05 [cse_after_recomputation]: 2.179e-05, [1] [Cycle 1]: 1.702e-05, [1] [cse]: 1.128e-05 [environ_conv]: 1.682e-05 [swap_dp_allreduce_reducescatter]: 5.7e-06 [bias_add_comm_swap]: 2.47e-06 [label_micro_interleaved_index]: 2.02001e-06 [label_fine_grained_interleaved_index]: 2.61e-06 [merge_cast_opt]: 1.66e-06 [slice_recompute_activation]: 2e-06 [micro_interleaved_order_control]: 2.27999e-06 [assign_add_opt]: 9.05001e-06 [ForceFp32Comm]: 8.89995e-07 [remove_cast_before_assign_add]: 1.05999e-06 [full_micro_interleaved_order_control]: 2.44999e-06 [reorder_send_recv_between_fp_bp]: 2.35e-06 [comm_op_add_attrs]: 1.01999e-06 [add_comm_op_reuse_tag]: 1.4e-06 [interleave_split_concat_branches]: 8.79998e-07 [interleave_parallel_branches]: 8.29998e-07 [overlap_opt_shard_in_pipeline]: 2.299e-05 [overlap_opt_shard_grad_in_pipeline]: 2.75001e-06 [control_data_broadcast_order]: 1.21001e-06 [grouped_pairwise_exchange_alltoall]: 1.63e-06 [offloading_packed_experts]: 1.124e-05 [overlap_recompute_and_grad_model_parallel]: 2.62e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.60004e-07 [overlap_recompute_allgather_and_fa_grad]: 1.28e-06 [overlap_grad_ring_attention]: 2.09e-06 [overlap_grad_flash_sp]: 2.451e-05 [begin_end_overlap_inline]: 8.00006e-07 [split_matmul_comm_elemetwise]: 2.28999e-06 [split_layernorm_comm]: 2.06001e-06 [handle_group_info]: 1.00999e-06 [symbol_engine_optimizer]: 7.317e-05, [1] [Cycle 1]: 6.889e-05, [6] [build]: 2.83e-06 [elim_shapecalc]: 8.81001e-06 [elim_not_effective]: 1.244e-05 [opt_reshape]: 8.47e-06 [fold_const_symbol]: 9.97e-06 [renormalize]: 3.00002e-07 [pipeline_parallel_scheduler]: 1.86e-06 [auto_monad_reorder]: 2.397e-05 [get_jit_bprop_graph]: 4.29995e-07 [rewriter_after_jit_bprop_graph]: 4.50003e-07 [eliminate_special_op_node]: 0.0004762 [distribtued_split]: 7.617e-05 [validate]: 5.238e-05 [task_emit]: 6.76618 [execute]: 1.215e-05 Sums bootstrap : 0.001349s : 0.02% type_inference : 0.014285s : 0.21% auto_monad : 0.000156s : 0.00% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000009s : 0.00% pre_auto_parallel : 0.000042s : 0.00% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000019s : 0.00% optimize.rewriter_before_opt_a : 0.000062s : 0.00% optimize.opt_a.expand_dump_flag : 0.000005s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.00% optimize.opt_a.loop_unroll : 0.000017s : 0.00% optimize.opt_a.a_1 : 0.000384s : 0.01% optimize.opt_a.recompute_prepare : 0.000013s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000010s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000148s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000014s : 0.00% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.00% optimize.opt_a.shard_inline : 0.000012s : 0.00% optimize.opt_a.auto_parallel : 0.000020s : 0.00% optimize.opt_a.parallel : 0.000013s : 0.00% optimize.opt_a.flash_sp : 0.000018s : 0.00% optimize.opt_a.merge_comm : 0.000011s : 0.00% optimize.opt_a.allreduce_fusion : 0.000009s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000014s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000014s : 0.00% optimize.opt_a.virtual_dataset : 0.000012s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000011s : 0.00% optimize.opt_a.virtual_output : 0.000011s : 0.00% optimize.opt_a.merge_forward : 0.000007s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.00% optimize.opt_a.before_grad : 0.000019s : 0.00% optimize.opt_a.inplace_validation : 0.000006s : 0.00% optimize.opt_a.meta_fg_expand : 0.000007s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000007s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000010s : 0.00% optimize.opt_a.after_resolve : 0.000018s : 0.00% optimize.opt_a.a_after_grad : 0.000017s : 0.00% optimize.opt_a.special_op_eliminate : 0.000011s : 0.00% optimize.opt_a.renormalize : 0.000363s : 0.01% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000018s : 0.00% optimize.opt_a.cse : 0.000038s : 0.00% optimize.opt_a.a_3 : 0.000070s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000080s : 0.00% optimize.convert_after_rewriter : 0.000007s : 0.00% optimize.order_py_execute_after_rewriter : 0.000005s : 0.00% optimize.opt_b.b_1 : 0.000099s : 0.00% optimize.opt_b.b_2 : 0.000007s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000003s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000010s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000006s : 0.00% optimize.overlap_param_gather : 0.000017s : 0.00% optimize.cconv : 0.000023s : 0.00% optimize.loop_unroll : 0.000521s : 0.01% optimize.opt_after_cconv.c_1 : 0.000028s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000006s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000015s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.00% optimize.tuple_transform.d_1 : 0.000040s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.00% optimize.add_recomputation : 0.000053s : 0.00% optimize.cse_after_recomputation.cse : 0.000011s : 0.00% optimize.environ_conv : 0.000017s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000006s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000009s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000023s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000011s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000003s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000025s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000009s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000012s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000010s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000024s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000476s : 0.01% distribtued_split : 0.000076s : 0.00% validate : 0.000052s : 0.00% task_emit : 6.766176s : 99.72% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.000096 20 2.20% : 0.000002s : 2: substitution.elim_not_effective 1.93% : 0.000002s : 2: substitution.fold_const_symbol 6.49% : 0.000006s : 3: substitution.graph_param_transform 63.32% : 0.000061s : 1: substitution.inline 4.24% : 0.000004s : 4: substitution.j_node_and_user_rematch 5.18% : 0.000005s : 2: substitution.reduce_all_const_elim 12.76% : 0.000012s : 4: substitution.remove_not_recompute_node 3.88% : 0.000004s : 2: substitution.replace_old_param ------[type_inference.] 0.014245 2 97.17% : 0.013842s : 1: type_inference.infer 2.83% : 0.000403s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000060 1 100.00% : 0.000060s : 1: match.inline ------[predicate.] 0.000143 740 0.87% : 0.000001s : 7: predicate.accumulaten_eliminater 1.19% : 0.000002s : 3: predicate.ad_related_special_op_eliminate 0.68% : 0.000001s : 6: predicate.addn_check_dump 0.79% : 0.000001s : 7: predicate.addn_zero_filter 0.70% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.27% : 0.000003s : 13: predicate.arithmetic_simplify 0.70% : 0.000001s : 7: predicate.cast_eliminate 0.80% : 0.000001s : 6: predicate.check_bprop_eliminate 0.70% : 0.000001s : 6: predicate.compare_switch_simplify 0.21% : 0.000000s : 3: predicate.const_output_eliminate 0.50% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.79% : 0.000003s : 7: predicate.convert_tensor_eliminate 0.72% : 0.000001s : 6: predicate.depend_value_elim 0.73% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 1.06% : 0.000002s : 7: predicate.dict_get_item_eliminator 0.83% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.34% : 0.000000s : 3: predicate.elim_not_effective 0.54% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000002s : 10: predicate.environ_add_const_eliminate 1.00% : 0.000001s : 10: predicate.environ_get_add_eliminate 1.00% : 0.000001s : 10: predicate.environ_get_depend_swap 1.78% : 0.000003s : 16: predicate.environ_get_eliminate 1.06% : 0.000002s : 10: predicate.environ_get_set_eliminate 0.88% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.67% : 0.000002s : 8: predicate.float_depend_g_call 0.65% : 0.000001s : 6: predicate.float_environ_get_switch 1.11% : 0.000002s : 9: predicate.float_tuple_getitem_switch 0.24% : 0.000000s : 3: predicate.fold_const_symbol 0.85% : 0.000001s : 6: predicate.get_grad_eliminate 0.43% : 0.000001s : 3: predicate.graph_param_transform 0.77% : 0.000001s : 6: predicate.incorporate_call 0.68% : 0.000001s : 6: predicate.incorporate_call_switch 6.34% : 0.000009s : 33: predicate.inline 1.21% : 0.000002s : 6: predicate.inline_without_move 0.45% : 0.000001s : 6: predicate.j_node_and_user_rematch 0.90% : 0.000001s : 6: predicate.less_batch_normalization 1.70% : 0.000002s : 13: predicate.list_to_tuple_eliminator_ 2.28% : 0.000003s : 20: predicate.load_eliminater 1.36% : 0.000002s : 3: predicate.loop_unroll_after_grad 1.52% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.76% : 0.000003s : 13: predicate.make_slice_get_slice_eliminator 0.75% : 0.000001s : 6: predicate.merge_addn 0.70% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.75% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.70% : 0.000001s : 7: predicate.minmaximum_grad 0.94% : 0.000001s : 3: predicate.mutable_eliminate 0.53% : 0.000001s : 3: predicate.opt_reshape 0.44% : 0.000001s : 3: predicate.parallel_virtual_node 1.31% : 0.000002s : 8: predicate.partial_defer_inline 1.25% : 0.000002s : 10: predicate.partial_eliminate 0.83% : 0.000001s : 7: predicate.print_const_string_wrapper 1.23% : 0.000002s : 6: predicate.reduce_all_const_elim 0.96% : 0.000001s : 7: predicate.reduce_eliminate 0.75% : 0.000001s : 6: predicate.remove_not_recompute_node 1.21% : 0.000002s : 13: predicate.replace_applicator 0.52% : 0.000001s : 6: predicate.replace_old_param 0.24% : 0.000000s : 3: predicate.reset_defer_inline 0.75% : 0.000001s : 7: predicate.reshape_eliminate 0.72% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 3: predicate.row_tensor_eliminate 0.93% : 0.000001s : 6: predicate.same_eliminate 0.53% : 0.000001s : 6: predicate.set_cell_output_no_recompute 1.01% : 0.000001s : 6: predicate.shard_identity_eliminate 1.36% : 0.000002s : 9: predicate.special_op_eliminate 1.03% : 0.000001s : 6: predicate.specialize_transform 0.98% : 0.000001s : 6: predicate.split_environ_get_set_with_tuple_value 0.84% : 0.000001s : 6: predicate.stack_unstack_eliminate 2.12% : 0.000003s : 20: predicate.stopgrad_eliminater 0.43% : 0.000001s : 3: predicate.switch_call_monad_eliminater 0.96% : 0.000001s : 8: predicate.switch_defer_inline 1.63% : 0.000002s : 14: predicate.switch_layer_defer_inline 4.92% : 0.000007s : 24: predicate.switch_simplify 0.77% : 0.000001s : 7: predicate.tile_eliminate 0.80% : 0.000001s : 7: predicate.transpose_eliminate 1.65% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.66% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.31% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.62% : 0.000004s : 19: predicate.tuple_list_get_item_eliminator 1.46% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.37% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.72% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 2.14% : 0.000003s : 20: predicate.updatestate_pure_node_eliminater 2.98% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.47% : 0.000001s : 3: predicate.value_based_eliminate 0.85% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.80% : 0.000001s : 6: predicate.virtual_output_eliminate 0.42% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000208 4 7.78% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.22% : 0.000191s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 6.806989 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000058s : 1: add_recomputation 0.00% : 0.000012s : 1: assign_add_opt 0.00% : 0.000169s : 1: auto_monad 0.00% : 0.000031s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.02% : 0.001401s : 1: bootstrap 0.00% : 0.000027s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000011s : 1: convert_after_rewriter 0.00% : 0.000025s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000086s : 1: distribtued_split 0.01% : 0.000489s : 1: eliminate_special_op_node 0.00% : 0.000021s : 1: environ_conv 0.00% : 0.000021s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000009s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000531s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000015s : 1: offloading_packed_experts 0.00% : 0.000011s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000757s : 80: opt.transform.opt_a 0.00% : 0.000026s : 1: opt.transform.opt_after_cconv 0.00% : 0.000085s : 27: opt.transform.opt_b 0.00% : 0.000038s : 1: opt.transform.opt_trans_graph 0.00% : 0.000023s : 3: opt.transform.special_op_eliminate 0.00% : 0.000035s : 4: opt.transform.symbol_engine_opt 0.14% : 0.009717s : 1: opt_a 0.00% : 0.000098s : 1: opt_after_cconv 0.00% : 0.000162s : 1: opt_b 0.17% : 0.011423s : 1: optimize 0.00% : 0.000009s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000008s : 1: order_py_execute_after_rewriter 0.00% : 0.000029s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000029s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000021s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000014s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000049s : 1: pre_auto_parallel 0.00% : 0.000025s : 1: py_interpret_to_execute 0.00% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000015s : 1: remove_dup_value 0.00% : 0.000209s : 1: renormalize.infer 0.00% : 0.000147s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000086s : 1: rewriter_after_opt_a 0.00% : 0.000066s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000009s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000076s : 1: symbol_engine_optimizer 99.40% : 6.766214s : 1: task_emit 0.00% : 0.000057s : 1: tuple_transform 0.21% : 0.014309s : 1: type_inference 0.00% : 0.000123s : 1: validate TotalTime = 8.06398, [21] [bootstrap]: 0.00184577 [type_inference]: 0.0141008 [auto_monad]: 0.00015392 [graph_reusing]: 1.86999e-06 [inline]: 2.21e-06 [parallel-infer-symbol]: 1.86e-06 [pre_auto_parallel]: 2.923e-05 [insert-virtual-dataset]: 2.5e-06 [parallel-infer-symbol-second]: 4.59986e-07 [dataset_repeat_opt]: 7.49991e-07 [pipeline_split]: 1.04999e-06 [optimize]: 0.0109027, [52] [py_interpret_to_execute]: 1.01e-05 [rewriter_before_opt_a]: 2.317e-05 [opt_a]: 0.00960218, [2] [Cycle 1]: 0.00111525, [43] [expand_dump_flag]: 2.74001e-06 [switch_simplify]: 2.044e-05 [loop_unroll]: 8.74999e-06 [a_1]: 0.00021258 [recompute_prepare]: 5.53e-06 [updatestate_depend_eliminate]: 5.78001e-06 [updatestate_assign_eliminate]: 3.03e-06 [updatestate_loads_eliminate]: 2.53e-06 [parameter_eliminate]: 3.00999e-06 [a_2]: 7.322e-05 [accelerated_algorithm]: 5.23e-06 [shard]: 1.85e-06 [meta_shard_fg_expand]: 2.3e-06 [shard_inline]: 5.2e-06 [auto_parallel]: 9.78999e-06 [parallel]: 5.51e-06 [flash_sp]: 2.143e-05 [merge_comm]: 4.89001e-06 [allreduce_fusion]: 3.08e-06 [matmul_add_comm_reduction]: 6.39999e-06 [allreduce_slice_to_reducescatter]: 5.69999e-07 [virtual_shard_identity]: 6.21e-06 [virtual_dataset]: 4.79e-06 [get_grad_eliminate_]: 4.28999e-06 [virtual_output]: 4.83e-06 [merge_forward]: 2.83e-06 [cell_reuse_recompute_pass]: 1.20999e-06 [cell_reuse_handle_not_recompute_node_pass]: 9.45e-06 [before_grad]: 1.193e-05 [inplace_validation]: 2.49001e-06 [meta_fg_expand]: 2.78e-06 [inplace_validation_after_expand]: 3.28e-06 [flash_sp_send_recv_attached]: 2.99e-06 [receive_attached]: 2.01e-06 [after_resolve]: 7.33e-06 [a_after_grad]: 7.04e-06 [special_op_eliminate]: 4.78e-06 [renormalize]: 0.00034057 [add_forward_monad_depend]: 2.29001e-06 [auto_monad_grad]: 1.82001e-06 [auto_monad_eliminator]: 8.05e-06 [cse]: 2.438e-05 [a_3]: 3.11e-05 [Cycle 2]: 0.00047332, [43] [expand_dump_flag]: 7.7e-07 [switch_simplify]: 5.45e-06 [loop_unroll]: 4.44999e-06 [a_1]: 9.27e-05 [recompute_prepare]: 3.85e-06 [updatestate_depend_eliminate]: 2.88e-06 [updatestate_assign_eliminate]: 2.48001e-06 [updatestate_loads_eliminate]: 1.99e-06 [parameter_eliminate]: 9.99993e-07 [a_2]: 5.365e-05 [accelerated_algorithm]: 4.69999e-06 [shard]: 1.02e-06 [meta_shard_fg_expand]: 1.36999e-06 [shard_inline]: 4.68999e-06 [auto_parallel]: 7.82001e-06 [parallel]: 2.79001e-06 [flash_sp]: 3.96e-06 [merge_comm]: 3.31999e-06 [allreduce_fusion]: 2.79e-06 [matmul_add_comm_reduction]: 4.33e-06 [allreduce_slice_to_reducescatter]: 2.80008e-07 [virtual_shard_identity]: 5.10001e-06 [virtual_dataset]: 4.34001e-06 [get_grad_eliminate_]: 4.09e-06 [virtual_output]: 4.04e-06 [merge_forward]: 2.4e-06 [cell_reuse_recompute_pass]: 1.43e-06 [cell_reuse_handle_not_recompute_node_pass]: 8.46e-06 [before_grad]: 6.95999e-06 [inplace_validation]: 2.05e-06 [meta_fg_expand]: 2.53e-06 [inplace_validation_after_expand]: 2.46e-06 [flash_sp_send_recv_attached]: 7.2e-07 [receive_attached]: 6.10002e-07 [after_resolve]: 6.35e-06 [a_after_grad]: 6.28001e-06 [special_op_eliminate]: 4.13001e-06 [renormalize]: 8.00064e-08 [add_forward_monad_depend]: 6.50005e-07 [auto_monad_grad]: 8.59989e-07 [auto_monad_eliminator]: 4.17e-06 [cse]: 1.023e-05 [a_3]: 2.495e-05 [py_interpret_to_execute_after_opt_a]: 7.21999e-06 [slice_cell_reuse_recomputed_activation]: 1.65e-06 [rewriter_after_opt_a]: 4.782e-05 [convert_after_rewriter]: 4.66e-06 [order_py_execute_after_rewriter]: 3.15999e-06 [opt_b]: 0.00013577, [1] [Cycle 1]: 0.00013043, [7] [b_1]: 8.064e-05 [b_2]: 5.81e-06 [updatestate_depend_eliminate]: 2.63e-06 [updatestate_assign_eliminate]: 1.91999e-06 [updatestate_loads_eliminate]: 1.69e-06 [renormalize]: 2.09999e-07 [cse]: 8.82e-06 [optimize_parallel_all_gather_comm]: 4.58e-06 [overlap_param_gather]: 2.56e-06 [cconv]: 1.404e-05 [loop_unroll]: 0.00046213 [opt_after_cconv]: 7.912e-05, [1] [Cycle 1]: 7.354e-05, [7] [c_1]: 2.268e-05 [parameter_eliminate]: 1.74e-06 [updatestate_depend_eliminate]: 4.26e-06 [updatestate_assign_eliminate]: 2.32e-06 [updatestate_loads_eliminate]: 1.83001e-06 [cse]: 1.178e-05 [renormalize]: 3.6e-07 [remove_dup_value]: 6.79999e-06 [tuple_transform]: 4.407e-05, [1] [Cycle 1]: 4.009e-05, [2] [d_1]: 3.105e-05 [renormalize]: 1.69995e-07 [partial_unused_args_eliminate]: 1.32e-06 [add_cache_embedding]: 8.03001e-06 [add_recomputation]: 3.479e-05 [cse_after_recomputation]: 1.73e-05, [1] [Cycle 1]: 1.328e-05, [1] [cse]: 8.60001e-06 [environ_conv]: 1.723e-05 [swap_dp_allreduce_reducescatter]: 4.47e-06 [bias_add_comm_swap]: 2.06e-06 [label_micro_interleaved_index]: 1.24e-06 [label_fine_grained_interleaved_index]: 1.16001e-06 [merge_cast_opt]: 9.70002e-07 [slice_recompute_activation]: 1.04999e-06 [micro_interleaved_order_control]: 1.14e-06 [assign_add_opt]: 6.74001e-06 [ForceFp32Comm]: 7.10002e-07 [remove_cast_before_assign_add]: 5.19998e-07 [full_micro_interleaved_order_control]: 1.25999e-06 [reorder_send_recv_between_fp_bp]: 1.24999e-06 [comm_op_add_attrs]: 5.29995e-07 [add_comm_op_reuse_tag]: 6.19999e-07 [interleave_split_concat_branches]: 5.10001e-07 [interleave_parallel_branches]: 4.99989e-07 [overlap_opt_shard_in_pipeline]: 9.12999e-06 [overlap_opt_shard_grad_in_pipeline]: 1.22e-06 [control_data_broadcast_order]: 6.39993e-07 [grouped_pairwise_exchange_alltoall]: 1.03e-06 [offloading_packed_experts]: 2.11e-06 [overlap_recompute_and_grad_model_parallel]: 1.14999e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.40006e-07 [overlap_recompute_allgather_and_fa_grad]: 5.90007e-07 [overlap_grad_ring_attention]: 9.60004e-07 [overlap_grad_flash_sp]: 3.067e-05 [begin_end_overlap_inline]: 4.79995e-07 [split_matmul_comm_elemetwise]: 1.62001e-06 [split_layernorm_comm]: 1.10001e-06 [handle_group_info]: 8.29998e-07 [symbol_engine_optimizer]: 6.334e-05, [1] [Cycle 1]: 5.907e-05, [6] [build]: 1.98001e-06 [elim_shapecalc]: 7.34e-06 [elim_not_effective]: 9.42001e-06 [opt_reshape]: 7.39e-06 [fold_const_symbol]: 7.51e-06 [renormalize]: 2.69996e-07 [pipeline_parallel_scheduler]: 1.17e-06 [auto_monad_reorder]: 1.599e-05 [get_jit_bprop_graph]: 3.69997e-07 [rewriter_after_jit_bprop_graph]: 3.30008e-07 [eliminate_special_op_node]: 0.00052762 [distribtued_split]: 3.171e-05 [validate]: 5.463e-05 [task_emit]: 8.03602 [execute]: 9.15e-06 Sums bootstrap : 0.001846s : 0.02% type_inference : 0.014101s : 0.18% auto_monad : 0.000154s : 0.00% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000029s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000010s : 0.00% optimize.rewriter_before_opt_a : 0.000023s : 0.00% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000026s : 0.00% optimize.opt_a.loop_unroll : 0.000013s : 0.00% optimize.opt_a.a_1 : 0.000305s : 0.00% optimize.opt_a.recompute_prepare : 0.000009s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000009s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000127s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000010s : 0.00% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000004s : 0.00% optimize.opt_a.shard_inline : 0.000010s : 0.00% optimize.opt_a.auto_parallel : 0.000018s : 0.00% optimize.opt_a.parallel : 0.000008s : 0.00% optimize.opt_a.flash_sp : 0.000025s : 0.00% optimize.opt_a.merge_comm : 0.000008s : 0.00% optimize.opt_a.allreduce_fusion : 0.000006s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000011s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000011s : 0.00% optimize.opt_a.virtual_dataset : 0.000009s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000008s : 0.00% optimize.opt_a.virtual_output : 0.000009s : 0.00% optimize.opt_a.merge_forward : 0.000005s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000018s : 0.00% optimize.opt_a.before_grad : 0.000019s : 0.00% optimize.opt_a.inplace_validation : 0.000005s : 0.00% optimize.opt_a.meta_fg_expand : 0.000005s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000006s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000014s : 0.00% optimize.opt_a.a_after_grad : 0.000013s : 0.00% optimize.opt_a.special_op_eliminate : 0.000009s : 0.00% optimize.opt_a.renormalize : 0.000341s : 0.00% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000012s : 0.00% optimize.opt_a.cse : 0.000035s : 0.00% optimize.opt_a.a_3 : 0.000056s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000007s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000048s : 0.00% optimize.convert_after_rewriter : 0.000005s : 0.00% optimize.order_py_execute_after_rewriter : 0.000003s : 0.00% optimize.opt_b.b_1 : 0.000081s : 0.00% optimize.opt_b.b_2 : 0.000006s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000003s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000009s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000005s : 0.00% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000014s : 0.00% optimize.loop_unroll : 0.000462s : 0.01% optimize.opt_after_cconv.c_1 : 0.000023s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000004s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000012s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000007s : 0.00% optimize.tuple_transform.d_1 : 0.000031s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000008s : 0.00% optimize.add_recomputation : 0.000035s : 0.00% optimize.cse_after_recomputation.cse : 0.000009s : 0.00% optimize.environ_conv : 0.000017s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000004s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000009s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000031s : 0.00% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000002s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000007s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000009s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000007s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000008s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000016s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000528s : 0.01% distribtued_split : 0.000032s : 0.00% validate : 0.000055s : 0.00% task_emit : 8.036024s : 99.77% execute : 0.000009s : 0.00% Time group info: ------[substitution.] 0.000067 20 2.39% : 0.000002s : 2: substitution.elim_not_effective 1.77% : 0.000001s : 2: substitution.fold_const_symbol 6.18% : 0.000004s : 3: substitution.graph_param_transform 60.09% : 0.000040s : 1: substitution.inline 4.61% : 0.000003s : 4: substitution.j_node_and_user_rematch 14.99% : 0.000010s : 2: substitution.reduce_all_const_elim 6.14% : 0.000004s : 4: substitution.remove_not_recompute_node 3.84% : 0.000003s : 2: substitution.replace_old_param ------[type_inference.] 0.014069 2 98.12% : 0.013804s : 1: type_inference.infer 1.88% : 0.000264s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000039 1 100.00% : 0.000039s : 1: match.inline ------[predicate.] 0.000122 740 0.82% : 0.000001s : 7: predicate.accumulaten_eliminater 1.12% : 0.000001s : 3: predicate.ad_related_special_op_eliminate 0.68% : 0.000001s : 6: predicate.addn_check_dump 0.84% : 0.000001s : 7: predicate.addn_zero_filter 0.76% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.56% : 0.000003s : 13: predicate.arithmetic_simplify 0.85% : 0.000001s : 7: predicate.cast_eliminate 0.69% : 0.000001s : 6: predicate.check_bprop_eliminate 0.71% : 0.000001s : 6: predicate.compare_switch_simplify 0.22% : 0.000000s : 3: predicate.const_output_eliminate 0.44% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.51% : 0.000002s : 7: predicate.convert_tensor_eliminate 0.68% : 0.000001s : 6: predicate.depend_value_elim 0.87% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.87% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.88% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.27% : 0.000000s : 3: predicate.elim_not_effective 0.57% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000001s : 10: predicate.environ_add_const_eliminate 0.99% : 0.000001s : 10: predicate.environ_get_add_eliminate 1.01% : 0.000001s : 10: predicate.environ_get_depend_swap 1.79% : 0.000002s : 16: predicate.environ_get_eliminate 1.06% : 0.000001s : 10: predicate.environ_get_set_eliminate 0.81% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.62% : 0.000002s : 8: predicate.float_depend_g_call 0.70% : 0.000001s : 6: predicate.float_environ_get_switch 1.07% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.25% : 0.000000s : 3: predicate.fold_const_symbol 0.80% : 0.000001s : 6: predicate.get_grad_eliminate 0.31% : 0.000000s : 3: predicate.graph_param_transform 0.78% : 0.000001s : 6: predicate.incorporate_call 0.63% : 0.000001s : 6: predicate.incorporate_call_switch 6.00% : 0.000007s : 33: predicate.inline 0.97% : 0.000001s : 6: predicate.inline_without_move 0.43% : 0.000001s : 6: predicate.j_node_and_user_rematch 1.09% : 0.000001s : 6: predicate.less_batch_normalization 1.94% : 0.000002s : 13: predicate.list_to_tuple_eliminator_ 2.21% : 0.000003s : 20: predicate.load_eliminater 1.23% : 0.000001s : 3: predicate.loop_unroll_after_grad 1.43% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.83% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.76% : 0.000001s : 6: predicate.merge_addn 0.70% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.63% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.73% : 0.000001s : 7: predicate.minmaximum_grad 0.79% : 0.000001s : 3: predicate.mutable_eliminate 0.54% : 0.000001s : 3: predicate.opt_reshape 0.47% : 0.000001s : 3: predicate.parallel_virtual_node 1.44% : 0.000002s : 8: predicate.partial_defer_inline 1.26% : 0.000002s : 10: predicate.partial_eliminate 0.75% : 0.000001s : 7: predicate.print_const_string_wrapper 0.83% : 0.000001s : 6: predicate.reduce_all_const_elim 1.18% : 0.000001s : 7: predicate.reduce_eliminate 0.58% : 0.000001s : 6: predicate.remove_not_recompute_node 1.23% : 0.000001s : 13: predicate.replace_applicator 0.49% : 0.000001s : 6: predicate.replace_old_param 0.21% : 0.000000s : 3: predicate.reset_defer_inline 0.90% : 0.000001s : 7: predicate.reshape_eliminate 0.68% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.71% : 0.000001s : 3: predicate.row_tensor_eliminate 1.05% : 0.000001s : 6: predicate.same_eliminate 0.53% : 0.000001s : 6: predicate.set_cell_output_no_recompute 1.05% : 0.000001s : 6: predicate.shard_identity_eliminate 1.51% : 0.000002s : 9: predicate.special_op_eliminate 0.99% : 0.000001s : 6: predicate.specialize_transform 1.03% : 0.000001s : 6: predicate.split_environ_get_set_with_tuple_value 0.94% : 0.000001s : 6: predicate.stack_unstack_eliminate 2.31% : 0.000003s : 20: predicate.stopgrad_eliminater 0.40% : 0.000000s : 3: predicate.switch_call_monad_eliminater 0.91% : 0.000001s : 8: predicate.switch_defer_inline 1.78% : 0.000002s : 14: predicate.switch_layer_defer_inline 4.75% : 0.000006s : 24: predicate.switch_simplify 0.79% : 0.000001s : 7: predicate.tile_eliminate 0.80% : 0.000001s : 7: predicate.transpose_eliminate 1.62% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.55% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.48% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.66% : 0.000003s : 19: predicate.tuple_list_get_item_eliminator 1.41% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.65% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.55% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 2.02% : 0.000002s : 20: predicate.updatestate_pure_node_eliminater 2.95% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.47% : 0.000001s : 3: predicate.value_based_eliminate 0.85% : 0.000001s : 6: predicate.virtual_dataset_eliminate 1.09% : 0.000001s : 6: predicate.virtual_output_eliminate 0.53% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000146 4 6.01% : 0.000009s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.99% : 0.000137s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 8.075924 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000012s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000040s : 1: add_recomputation 0.00% : 0.000010s : 1: assign_add_opt 0.00% : 0.000165s : 1: auto_monad 0.00% : 0.000023s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.02% : 0.001892s : 1: bootstrap 0.00% : 0.000017s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.00% : 0.000008s : 1: convert_after_rewriter 0.00% : 0.000020s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.00% : 0.000039s : 1: distribtued_split 0.01% : 0.000540s : 1: eliminate_special_op_node 0.00% : 0.000022s : 1: environ_conv 0.00% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000007s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.01% : 0.000471s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.00% : 0.000009s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000606s : 80: opt.transform.opt_a 0.00% : 0.000021s : 1: opt.transform.opt_after_cconv 0.00% : 0.000070s : 27: opt.transform.opt_b 0.00% : 0.000030s : 1: opt.transform.opt_trans_graph 0.00% : 0.000018s : 3: opt.transform.special_op_eliminate 0.00% : 0.000028s : 4: opt.transform.symbol_engine_opt 0.12% : 0.009606s : 1: opt_a 0.00% : 0.000083s : 1: opt_after_cconv 0.00% : 0.000138s : 1: opt_b 0.14% : 0.010911s : 1: optimize 0.00% : 0.000008s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000006s : 1: order_py_execute_after_rewriter 0.00% : 0.000035s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000013s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000008s : 1: partial_unused_args_eliminate 0.00% : 0.000006s : 1: pipeline_parallel_scheduler 0.00% : 0.000005s : 1: pipeline_split 0.00% : 0.000035s : 1: pre_auto_parallel 0.00% : 0.000015s : 1: py_interpret_to_execute 0.00% : 0.000011s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.00% : 0.000011s : 1: remove_dup_value 0.00% : 0.000200s : 1: renormalize.infer 0.00% : 0.000135s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000053s : 1: rewriter_after_opt_a 0.00% : 0.000027s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000008s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000066s : 1: symbol_engine_optimizer 99.51% : 8.036059s : 1: task_emit 0.00% : 0.000047s : 1: tuple_transform 0.17% : 0.014126s : 1: type_inference 0.00% : 0.000093s : 1: validate [WARNING] DEVICE(173053,fffed803e0f0,python):2025-02-07-13:55:16.480.882 [mindspore/ccsrc/plugin/device/ascend/hal/device/dump/ascend_dump.cc:188] ConvertFormatForOneTensor] The host shape is empty for file: /tmp/tmpy7wgwqaf/test_dump_hccl/0/20250207135516/3//kernel_graph0_1/1/0/Data.x.2.249.1738907716909768, use device shape as host shape: [const vector]{1, 1} [WARNING] DEVICE(173036,fffedc0390f0,python):2025-02-07-13:55:16.481.121 [mindspore/ccsrc/plugin/device/ascend/hal/device/dump/ascend_dump.cc:188] ConvertFormatForOneTensor] The host shape is empty for file: /tmp/tmpy7wgwqaf/test_dump_hccl/0/20250207135516/2//kernel_graph0_1/1/0/Data.x.2.15.1738907716910025, use device shape as host shape: [const vector]{1, 1} [WARNING] DEVICE(173024,fffeb40380f0,python):2025-02-07-13:55:16.481.515 [mindspore/ccsrc/plugin/device/ascend/hal/device/dump/ascend_dump.cc:188] ConvertFormatForOneTensor] The host shape is empty for file: /tmp/tmpy7wgwqaf/test_dump_hccl/0/20250207135516/1//kernel_graph0_1/1/0/Data.x.2.55.1738907716910363, use device shape as host shape: [const vector]{1, 1} [WARNING] DEVICE(173011,fffee803c0f0,python):2025-02-07-13:55:16.485.011 [mindspore/ccsrc/plugin/device/ascend/hal/device/dump/ascend_dump.cc:188] ConvertFormatForOneTensor] The host shape is empty for file: /tmp/tmpy7wgwqaf/test_dump_hccl/0/20250207135516/0//kernel_graph0_1/1/0/Data.x.2.30.1738907716914923, use device shape as host shape: [const vector]{1, 1} [WARNING] DEVICE(173104,fffee002e0f0,python):2025-02-07-13:55:16.485.617 [mindspore/ccsrc/plugin/device/ascend/hal/device/dump/ascend_dump.cc:188] ConvertFormatForOneTensor] The host shape is empty for file: /tmp/tmpy7wgwqaf/test_dump_hccl/0/20250207135516/6//kernel_graph0_1/1/0/Data.x.2.60.1738907717061333, use device shape as host shape: [const vector]{1, 1} [WARNING] DEVICE(173073,fffebc0240f0,python):2025-02-07-13:55:16.486.364 [mindspore/ccsrc/plugin/device/ascend/hal/device/dump/ascend_dump.cc:188] ConvertFormatForOneTensor] The host shape is empty for file: /tmp/tmpy7wgwqaf/test_dump_hccl/0/20250207135516/4//kernel_graph0_1/1/0/Data.x.2.207.1738907717062030, use device shape as host shape: [const vector]{1, 1} [WARNING] DEVICE(173118,fffef80200f0,python):2025-02-07-13:55:16.486.621 [mindspore/ccsrc/plugin/device/ascend/hal/device/dump/ascend_dump.cc:188] ConvertFormatForOneTensor] The host shape is empty for file: /tmp/tmpy7wgwqaf/test_dump_hccl/0/20250207135516/7//kernel_graph0_1/1/0/Data.x.2.32.1738907717062284, use device shape as host shape: [const vector]{1, 1} [WARNING] DEVICE(173087,fffed80300f0,python):2025-02-07-13:55:16.490.710 [mindspore/ccsrc/plugin/device/ascend/hal/device/dump/ascend_dump.cc:188] ConvertFormatForOneTensor] The host shape is empty for file: /tmp/tmpy7wgwqaf/test_dump_hccl/0/20250207135516/5//kernel_graph0_1/1/0/Data.x.2.121.1738907717067348, use device shape as host shape: [const vector]{1, 1} [WARNING] DEVICE(173036,ffffa9ff1c10,python):2025-02-07-13:55:20.899.675 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3a4a86a0 is not exist. [WARNING] DEVICE(173053,ffff98e3bc10,python):2025-02-07-13:55:21.033.926 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x26ad9660 is not exist. [WARNING] DEVICE(173011,ffffb4920c10,python):2025-02-07-13:55:23.254.648 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x303b5410 is not exist. [WARNING] DEVICE(173024,ffff9891fc10,python):2025-02-07-13:55:23.385.560 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x30212f20 is not exist. [WARNING] DEVICE(173104,ffff9e959c10,python):2025-02-07-13:55:23.417.104 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x4b0a1000 is not exist. [WARNING] DEVICE(173087,ffffa5985c10,python):2025-02-07-13:55:23.450.537 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x284dbce0 is not exist. [WARNING] DEVICE(173073,ffff86a8fc10,python):2025-02-07-13:55:23.494.299 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x4b77ebe0 is not exist. [WARNING] DEVICE(173118,ffffb8c65c10,python):2025-02-07-13:55:24.097.766 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x1fed6e20 is not exist. ret of exec_network_cmd: 0 . ============================== 1 passed in 42.53s ============================== ff8c39f2e51611efac92c4447d93fe45/pass/test_entry_msrun_test_msrun.log0000644000175400017540000107250114751343157025033 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/msrun, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collected 1 item test_entry_msrun.py [WARNING] ME(54123:281473096207376,MainProcess):2025-02-07-15:54:10.337.898 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:0, log file:worker_0.log. Environment variable [RANK_ID] is exported. [WARNING] ME(54123:281473096207376,MainProcess):2025-02-07-15:54:10.467.011 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:1, log file:worker_1.log. Environment variable [RANK_ID] is exported. [WARNING] ME(54123:281473096207376,MainProcess):2025-02-07-15:54:10.600.821 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:2, log file:worker_2.log. Environment variable [RANK_ID] is exported. [WARNING] ME(54123:281473096207376,MainProcess):2025-02-07-15:54:10.749.251 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:3, log file:worker_3.log. Environment variable [RANK_ID] is exported. [WARNING] ME(54123:281473096207376,MainProcess):2025-02-07-15:54:10.900.359 [mindspore/parallel/cluster/process_entity/_api.py:223] Distributed job is spawned. Waiting all processes to exit... [WARNING] ME(54175:281473095994384,MainProcess):2025-02-07-15:54:15.681.184 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] DISTRIBUTED(54175,ffff8fe68c10,python):2025-02-07-15:54:15.683.054 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:59358, destination: 127.0.0.1:10969 [WARNING] DISTRIBUTED(54175,ffff0b7fe0f0,python):2025-02-07-15:54:15.683.060 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:59358 to 127.0.0.1:10969 is successfully created. System errno: Success [WARNING] DISTRIBUTED(54175,ffff8fe68c10,python):2025-02-07-15:54:15.683.105 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10969 to be connected...Retry number: 1 [WARNING] ME(54186:281473137400848,MainProcess):2025-02-07-15:54:15.865.049 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] DISTRIBUTED(54186,ffff925e5c10,python):2025-02-07-15:54:15.867.096 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:59360, destination: 127.0.0.1:10969 [WARNING] DISTRIBUTED(54186,ffff124c80f0,python):2025-02-07-15:54:15.867.096 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:59360 to 127.0.0.1:10969 is successfully created. System errno: Success [WARNING] DISTRIBUTED(54186,ffff925e5c10,python):2025-02-07-15:54:15.867.184 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10969 to be connected...Retry number: 1 [WARNING] ME(54208:281472863673360,MainProcess):2025-02-07-15:54:16.136.315 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] DISTRIBUTED(54208,ffff01fb90f0,python):2025-02-07-15:54:16.138.237 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:59364 to 127.0.0.1:10969 is successfully created. System errno: Success [WARNING] DISTRIBUTED(54208,ffff820d9c10,python):2025-02-07-15:54:16.138.215 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:59364, destination: 127.0.0.1:10969 [WARNING] DISTRIBUTED(54208,ffff820d9c10,python):2025-02-07-15:54:16.138.391 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:59366, destination: 127.0.0.1:10969 [WARNING] DISTRIBUTED(54208,ffff02fbb0f0,python):2025-02-07-15:54:16.138.420 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:59366 to 127.0.0.1:10969 is successfully created. System errno: Success [WARNING] DISTRIBUTED(54208,ffff820d9c10,python):2025-02-07-15:54:16.138.437 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10969 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(54175,ffff8fe68c10,python):2025-02-07-15:54:16.183.478 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:59370, destination: 127.0.0.1:10969 [WARNING] DISTRIBUTED(54175,ffff10d4c0f0,python):2025-02-07-15:54:16.183.507 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:59370 to 127.0.0.1:10969 is successfully created. System errno: Success [WARNING] DISTRIBUTED(54175,ffff8fe68c10,python):2025-02-07-15:54:16.183.547 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10969 to be connected...Retry number: 2 [WARNING] ME(54221:281472867404816,MainProcess):2025-02-07-15:54:16.298.759 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] DISTRIBUTED(54221,ffff82468c10,python):2025-02-07-15:54:16.300.696 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:59372, destination: 127.0.0.1:10969 [WARNING] DISTRIBUTED(54221,ffff0235e0f0,python):2025-02-07-15:54:16.300.699 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:59372 to 127.0.0.1:10969 is successfully created. System errno: Success [WARNING] DISTRIBUTED(54221,ffff82468c10,python):2025-02-07-15:54:16.300.746 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10969 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(54186,ffff925e5c10,python):2025-02-07-15:54:16.367.447 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:59374, destination: 127.0.0.1:10969 [WARNING] DISTRIBUTED(54186,ffff925e5c10,python):2025-02-07-15:54:16.367.486 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10969 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(54186,ffff134ca0f0,python):2025-02-07-15:54:16.367.500 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:59374 to 127.0.0.1:10969 is successfully created. System errno: Success [WARNING] DISTRIBUTED(54208,ffff820d9c10,python):2025-02-07-15:54:16.638.992 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(54175,ffff8fe68c10,python):2025-02-07-15:54:16.683.904 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(54221,ffff82468c10,python):2025-02-07-15:54:16.800.921 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:59380, destination: 127.0.0.1:10969 [WARNING] DISTRIBUTED(54221,ffff033600f0,python):2025-02-07-15:54:16.800.944 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:59380 to 127.0.0.1:10969 is successfully created. System errno: Success [WARNING] DISTRIBUTED(54221,ffff82468c10,python):2025-02-07-15:54:16.800.955 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10969 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(54186,ffff925e5c10,python):2025-02-07-15:54:16.867.859 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(54208,ffff820d9c10,python):2025-02-07-15:54:17.139.083 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(54175,ffff8fe68c10,python):2025-02-07-15:54:17.184.006 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(54221,ffff82468c10,python):2025-02-07-15:54:17.301.662 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(54186,ffff925e5c10,python):2025-02-07-15:54:17.367.984 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(54208,ffff820d9c10,python):2025-02-07-15:54:17.639.165 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(54175,ffff8fe68c10,python):2025-02-07-15:54:17.684.114 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(54221,ffff82468c10,python):2025-02-07-15:54:17.801.796 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(54221,ffff82468c10,python):2025-02-07-15:54:17.801.840 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 3 rank id: 3 [WARNING] DISTRIBUTED(54186,ffff925e5c10,python):2025-02-07-15:54:17.868.102 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(54186,ffff925e5c10,python):2025-02-07-15:54:17.868.131 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 1 rank id: 1 [WARNING] DISTRIBUTED(54208,ffff820d9c10,python):2025-02-07-15:54:18.139.273 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(54208,ffff820d9c10,python):2025-02-07-15:54:18.139.302 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 2 rank id: 2 [WARNING] DISTRIBUTED(54175,ffff8fe68c10,python):2025-02-07-15:54:18.184.265 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(54175,ffff8fe68c10,python):2025-02-07-15:54:18.184.317 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 0 rank id: 0 [WARNING] DISTRIBUTED(54186,ffff925e5c10,python):2025-02-07-15:54:22.728.308 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(54186,ffff925e5c10,python):2025-02-07-15:54:22.728.489 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(54186,fffe86ffd0f0,python):2025-02-07-15:54:22.729.093 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 [WARNING] DISTRIBUTED(54221,ffff82468c10,python):2025-02-07-15:54:22.770.377 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(54221,ffff82468c10,python):2025-02-07-15:54:22.770.564 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(54221,fffe777fe0f0,python):2025-02-07-15:54:22.771.101 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 [WARNING] DISTRIBUTED(54175,ffff8fe68c10,python):2025-02-07-15:54:23.094.635 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(54175,ffff8fe68c10,python):2025-02-07-15:54:23.094.800 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(54175,fffe837fe0f0,python):2025-02-07-15:54:23.098.129 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(54175,fffe817fa0f0,python):2025-02-07-15:54:23.098.399 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(54208,ffff820d9c10,python):2025-02-07-15:54:23.201.581 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(54208,ffff820d9c10,python):2025-02-07-15:54:23.201.764 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(54208,fffe7e7fc0f0,python):2025-02-07-15:54:23.202.386 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(54208,fffe7dffb0f0,python):2025-02-07-15:54:23.202.633 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(54186,fffe86ffd0f0,python):2025-02-07-15:54:23.229.433 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(54186,fffe867fc0f0,python):2025-02-07-15:54:23.229.724 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(54221,fffe777fe0f0,python):2025-02-07-15:54:23.271.404 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(54221,fffe76ffd0f0,python):2025-02-07-15:54:23.271.685 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(54175,fffe817fa0f0,python):2025-02-07-15:54:23.469.856 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(54175,fffe837fe0f0,python):2025-02-07-15:54:23.470.063 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(54208,fffe7dffb0f0,python):2025-02-07-15:54:23.588.622 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(54208,fffe7e7fc0f0,python):2025-02-07-15:54:23.589.368 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(54186,fffe867fc0f0,python):2025-02-07-15:54:23.605.209 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(54186,fffe86ffd0f0,python):2025-02-07-15:54:23.605.389 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(54221,fffe76ffd0f0,python):2025-02-07-15:54:23.645.190 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(54221,fffe777fe0f0,python):2025-02-07-15:54:23.645.771 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] PROFILER(54186,ffff925e5c10,python):2025-02-07-15:54:23.685.600 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(54221,ffff82468c10,python):2025-02-07-15:54:23.685.600 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(54175,ffff8fe68c10,python):2025-02-07-15:54:23.685.600 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(54208,ffff820d9c10,python):2025-02-07-15:54:23.686.046 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] ME(54186:281473137400848,MainProcess):2025-02-07-15:54:23.725.535 [mindspore/parallel/_utils.py:359] You are suggested to use mindspore.context.set_auto_parallel_context(parameter_broadcast=True) or mindspore.common.set_seed() to share parameters among multi-devices. [WARNING] ME(54175:281473095994384,MainProcess):2025-02-07-15:54:23.725.825 [mindspore/parallel/_utils.py:359] You are suggested to use mindspore.context.set_auto_parallel_context(parameter_broadcast=True) or mindspore.common.set_seed() to share parameters among multi-devices. [WARNING] ME(54208:281472863673360,MainProcess):2025-02-07-15:54:23.726.967 [mindspore/parallel/_utils.py:359] You are suggested to use mindspore.context.set_auto_parallel_context(parameter_broadcast=True) or mindspore.common.set_seed() to share parameters among multi-devices. [WARNING] ME(54221:281472867404816,MainProcess):2025-02-07-15:54:23.727.521 [mindspore/parallel/_utils.py:359] You are suggested to use mindspore.context.set_auto_parallel_context(parameter_broadcast=True) or mindspore.common.set_seed() to share parameters among multi-devices. TotalTime = 14.1273, [21] [bootstrap]: 0.00089157 [type_inference]: 0.555479 [auto_monad]: 0.00415206 [graph_reusing]: 5.16502e-05 [inline]: 2.04984e-06 [parallel-infer-symbol]: 3.10037e-06 [pre_auto_parallel]: 0.00118502 [insert-virtual-dataset]: 4.4601e-06 [parallel-infer-symbol-second]: 1.19023e-06 [dataset_repeat_opt]: 1.4999e-06 [pipeline_split]: 1.79e-06 [optimize]: 0.277164, [52] [py_interpret_to_execute]: 0.0011453 [rewriter_before_opt_a]: 0.00308642 [opt_a]: 0.260951, [3] [Cycle 1]: 0.193535, [43] [expand_dump_flag]: 8.38698e-05 [switch_simplify]: 0.00216191 [loop_unroll]: 0.00140015 [a_1]: 0.0400203 [recompute_prepare]: 0.00036735 [updatestate_depend_eliminate]: 0.00086339 [updatestate_assign_eliminate]: 0.00016704 [updatestate_loads_eliminate]: 0.00057947 [parameter_eliminate]: 2.47802e-05 [a_2]: 0.005357 [accelerated_algorithm]: 0.00034834 [shard]: 2.44984e-06 [meta_shard_fg_expand]: 0.00012281 [shard_inline]: 0.00017487 [auto_parallel]: 0.00017072 [parallel]: 1.13803e-05 [flash_sp]: 8.30698e-05 [merge_comm]: 0.00012497 [allreduce_fusion]: 0.00011229 [matmul_add_comm_reduction]: 0.0002074 [allreduce_slice_to_reducescatter]: 6.59842e-07 [virtual_shard_identity]: 0.0001756 [virtual_dataset]: 0.00016203 [get_grad_eliminate_]: 0.0001594 [virtual_output]: 0.0001592 [merge_forward]: 0.00010946 [cell_reuse_recompute_pass]: 2.90014e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00030374 [before_grad]: 0.00029035 [inplace_validation]: 0.00014714 [meta_fg_expand]: 0.0449152 [inplace_validation_after_expand]: 0.00051492 [flash_sp_send_recv_attached]: 5.85988e-06 [receive_attached]: 1.68001e-05 [after_resolve]: 0.00086436 [a_after_grad]: 0.00123222 [special_op_eliminate]: 0.000615 [renormalize]: 0.0734763 [add_forward_monad_depend]: 0.00036471 [auto_monad_grad]: 9.67397e-05 [auto_monad_eliminator]: 0.00114831 [cse]: 0.0027247 [a_3]: 0.0131506 [Cycle 2]: 0.0509092, [43] [expand_dump_flag]: 3.218e-05 [switch_simplify]: 0.00082403 [loop_unroll]: 0.00082252 [a_1]: 0.0227689 [recompute_prepare]: 0.00015801 [updatestate_depend_eliminate]: 0.00063224 [updatestate_assign_eliminate]: 9.36999e-05 [updatestate_loads_eliminate]: 0.00019709 [parameter_eliminate]: 5.25964e-06 [a_2]: 0.00188542 [accelerated_algorithm]: 0.00013502 [shard]: 2.48011e-06 [meta_shard_fg_expand]: 7.234e-05 [shard_inline]: 0.0001158 [auto_parallel]: 0.00010207 [parallel]: 1.335e-05 [flash_sp]: 4.97978e-06 [merge_comm]: 8.57799e-05 [allreduce_fusion]: 7.65398e-05 [matmul_add_comm_reduction]: 0.00011103 [allreduce_slice_to_reducescatter]: 5.80214e-07 [virtual_shard_identity]: 0.00012074 [virtual_dataset]: 0.00011203 [get_grad_eliminate_]: 0.0001095 [virtual_output]: 0.00011168 [merge_forward]: 7.33398e-05 [cell_reuse_recompute_pass]: 2.41026e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021104 [before_grad]: 0.00019965 [inplace_validation]: 6.853e-05 [meta_fg_expand]: 0.00227998 [inplace_validation_after_expand]: 0.00068612 [flash_sp_send_recv_attached]: 3.08035e-06 [receive_attached]: 2.06986e-06 [after_resolve]: 0.00016262 [a_after_grad]: 0.00019272 [special_op_eliminate]: 0.00011428 [renormalize]: 0.0127128 [add_forward_monad_depend]: 5.01005e-06 [auto_monad_grad]: 2.36975e-06 [auto_monad_eliminator]: 0.00028744 [cse]: 0.00403896 [a_3]: 0.00089057 [Cycle 3]: 0.0102845, [43] [expand_dump_flag]: 3.58978e-06 [switch_simplify]: 0.00011552 [loop_unroll]: 0.0001111 [a_1]: 0.00380558 [recompute_prepare]: 0.0001144 [updatestate_depend_eliminate]: 0.00013193 [updatestate_assign_eliminate]: 7.79699e-05 [updatestate_loads_eliminate]: 7.946e-05 [parameter_eliminate]: 4.82984e-06 [a_2]: 0.00181998 [accelerated_algorithm]: 0.00013715 [shard]: 2.76975e-06 [meta_shard_fg_expand]: 6.32298e-05 [shard_inline]: 0.00011553 [auto_parallel]: 9.59998e-05 [parallel]: 1.228e-05 [flash_sp]: 2.88989e-06 [merge_comm]: 8.62801e-05 [allreduce_fusion]: 7.851e-05 [matmul_add_comm_reduction]: 0.00010976 [allreduce_slice_to_reducescatter]: 9.00123e-07 [virtual_shard_identity]: 0.00012058 [virtual_dataset]: 0.00011205 [get_grad_eliminate_]: 0.00015318 [virtual_output]: 0.00011681 [merge_forward]: 7.95098e-05 [cell_reuse_recompute_pass]: 3.50969e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021311 [before_grad]: 0.00020038 [inplace_validation]: 7.11503e-05 [meta_fg_expand]: 9.00598e-05 [inplace_validation_after_expand]: 9.58601e-05 [flash_sp_send_recv_attached]: 2.56998e-06 [receive_attached]: 1.64984e-06 [after_resolve]: 0.00014169 [a_after_grad]: 0.00018917 [special_op_eliminate]: 0.00011288 [renormalize]: 7.03149e-08 [add_forward_monad_depend]: 3.32994e-06 [auto_monad_grad]: 3.45008e-06 [auto_monad_eliminator]: 0.00019926 [cse]: 0.00034806 [a_3]: 0.00080431 [py_interpret_to_execute_after_opt_a]: 0.00012488 [slice_cell_reuse_recomputed_activation]: 3.13995e-06 [rewriter_after_opt_a]: 0.0011672 [convert_after_rewriter]: 0.00011462 [order_py_execute_after_rewriter]: 8.21599e-05 [opt_b]: 0.00351369, [1] [Cycle 1]: 0.00350528, [7] [b_1]: 0.00270703 [b_2]: 0.00012707 [updatestate_depend_eliminate]: 9.85102e-05 [updatestate_assign_eliminate]: 7.27102e-05 [updatestate_loads_eliminate]: 8.31499e-05 [renormalize]: 4.00003e-07 [cse]: 0.00035919 [optimize_parallel_all_gather_comm]: 0.00015571 [overlap_param_gather]: 6.63009e-06 [cconv]: 7.74399e-05 [loop_unroll]: 0.0010743 [opt_after_cconv]: 0.00143024, [1] [Cycle 1]: 0.00142282, [7] [c_1]: 0.00072236 [parameter_eliminate]: 3.01981e-06 [updatestate_depend_eliminate]: 0.00011385 [updatestate_assign_eliminate]: 8.09701e-05 [updatestate_loads_eliminate]: 9.10801e-05 [cse]: 0.00035488 [renormalize]: 6.20261e-07 [remove_dup_value]: 0.00050443 [tuple_transform]: 0.00100094, [1] [Cycle 1]: 0.00099331, [2] [d_1]: 0.00097346 [renormalize]: 4.49829e-07 [partial_unused_args_eliminate]: 3.53996e-06 [add_cache_embedding]: 0.00014124 [add_recomputation]: 0.00072787 [cse_after_recomputation]: 0.00027426, [1] [Cycle 1]: 0.00026434, [1] [cse]: 0.00025253 [environ_conv]: 0.00011604 [swap_dp_allreduce_reducescatter]: 0.00012489 [bias_add_comm_swap]: 2.35997e-06 [label_micro_interleaved_index]: 2.47033e-06 [label_fine_grained_interleaved_index]: 2.11969e-06 [merge_cast_opt]: 1.36998e-06 [slice_recompute_activation]: 2.08011e-06 [micro_interleaved_order_control]: 1.79e-06 [assign_add_opt]: 1.39102e-05 [ForceFp32Comm]: 9.29926e-07 [remove_cast_before_assign_add]: 1.39e-06 [full_micro_interleaved_order_control]: 2.0396e-06 [reorder_send_recv_between_fp_bp]: 2.21003e-06 [comm_op_add_attrs]: 1.05985e-06 [add_comm_op_reuse_tag]: 1.06031e-06 [interleave_split_concat_branches]: 8.30274e-07 [interleave_parallel_branches]: 9.49949e-07 [overlap_opt_shard_in_pipeline]: 1.57203e-05 [overlap_opt_shard_grad_in_pipeline]: 2.33017e-06 [control_data_broadcast_order]: 1.43982e-06 [grouped_pairwise_exchange_alltoall]: 1.26008e-06 [offloading_packed_experts]: 1.53016e-06 [overlap_recompute_and_grad_model_parallel]: 2.10991e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.07009e-06 [overlap_recompute_allgather_and_fa_grad]: 1.23028e-06 [overlap_grad_ring_attention]: 1.91992e-06 [overlap_grad_flash_sp]: 0.00013562 [begin_end_overlap_inline]: 9.69972e-07 [split_matmul_comm_elemetwise]: 2.03028e-06 [split_layernorm_comm]: 1.8701e-06 [handle_group_info]: 9.80217e-07 [symbol_engine_optimizer]: 0.00073096, [1] [Cycle 1]: 0.00072433, [6] [build]: 5.38998e-05 [elim_shapecalc]: 0.00012529 [elim_not_effective]: 0.00019982 [opt_reshape]: 0.00011649 [fold_const_symbol]: 0.00019152 [renormalize]: 4.70318e-07 [pipeline_parallel_scheduler]: 2.12993e-06 [auto_monad_reorder]: 0.00035772 [get_jit_bprop_graph]: 7.79983e-07 [rewriter_after_jit_bprop_graph]: 4.60073e-07 [eliminate_special_op_node]: 0.00099916 [distribtued_split]: 0.00035147 [validate]: 0.00029765 [task_emit]: 13.2858 [execute]: 1.14897e-05 Sums bootstrap : 0.000892s : 0.01% type_inference : 0.555479s : 3.93% auto_monad : 0.004152s : 0.03% graph_reusing : 0.000052s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.001185s : 0.01% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.001145s : 0.01% optimize.rewriter_before_opt_a : 0.003086s : 0.02% optimize.opt_a.expand_dump_flag : 0.000120s : 0.00% optimize.opt_a.switch_simplify : 0.003101s : 0.02% optimize.opt_a.loop_unroll : 0.002334s : 0.02% optimize.opt_a.a_1 : 0.066595s : 0.47% optimize.opt_a.recompute_prepare : 0.000640s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.001628s : 0.01% optimize.opt_a.updatestate_assign_eliminate : 0.000339s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000856s : 0.01% optimize.opt_a.parameter_eliminate : 0.000035s : 0.00% optimize.opt_a.a_2 : 0.009062s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000621s : 0.00% optimize.opt_a.shard : 0.000008s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000258s : 0.00% optimize.opt_a.shard_inline : 0.000406s : 0.00% optimize.opt_a.auto_parallel : 0.000369s : 0.00% optimize.opt_a.parallel : 0.000037s : 0.00% optimize.opt_a.flash_sp : 0.000091s : 0.00% optimize.opt_a.merge_comm : 0.000297s : 0.00% optimize.opt_a.allreduce_fusion : 0.000267s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000428s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000417s : 0.00% optimize.opt_a.virtual_dataset : 0.000386s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000422s : 0.00% optimize.opt_a.virtual_output : 0.000388s : 0.00% optimize.opt_a.merge_forward : 0.000262s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000728s : 0.01% optimize.opt_a.before_grad : 0.000690s : 0.00% optimize.opt_a.inplace_validation : 0.000287s : 0.00% optimize.opt_a.meta_fg_expand : 0.047285s : 0.33% optimize.opt_a.inplace_validation_after_expand : 0.001297s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000012s : 0.00% optimize.opt_a.receive_attached : 0.000021s : 0.00% optimize.opt_a.after_resolve : 0.001169s : 0.01% optimize.opt_a.a_after_grad : 0.001614s : 0.01% optimize.opt_a.special_op_eliminate : 0.000842s : 0.01% optimize.opt_a.renormalize : 0.086189s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000373s : 0.00% optimize.opt_a.auto_monad_grad : 0.000103s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.001635s : 0.01% optimize.opt_a.cse : 0.007112s : 0.05% optimize.opt_a.a_3 : 0.014845s : 0.11% optimize.py_interpret_to_execute_after_opt_a : 0.000125s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001167s : 0.01% optimize.convert_after_rewriter : 0.000115s : 0.00% optimize.order_py_execute_after_rewriter : 0.000082s : 0.00% optimize.opt_b.b_1 : 0.002707s : 0.02% optimize.opt_b.b_2 : 0.000127s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000099s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000073s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000083s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000359s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000156s : 0.00% optimize.overlap_param_gather : 0.000007s : 0.00% optimize.cconv : 0.000077s : 0.00% optimize.loop_unroll : 0.001074s : 0.01% optimize.opt_after_cconv.c_1 : 0.000722s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000114s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000081s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000091s : 0.00% optimize.opt_after_cconv.cse : 0.000355s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000504s : 0.00% optimize.tuple_transform.d_1 : 0.000973s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000004s : 0.00% optimize.add_cache_embedding : 0.000141s : 0.00% optimize.add_recomputation : 0.000728s : 0.01% optimize.cse_after_recomputation.cse : 0.000253s : 0.00% optimize.environ_conv : 0.000116s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000125s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000014s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000016s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000136s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000054s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000125s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000200s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000116s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000192s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000358s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000999s : 0.01% distribtued_split : 0.000351s : 0.00% validate : 0.000298s : 0.00% task_emit : 13.285838s : 94.10% execute : 0.000011s : 0.00% Time group info: ------[substitution.] 0.017607 4191 1.53% : 0.000269s : 57: substitution.arithmetic_simplify 0.32% : 0.000057s : 16: substitution.cast_eliminate 0.31% : 0.000054s : 48: substitution.depend_value_elim 0.16% : 0.000028s : 97: substitution.elim_not_effective 0.12% : 0.000021s : 16: substitution.environ_get_add_eliminate 0.05% : 0.000009s : 8: substitution.environ_get_depend_swap 0.17% : 0.000030s : 32: substitution.environ_get_eliminate 0.29% : 0.000051s : 16: substitution.environ_get_set_eliminate 0.42% : 0.000074s : 110: substitution.float_depend_g_call 0.06% : 0.000011s : 16: substitution.float_environ_get_switch 0.13% : 0.000022s : 14: substitution.float_tuple_getitem_switch 0.16% : 0.000028s : 97: substitution.fold_const_symbol 6.14% : 0.001081s : 8: substitution.getattr_setattr_resolve 0.47% : 0.000083s : 120: substitution.graph_param_transform 0.07% : 0.000012s : 20: substitution.incorporate_call 0.05% : 0.000010s : 20: substitution.incorporate_call_switch 66.97% : 0.011792s : 369: substitution.inline 0.86% : 0.000151s : 28: substitution.inline_without_move 0.75% : 0.000133s : 338: substitution.j_node_and_user_rematch 0.95% : 0.000168s : 36: substitution.less_batch_normalization 0.42% : 0.000074s : 158: substitution.load_eliminater 0.43% : 0.000076s : 72: substitution.minmaximum_grad 0.01% : 0.000001s : 1: substitution.opt_reshape 0.14% : 0.000024s : 8: substitution.partial_defer_inline 1.23% : 0.000216s : 110: substitution.partial_eliminate 0.12% : 0.000021s : 32: substitution.reduce_all_const_elim 0.99% : 0.000173s : 338: substitution.remove_not_recompute_node 2.90% : 0.000511s : 263: substitution.replace_applicator 0.40% : 0.000070s : 148: substitution.replace_old_param 0.09% : 0.000016s : 2: substitution.reshape_eliminate 0.07% : 0.000013s : 10: substitution.set_cell_output_no_recompute 0.04% : 0.000007s : 2: substitution.specialize_transform 0.21% : 0.000037s : 32: substitution.split_environ_get_set_with_tuple_value 0.39% : 0.000070s : 31: substitution.switch_simplify 1.34% : 0.000236s : 76: substitution.tuple_list_convert_item_index_to_positive 0.71% : 0.000124s : 92: substitution.tuple_list_get_item_const_eliminator 1.22% : 0.000215s : 92: substitution.tuple_list_get_item_depend_reorder 3.76% : 0.000662s : 283: substitution.tuple_list_get_item_eliminator 0.81% : 0.000143s : 92: substitution.tuple_list_get_set_item_eliminator 2.06% : 0.000363s : 416: substitution.updatestate_pure_node_eliminater 2.67% : 0.000471s : 467: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.554728 2 92.21% : 0.511520s : 1: type_inference.infer 7.79% : 0.043208s : 1: type_inference.specialize ------[replace.] 0.006514 662 0.10% : 0.000007s : 1: replace.arithmetic_simplify 1.40% : 0.000091s : 16: replace.cast_eliminate 0.44% : 0.000029s : 3: replace.depend_value_elim 2.05% : 0.000134s : 8: replace.environ_get_set_eliminate 1.60% : 0.000104s : 6: replace.getattr_setattr_resolve 48.97% : 0.003190s : 360: replace.inline 7.41% : 0.000483s : 32: replace.partial_eliminate 2.94% : 0.000191s : 9: replace.replace_applicator 5.58% : 0.000363s : 31: replace.switch_simplify 2.05% : 0.000133s : 16: replace.tuple_list_get_item_depend_reorder 27.05% : 0.001762s : 179: replace.tuple_list_get_item_eliminator 0.40% : 0.000026s : 1: replace.updatestate_useless_node_eliminater ------[match.] 0.013376 662 0.11% : 0.000015s : 1: match.arithmetic_simplify 0.38% : 0.000050s : 16: match.cast_eliminate 0.01% : 0.000001s : 3: match.depend_value_elim 0.29% : 0.000039s : 8: match.environ_get_set_eliminate 7.22% : 0.000965s : 6: match.getattr_setattr_resolve 86.71% : 0.011598s : 360: match.inline 1.09% : 0.000146s : 32: match.partial_eliminate 0.40% : 0.000054s : 9: match.replace_applicator 0.42% : 0.000056s : 31: match.switch_simplify 0.60% : 0.000080s : 16: match.tuple_list_get_item_depend_reorder 2.71% : 0.000363s : 179: match.tuple_list_get_item_eliminator 0.07% : 0.000010s : 1: match.updatestate_useless_node_eliminater ------[predicate.] 0.017037113263 0.99% : 0.000169s : 1278: predicate.accumulaten_eliminater 0.16% : 0.000027s : 120: predicate.ad_related_special_op_eliminate 0.48% : 0.000082s : 613: predicate.addn_check_dump 1.00% : 0.000171s : 1278: predicate.addn_zero_filter 1.01% : 0.000172s : 1278: predicate.adjust_all_reduce_mul_add 1.91% : 0.000326s : 1892: predicate.arithmetic_simplify 1.02% : 0.000174s : 1295: predicate.cast_eliminate 1.99% : 0.000339s : 2090: predicate.check_bprop_eliminate 0.48% : 0.000082s : 613: predicate.compare_switch_simplify 0.05% : 0.000009s : 120: predicate.const_output_eliminate 0.11% : 0.000018s : 120: predicate.convert_tensor_all_eliminate 1.47% : 0.000251s : 1498: predicate.convert_tensor_eliminate 0.50% : 0.000086s : 613: predicate.depend_value_elim 1.11% : 0.000190s : 1303: predicate.dict_get_item_const_eliminator 1.13% : 0.000193s : 1303: predicate.dict_get_item_eliminator 1.10% : 0.000188s : 1303: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 120: predicate.elim_not_effective 0.11% : 0.000019s : 120: predicate.elim_shapecalc_of_broadcastargs 1.48% : 0.000252s : 1415: predicate.environ_add_const_eliminate 1.41% : 0.000240s : 1423: predicate.environ_get_add_eliminate 1.08% : 0.000185s : 1415: predicate.environ_get_depend_swap 1.62% : 0.000277s : 2036: predicate.environ_get_eliminate 1.13% : 0.000192s : 1423: predicate.environ_get_set_eliminate 1.49% : 0.000254s : 1891: predicate.exchange_switch_depend_value 1.84% : 0.000314s : 1891: predicate.float_depend_g_call 0.49% : 0.000084s : 613: predicate.float_environ_get_switch 0.59% : 0.000100s : 733: predicate.float_tuple_getitem_switch 0.05% : 0.000009s : 120: predicate.fold_const_symbol 0.60% : 0.000102s : 424: predicate.get_grad_eliminate 0.07% : 0.000012s : 40: predicate.getattr_setattr_resolve 0.06% : 0.000010s : 120: predicate.graph_param_transform 0.48% : 0.000083s : 613: predicate.incorporate_call 0.48% : 0.000082s : 613: predicate.incorporate_call_switch 5.01% : 0.000853s : 4446: predicate.inline 1.10% : 0.000187s : 922: predicate.inline_without_move 0.18% : 0.000031s : 424: predicate.j_node_and_user_rematch 0.42% : 0.000072s : 442: predicate.less_batch_normalization 1.41% : 0.000241s : 1738: predicate.list_to_tuple_eliminator_ 2.45% : 0.000417s : 3048: predicate.load_eliminater 0.18% : 0.000031s : 120: predicate.loop_unroll_after_grad 2.70% : 0.000461s : 2580: predicate.loop_unroll_before_grad 1.28% : 0.000219s : 1559: predicate.make_slice_get_slice_eliminator 0.48% : 0.000083s : 613: predicate.merge_addn 1.91% : 0.000326s : 2054: predicate.micro_step_allgather_replace 1.93% : 0.000329s : 2054: predicate.mini_step_allgather_replace 1.01% : 0.000173s : 1279: predicate.minmaximum_grad 0.11% : 0.000019s : 120: predicate.mutable_eliminate 0.10% : 0.000018s : 120: predicate.opt_reshape 0.11% : 0.000018s : 120: predicate.parallel_virtual_node 2.86% : 0.000487s : 1891: predicate.partial_defer_inline 1.45% : 0.000246s : 1650: predicate.partial_eliminate 1.03% : 0.000175s : 1278: predicate.print_const_string_wrapper 0.50% : 0.000085s : 608: predicate.reduce_all_const_elim 1.20% : 0.000205s : 1279: predicate.reduce_eliminate 0.18% : 0.000030s : 424: predicate.remove_not_recompute_node 1.75% : 0.000299s : 3570: predicate.replace_applicator 0.41% : 0.000069s : 922: predicate.replace_old_param 0.05% : 0.000009s : 120: predicate.reset_defer_inline 1.00% : 0.000171s : 1279: predicate.reshape_eliminate 2.04% : 0.000347s : 2054: predicate.row_tensor_add_zeros_like 0.11% : 0.000019s : 120: predicate.row_tensor_eliminate 2.15% : 0.000366s : 2090: predicate.same_eliminate 0.25% : 0.000042s : 534: predicate.set_cell_output_no_recompute 0.37% : 0.000063s : 424: predicate.shard_identity_eliminate 1.02% : 0.000173s : 1042: predicate.special_op_eliminate 0.56% : 0.000095s : 613: predicate.specialize_transform 2.10% : 0.000358s : 2054: predicate.split_environ_get_set_with_tuple_value 0.84% : 0.000143s : 922: predicate.stack_unstack_eliminate 2.40% : 0.000408s : 3048: predicate.stopgrad_eliminater 0.10% : 0.000017s : 120: predicate.switch_call_monad_eliminater 1.67% : 0.000284s : 1891: predicate.switch_defer_inline 3.63% : 0.000618s : 3981: predicate.switch_layer_defer_inline 5.19% : 0.000884s : 5146: predicate.switch_simplify 1.02% : 0.000174s : 1279: predicate.tile_eliminate 1.03% : 0.000175s : 1279: predicate.transpose_eliminate 1.36% : 0.000232s : 1543: predicate.tuple_list_convert_item_index_to_positive 1.33% : 0.000227s : 1559: predicate.tuple_list_get_item_const_eliminator 1.25% : 0.000213s : 1559: predicate.tuple_list_get_item_depend_reorder 2.12% : 0.000361s : 2351: predicate.tuple_list_get_item_eliminator 1.32% : 0.000225s : 1559: predicate.tuple_list_get_set_item_eliminator 1.90% : 0.000323s : 2172: predicate.tuple_list_set_item_eliminator 1.42% : 0.000242s : 1738: predicate.tuple_to_list_eliminator_ 2.46% : 0.000419s : 3048: predicate.updatestate_pure_node_eliminater 3.06% : 0.000522s : 3662: predicate.updatestate_useless_node_eliminater 0.10% : 0.000018s : 120: predicate.value_based_eliminate 0.36% : 0.000061s : 424: predicate.virtual_dataset_eliminate 0.36% : 0.000061s : 424: predicate.virtual_output_eliminate 0.11% : 0.000019s : 120: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.045604 674 58.53% : 0.026692s : 274: func_graph_cloner_run.FuncGraphClonerGraph 0.38% : 0.000173s : 8: func_graph_cloner_run.FuncGraphClonerNode 41.09% : 0.018738s : 392: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 14.601445 263 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000148s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.01% : 0.000739s : 1: add_recomputation 0.00% : 0.000018s : 1: assign_add_opt 0.03% : 0.004178s : 1: auto_monad 0.00% : 0.000371s : 1: auto_monad_reorder 0.00% : 0.000006s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.000926s : 1: bootstrap 0.00% : 0.000083s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.00% : 0.000122s : 1: convert_after_rewriter 0.00% : 0.000279s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000365s : 1: distribtued_split 0.01% : 0.001016s : 1: eliminate_special_op_node 0.00% : 0.000124s : 1: environ_conv 0.00% : 0.000020s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000060s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000008s : 1: inline 0.00% : 0.000011s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000006s : 1: label_micro_interleaved_index 0.01% : 0.001085s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.00% : 0.000143s : 1: opt.transform.loop_unroll_optimizer 0.71% : 0.104021s : 145: opt.transform.opt_a 0.00% : 0.000720s : 1: opt.transform.opt_after_cconv 0.02% : 0.002800s : 27: opt.transform.opt_b 0.01% : 0.001289s : 4: opt.transform.opt_resolve 0.01% : 0.000970s : 1: opt.transform.opt_trans_graph 0.00% : 0.000404s : 3: opt.transform.special_op_eliminate 0.00% : 0.000627s : 4: opt.transform.symbol_engine_opt 1.79% : 0.260958s : 1: opt_a 0.01% : 0.001435s : 1: opt_after_cconv 0.02% : 0.003518s : 1: opt_b 1.90% : 0.277178s : 1: optimize 0.00% : 0.000164s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000090s : 1: order_py_execute_after_rewriter 0.00% : 0.000143s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000020s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000011s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000008s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.01% : 0.001204s : 1: pre_auto_parallel 0.01% : 0.001159s : 1: py_interpret_to_execute 0.00% : 0.000132s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000005s : 1: remove_cast_before_assign_add 0.00% : 0.000517s : 1: remove_dup_value 0.33% : 0.048637s : 2: renormalize.infer 0.26% : 0.037513s : 2: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000007s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001179s : 1: rewriter_after_opt_a 0.02% : 0.003100s : 1: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000131s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000735s : 1: symbol_engine_optimizer 90.99% : 13.285873s : 1: task_emit 0.01% : 0.001006s : 1: tuple_transform 3.80% : 0.555578s : 1: type_inference 0.00% : 0.000442s : 1: validate [WARNING] ME(54175:281473095994384,MainProcess):2025-02-07-15:54:39.322.544 [mindspore/parallel/_utils.py:359] You are suggested to use mindspore.context.set_auto_parallel_context(parameter_broadcast=True) or mindspore.common.set_seed() to share parameters among multi-devices. TotalTime = 13.9929, [21] [bootstrap]: 0.00093197 [type_inference]: 0.550516 [auto_monad]: 0.00421544 [graph_reusing]: 5.43999e-05 [inline]: 2.1304e-06 [parallel-infer-symbol]: 3.30014e-06 [pre_auto_parallel]: 0.00123501 [insert-virtual-dataset]: 4.83962e-06 [parallel-infer-symbol-second]: 1.30991e-06 [dataset_repeat_opt]: 1.66008e-06 [pipeline_split]: 1.84029e-06 [optimize]: 0.276301, [52] [py_interpret_to_execute]: 0.00113185 [rewriter_before_opt_a]: 0.00302946 [opt_a]: 0.260034, [3] [Cycle 1]: 0.189271, [43] [expand_dump_flag]: 7.209e-05 [switch_simplify]: 0.00210186 [loop_unroll]: 0.00143582 [a_1]: 0.0395599 [recompute_prepare]: 0.00031224 [updatestate_depend_eliminate]: 0.00087409 [updatestate_assign_eliminate]: 0.00020366 [updatestate_loads_eliminate]: 0.00059491 [parameter_eliminate]: 2.51597e-05 [a_2]: 0.00540406 [accelerated_algorithm]: 0.00035348 [shard]: 2.99001e-06 [meta_shard_fg_expand]: 0.00010089 [shard_inline]: 0.00017933 [auto_parallel]: 0.00012972 [parallel]: 1.32099e-05 [flash_sp]: 8.18302e-05 [merge_comm]: 0.00012753 [allreduce_fusion]: 0.00011309 [matmul_add_comm_reduction]: 0.00020337 [allreduce_slice_to_reducescatter]: 8.2003e-07 [virtual_shard_identity]: 0.00019287 [virtual_dataset]: 0.00017014 [get_grad_eliminate_]: 0.00016245 [virtual_output]: 0.00016423 [merge_forward]: 0.00011413 [cell_reuse_recompute_pass]: 2.88012e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00030698 [before_grad]: 0.00029248 [inplace_validation]: 0.00015159 [meta_fg_expand]: 0.0411077 [inplace_validation_after_expand]: 0.0005252 [flash_sp_send_recv_attached]: 6.82985e-06 [receive_attached]: 3.64985e-06 [after_resolve]: 0.00081335 [a_after_grad]: 0.00126509 [special_op_eliminate]: 0.000615 [renormalize]: 0.073386 [add_forward_monad_depend]: 0.00034263 [auto_monad_grad]: 9.06698e-05 [auto_monad_eliminator]: 0.00114299 [cse]: 0.00256956 [a_3]: 0.0133973 [Cycle 2]: 0.0541285, [43] [expand_dump_flag]: 2.90303e-05 [switch_simplify]: 0.00084008 [loop_unroll]: 0.00083289 [a_1]: 0.0221329 [recompute_prepare]: 0.00014613 [updatestate_depend_eliminate]: 0.00054796 [updatestate_assign_eliminate]: 9.793e-05 [updatestate_loads_eliminate]: 0.00018998 [parameter_eliminate]: 4.27989e-06 [a_2]: 0.0018572 [accelerated_algorithm]: 0.00013857 [shard]: 2.57976e-06 [meta_shard_fg_expand]: 0.0058271 [shard_inline]: 0.00014851 [auto_parallel]: 0.00011505 [parallel]: 1.79904e-05 [flash_sp]: 5.24987e-06 [merge_comm]: 9.02e-05 [allreduce_fusion]: 7.708e-05 [matmul_add_comm_reduction]: 0.00010712 [allreduce_slice_to_reducescatter]: 6.9011e-07 [virtual_shard_identity]: 0.00012341 [virtual_dataset]: 0.00012225 [get_grad_eliminate_]: 0.00011394 [virtual_output]: 0.00011732 [merge_forward]: 7.569e-05 [cell_reuse_recompute_pass]: 2.62028e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021712 [before_grad]: 0.00020211 [inplace_validation]: 6.99502e-05 [meta_fg_expand]: 0.00206943 [inplace_validation_after_expand]: 0.00062824 [flash_sp_send_recv_attached]: 2.98023e-06 [receive_attached]: 2.04006e-06 [after_resolve]: 0.00015871 [a_after_grad]: 0.00020601 [special_op_eliminate]: 0.00011305 [renormalize]: 0.0116286 [add_forward_monad_depend]: 5.66989e-06 [auto_monad_grad]: 3.38024e-06 [auto_monad_eliminator]: 0.00029958 [cse]: 0.0034896 [a_3]: 0.00084742 [Cycle 3]: 0.0103334, [43] [expand_dump_flag]: 1.88034e-06 [switch_simplify]: 0.00011713 [loop_unroll]: 0.00011231 [a_1]: 0.00382705 [recompute_prepare]: 0.00011564 [updatestate_depend_eliminate]: 0.00012149 [updatestate_assign_eliminate]: 7.73496e-05 [updatestate_loads_eliminate]: 7.83498e-05 [parameter_eliminate]: 2.50014e-06 [a_2]: 0.00186195 [accelerated_algorithm]: 0.00014035 [shard]: 1.69966e-06 [meta_shard_fg_expand]: 4.16902e-05 [shard_inline]: 0.00011898 [auto_parallel]: 9.02098e-05 [parallel]: 9.1698e-06 [flash_sp]: 2.00002e-06 [merge_comm]: 8.485e-05 [allreduce_fusion]: 7.98102e-05 [matmul_add_comm_reduction]: 0.00010433 [allreduce_slice_to_reducescatter]: 5.0012e-07 [virtual_shard_identity]: 0.00011863 [virtual_dataset]: 0.00011842 [get_grad_eliminate_]: 0.00011161 [virtual_output]: 0.00011809 [merge_forward]: 7.79801e-05 [cell_reuse_recompute_pass]: 2.75997e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021957 [before_grad]: 0.00020569 [inplace_validation]: 7.60402e-05 [meta_fg_expand]: 9.20701e-05 [inplace_validation_after_expand]: 9.395e-05 [flash_sp_send_recv_attached]: 1.34995e-06 [receive_attached]: 1.07009e-06 [after_resolve]: 0.0001378 [a_after_grad]: 0.00020245 [special_op_eliminate]: 0.00012785 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 2.35997e-06 [auto_monad_grad]: 1.51992e-06 [auto_monad_eliminator]: 0.00018283 [cse]: 0.00033365 [a_3]: 0.00084162 [py_interpret_to_execute_after_opt_a]: 0.00011786 [slice_cell_reuse_recomputed_activation]: 3.41004e-06 [rewriter_after_opt_a]: 0.00110544 [convert_after_rewriter]: 0.00011347 [order_py_execute_after_rewriter]: 8.169e-05 [opt_b]: 0.00361032, [1] [Cycle 1]: 0.00360166, [7] [b_1]: 0.00279957 [b_2]: 0.00012246 [updatestate_depend_eliminate]: 9.84198e-05 [updatestate_assign_eliminate]: 7.47801e-05 [updatestate_loads_eliminate]: 8.49199e-05 [renormalize]: 5.69969e-07 [cse]: 0.00035805 [optimize_parallel_all_gather_comm]: 0.00013287 [overlap_param_gather]: 2.283e-05 [cconv]: 6.44703e-05 [loop_unroll]: 0.00108245 [opt_after_cconv]: 0.00151286, [1] [Cycle 1]: 0.00150515, [7] [c_1]: 0.00079396 [parameter_eliminate]: 2.6701e-06 [updatestate_depend_eliminate]: 0.00011282 [updatestate_assign_eliminate]: 8.46698e-05 [updatestate_loads_eliminate]: 9.42298e-05 [cse]: 0.00035422 [renormalize]: 5.10365e-07 [remove_dup_value]: 0.00048766 [tuple_transform]: 0.00093726, [1] [Cycle 1]: 0.00092967, [2] [d_1]: 0.00090842 [renormalize]: 5.10365e-07 [partial_unused_args_eliminate]: 3.29036e-06 [add_cache_embedding]: 0.00013153 [add_recomputation]: 0.00068414 [cse_after_recomputation]: 0.00029078, [1] [Cycle 1]: 0.00027926, [1] [cse]: 0.00026524 [environ_conv]: 0.00011633 [swap_dp_allreduce_reducescatter]: 0.00012951 [bias_add_comm_swap]: 2.72039e-06 [label_micro_interleaved_index]: 2.46987e-06 [label_fine_grained_interleaved_index]: 2.10013e-06 [merge_cast_opt]: 1.56974e-06 [slice_recompute_activation]: 1.88034e-06 [micro_interleaved_order_control]: 1.9297e-06 [assign_add_opt]: 1.23703e-05 [ForceFp32Comm]: 8.79634e-07 [remove_cast_before_assign_add]: 1.03004e-06 [full_micro_interleaved_order_control]: 2.46987e-06 [reorder_send_recv_between_fp_bp]: 1.93994e-06 [comm_op_add_attrs]: 1.0198e-06 [add_comm_op_reuse_tag]: 1.11992e-06 [interleave_split_concat_branches]: 8.40053e-07 [interleave_parallel_branches]: 8.2003e-07 [overlap_opt_shard_in_pipeline]: 2.9e-05 [overlap_opt_shard_grad_in_pipeline]: 2.54018e-06 [control_data_broadcast_order]: 1.43005e-06 [grouped_pairwise_exchange_alltoall]: 1.54972e-06 [offloading_packed_experts]: 1.11014e-06 [overlap_recompute_and_grad_model_parallel]: 2.25985e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.5018e-07 [overlap_recompute_allgather_and_fa_grad]: 1.21025e-06 [overlap_grad_ring_attention]: 1.87987e-06 [overlap_grad_flash_sp]: 0.00013848 [begin_end_overlap_inline]: 8.69855e-07 [split_matmul_comm_elemetwise]: 2.33995e-06 [split_layernorm_comm]: 1.87987e-06 [handle_group_info]: 1.13994e-06 [symbol_engine_optimizer]: 0.00074034, [1] [Cycle 1]: 0.00073311, [6] [build]: 5.029e-05 [elim_shapecalc]: 0.00012863 [elim_not_effective]: 0.00020267 [opt_reshape]: 0.0001171 [fold_const_symbol]: 0.00019357 [renormalize]: 5.09899e-07 [pipeline_parallel_scheduler]: 2.65008e-06 [auto_monad_reorder]: 0.00035602 [get_jit_bprop_graph]: 5.69969e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00093238 [distribtued_split]: 0.00037318 [validate]: 0.00029709 [task_emit]: 13.1571 [execute]: 1.41002e-05 Sums bootstrap : 0.000932s : 0.01% type_inference : 0.550516s : 3.94% auto_monad : 0.004215s : 0.03% graph_reusing : 0.000054s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.001235s : 0.01% insert-virtual-dataset : 0.000005s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.001132s : 0.01% optimize.rewriter_before_opt_a : 0.003029s : 0.02% optimize.opt_a.expand_dump_flag : 0.000103s : 0.00% optimize.opt_a.switch_simplify : 0.003059s : 0.02% optimize.opt_a.loop_unroll : 0.002381s : 0.02% optimize.opt_a.a_1 : 0.065520s : 0.47% optimize.opt_a.recompute_prepare : 0.000574s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.001544s : 0.01% optimize.opt_a.updatestate_assign_eliminate : 0.000379s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000863s : 0.01% optimize.opt_a.parameter_eliminate : 0.000032s : 0.00% optimize.opt_a.a_2 : 0.009123s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000632s : 0.00% optimize.opt_a.shard : 0.000007s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.005970s : 0.04% optimize.opt_a.shard_inline : 0.000447s : 0.00% optimize.opt_a.auto_parallel : 0.000335s : 0.00% optimize.opt_a.parallel : 0.000040s : 0.00% optimize.opt_a.flash_sp : 0.000089s : 0.00% optimize.opt_a.merge_comm : 0.000303s : 0.00% optimize.opt_a.allreduce_fusion : 0.000270s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000415s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000435s : 0.00% optimize.opt_a.virtual_dataset : 0.000411s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000388s : 0.00% optimize.opt_a.virtual_output : 0.000400s : 0.00% optimize.opt_a.merge_forward : 0.000268s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000744s : 0.01% optimize.opt_a.before_grad : 0.000700s : 0.01% optimize.opt_a.inplace_validation : 0.000298s : 0.00% optimize.opt_a.meta_fg_expand : 0.043269s : 0.31% optimize.opt_a.inplace_validation_after_expand : 0.001247s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000011s : 0.00% optimize.opt_a.receive_attached : 0.000007s : 0.00% optimize.opt_a.after_resolve : 0.001110s : 0.01% optimize.opt_a.a_after_grad : 0.001674s : 0.01% optimize.opt_a.special_op_eliminate : 0.000856s : 0.01% optimize.opt_a.renormalize : 0.085015s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000351s : 0.00% optimize.opt_a.auto_monad_grad : 0.000096s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.001625s : 0.01% optimize.opt_a.cse : 0.006393s : 0.05% optimize.opt_a.a_3 : 0.015086s : 0.11% optimize.py_interpret_to_execute_after_opt_a : 0.000118s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001105s : 0.01% optimize.convert_after_rewriter : 0.000113s : 0.00% optimize.order_py_execute_after_rewriter : 0.000082s : 0.00% optimize.opt_b.b_1 : 0.002800s : 0.02% optimize.opt_b.b_2 : 0.000122s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000098s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000075s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000085s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000358s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000133s : 0.00% optimize.overlap_param_gather : 0.000023s : 0.00% optimize.cconv : 0.000064s : 0.00% optimize.loop_unroll : 0.001082s : 0.01% optimize.opt_after_cconv.c_1 : 0.000794s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000113s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000085s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000094s : 0.00% optimize.opt_after_cconv.cse : 0.000354s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000488s : 0.00% optimize.tuple_transform.d_1 : 0.000908s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000132s : 0.00% optimize.add_recomputation : 0.000684s : 0.00% optimize.cse_after_recomputation.cse : 0.000265s : 0.00% optimize.environ_conv : 0.000116s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000130s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000012s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000029s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000138s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000050s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000129s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000203s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000117s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000194s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000356s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000932s : 0.01% distribtued_split : 0.000373s : 0.00% validate : 0.000297s : 0.00% task_emit : 13.157142s : 94.09% execute : 0.000014s : 0.00% Time group info: ------[substitution.] 0.016670 4191 1.52% : 0.000253s : 57: substitution.arithmetic_simplify 0.35% : 0.000058s : 16: substitution.cast_eliminate 0.32% : 0.000053s : 48: substitution.depend_value_elim 0.17% : 0.000028s : 97: substitution.elim_not_effective 0.13% : 0.000022s : 16: substitution.environ_get_add_eliminate 0.06% : 0.000009s : 8: substitution.environ_get_depend_swap 0.19% : 0.000031s : 32: substitution.environ_get_eliminate 0.32% : 0.000053s : 16: substitution.environ_get_set_eliminate 0.42% : 0.000071s : 110: substitution.float_depend_g_call 0.07% : 0.000011s : 16: substitution.float_environ_get_switch 0.12% : 0.000020s : 14: substitution.float_tuple_getitem_switch 0.16% : 0.000027s : 97: substitution.fold_const_symbol 6.39% : 0.001066s : 8: substitution.getattr_setattr_resolve 0.49% : 0.000082s : 120: substitution.graph_param_transform 0.08% : 0.000013s : 20: substitution.incorporate_call 0.06% : 0.000009s : 20: substitution.incorporate_call_switch 65.56% : 0.010930s : 369: substitution.inline 0.93% : 0.000155s : 28: substitution.inline_without_move 0.80% : 0.000133s : 338: substitution.j_node_and_user_rematch 1.03% : 0.000171s : 36: substitution.less_batch_normalization 0.44% : 0.000074s : 158: substitution.load_eliminater 0.47% : 0.000079s : 72: substitution.minmaximum_grad 0.01% : 0.000001s : 1: substitution.opt_reshape 0.35% : 0.000058s : 8: substitution.partial_defer_inline 1.29% : 0.000216s : 110: substitution.partial_eliminate 0.12% : 0.000020s : 32: substitution.reduce_all_const_elim 1.04% : 0.000174s : 338: substitution.remove_not_recompute_node 2.98% : 0.000497s : 263: substitution.replace_applicator 0.40% : 0.000067s : 148: substitution.replace_old_param 0.07% : 0.000012s : 2: substitution.reshape_eliminate 0.07% : 0.000012s : 10: substitution.set_cell_output_no_recompute 0.04% : 0.000006s : 2: substitution.specialize_transform 0.24% : 0.000040s : 32: substitution.split_environ_get_set_with_tuple_value 0.40% : 0.000066s : 31: substitution.switch_simplify 1.35% : 0.000225s : 76: substitution.tuple_list_convert_item_index_to_positive 0.77% : 0.000128s : 92: substitution.tuple_list_get_item_const_eliminator 1.25% : 0.000208s : 92: substitution.tuple_list_get_item_depend_reorder 3.84% : 0.000641s : 283: substitution.tuple_list_get_item_eliminator 0.86% : 0.000143s : 92: substitution.tuple_list_get_set_item_eliminator 2.12% : 0.000353s : 416: substitution.updatestate_pure_node_eliminater 2.72% : 0.000454s : 467: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.549787 2 92.24% : 0.507151s : 1: type_inference.infer 7.76% : 0.042636s : 1: type_inference.specialize ------[replace.] 0.006046 662 0.10% : 0.000006s : 1: replace.arithmetic_simplify 1.51% : 0.000091s : 16: replace.cast_eliminate 0.45% : 0.000027s : 3: replace.depend_value_elim 1.66% : 0.000100s : 8: replace.environ_get_set_eliminate 1.80% : 0.000109s : 6: replace.getattr_setattr_resolve 49.46% : 0.002990s : 360: replace.inline 7.89% : 0.000477s : 32: replace.partial_eliminate 2.84% : 0.000172s : 9: replace.replace_applicator 5.39% : 0.000326s : 31: replace.switch_simplify 2.16% : 0.000131s : 16: replace.tuple_list_get_item_depend_reorder 26.33% : 0.001592s : 179: replace.tuple_list_get_item_eliminator 0.41% : 0.000025s : 1: replace.updatestate_useless_node_eliminater ------[match.] 0.012463 662 0.10% : 0.000012s : 1: match.arithmetic_simplify 0.41% : 0.000051s : 16: match.cast_eliminate 0.01% : 0.000001s : 3: match.depend_value_elim 0.32% : 0.000040s : 8: match.environ_get_set_eliminate 7.62% : 0.000950s : 6: match.getattr_setattr_resolve 86.05% : 0.010725s : 360: match.inline 1.14% : 0.000142s : 32: match.partial_eliminate 0.41% : 0.000052s : 9: match.replace_applicator 0.43% : 0.000053s : 31: match.switch_simplify 0.61% : 0.000076s : 16: match.tuple_list_get_item_depend_reorder 2.82% : 0.000352s : 179: match.tuple_list_get_item_eliminator 0.08% : 0.000010s : 1: match.updatestate_useless_node_eliminater ------[predicate.] 0.017287113263 1.04% : 0.000180s : 1278: predicate.accumulaten_eliminater 0.17% : 0.000029s : 120: predicate.ad_related_special_op_eliminate 0.48% : 0.000083s : 613: predicate.addn_check_dump 0.99% : 0.000171s : 1278: predicate.addn_zero_filter 1.03% : 0.000178s : 1278: predicate.adjust_all_reduce_mul_add 1.97% : 0.000340s : 1892: predicate.arithmetic_simplify 1.15% : 0.000200s : 1295: predicate.cast_eliminate 2.09% : 0.000361s : 2090: predicate.check_bprop_eliminate 0.48% : 0.000083s : 613: predicate.compare_switch_simplify 0.05% : 0.000009s : 120: predicate.const_output_eliminate 0.10% : 0.000017s : 120: predicate.convert_tensor_all_eliminate 1.42% : 0.000246s : 1498: predicate.convert_tensor_eliminate 0.51% : 0.000087s : 613: predicate.depend_value_elim 1.10% : 0.000189s : 1303: predicate.dict_get_item_const_eliminator 1.15% : 0.000198s : 1303: predicate.dict_get_item_eliminator 1.08% : 0.000187s : 1303: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 120: predicate.elim_not_effective 0.11% : 0.000020s : 120: predicate.elim_shapecalc_of_broadcastargs 1.09% : 0.000188s : 1415: predicate.environ_add_const_eliminate 1.12% : 0.000193s : 1423: predicate.environ_get_add_eliminate 1.11% : 0.000192s : 1415: predicate.environ_get_depend_swap 1.65% : 0.000285s : 2036: predicate.environ_get_eliminate 1.17% : 0.000203s : 1423: predicate.environ_get_set_eliminate 1.49% : 0.000258s : 1891: predicate.exchange_switch_depend_value 1.84% : 0.000317s : 1891: predicate.float_depend_g_call 0.49% : 0.000084s : 613: predicate.float_environ_get_switch 0.58% : 0.000101s : 733: predicate.float_tuple_getitem_switch 0.05% : 0.000009s : 120: predicate.fold_const_symbol 0.36% : 0.000061s : 424: predicate.get_grad_eliminate 0.07% : 0.000012s : 40: predicate.getattr_setattr_resolve 0.06% : 0.000010s : 120: predicate.graph_param_transform 0.48% : 0.000083s : 613: predicate.incorporate_call 0.47% : 0.000082s : 613: predicate.incorporate_call_switch 4.64% : 0.000803s : 4446: predicate.inline 1.10% : 0.000191s : 922: predicate.inline_without_move 0.18% : 0.000031s : 424: predicate.j_node_and_user_rematch 0.44% : 0.000076s : 442: predicate.less_batch_normalization 1.61% : 0.000278s : 1738: predicate.list_to_tuple_eliminator_ 2.44% : 0.000421s : 3048: predicate.load_eliminater 0.18% : 0.000031s : 120: predicate.loop_unroll_after_grad 2.70% : 0.000467s : 2580: predicate.loop_unroll_before_grad 1.40% : 0.000241s : 1559: predicate.make_slice_get_slice_eliminator 0.48% : 0.000084s : 613: predicate.merge_addn 2.00% : 0.000346s : 2054: predicate.micro_step_allgather_replace 2.03% : 0.000351s : 2054: predicate.mini_step_allgather_replace 1.01% : 0.000174s : 1279: predicate.minmaximum_grad 0.11% : 0.000019s : 120: predicate.mutable_eliminate 0.10% : 0.000017s : 120: predicate.opt_reshape 0.12% : 0.000021s : 120: predicate.parallel_virtual_node 2.79% : 0.000483s : 1891: predicate.partial_defer_inline 1.46% : 0.000252s : 1650: predicate.partial_eliminate 1.01% : 0.000175s : 1278: predicate.print_const_string_wrapper 0.49% : 0.000085s : 608: predicate.reduce_all_const_elim 1.24% : 0.000214s : 1279: predicate.reduce_eliminate 0.17% : 0.000030s : 424: predicate.remove_not_recompute_node 1.75% : 0.000302s : 3570: predicate.replace_applicator 0.40% : 0.000069s : 922: predicate.replace_old_param 0.05% : 0.000009s : 120: predicate.reset_defer_inline 1.00% : 0.000173s : 1279: predicate.reshape_eliminate 2.13% : 0.000368s : 2054: predicate.row_tensor_add_zeros_like 0.12% : 0.000020s : 120: predicate.row_tensor_eliminate 2.22% : 0.000384s : 2090: predicate.same_eliminate 0.24% : 0.000041s : 534: predicate.set_cell_output_no_recompute 0.38% : 0.000066s : 424: predicate.shard_identity_eliminate 1.07% : 0.000185s : 1042: predicate.special_op_eliminate 0.57% : 0.000099s : 613: predicate.specialize_transform 2.32% : 0.000401s : 2054: predicate.split_environ_get_set_with_tuple_value 0.86% : 0.000148s : 922: predicate.stack_unstack_eliminate 2.38% : 0.000411s : 3048: predicate.stopgrad_eliminater 0.10% : 0.000017s : 120: predicate.switch_call_monad_eliminater 1.65% : 0.000285s : 1891: predicate.switch_defer_inline 3.68% : 0.000635s : 3981: predicate.switch_layer_defer_inline 5.08% : 0.000878s : 5146: predicate.switch_simplify 0.99% : 0.000171s : 1279: predicate.tile_eliminate 1.07% : 0.000184s : 1279: predicate.transpose_eliminate 1.33% : 0.000229s : 1543: predicate.tuple_list_convert_item_index_to_positive 1.32% : 0.000229s : 1559: predicate.tuple_list_get_item_const_eliminator 1.32% : 0.000228s : 1559: predicate.tuple_list_get_item_depend_reorder 2.10% : 0.000364s : 2351: predicate.tuple_list_get_item_eliminator 1.25% : 0.000216s : 1559: predicate.tuple_list_get_set_item_eliminator 2.00% : 0.000346s : 2172: predicate.tuple_list_set_item_eliminator 1.37% : 0.000237s : 1738: predicate.tuple_to_list_eliminator_ 2.55% : 0.000441s : 3048: predicate.updatestate_pure_node_eliminater 3.06% : 0.000530s : 3662: predicate.updatestate_useless_node_eliminater 0.12% : 0.000021s : 120: predicate.value_based_eliminate 0.37% : 0.000064s : 424: predicate.virtual_dataset_eliminate 0.35% : 0.000061s : 424: predicate.virtual_output_eliminate 0.11% : 0.000019s : 120: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.044469 674 58.97% : 0.026223s : 274: func_graph_cloner_run.FuncGraphClonerGraph 0.44% : 0.000194s : 8: func_graph_cloner_run.FuncGraphClonerNode 40.60% : 0.018052s : 392: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 14.464261 263 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000139s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000697s : 1: add_recomputation 0.00% : 0.000016s : 1: assign_add_opt 0.03% : 0.004240s : 1: auto_monad 0.00% : 0.000369s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.000963s : 1: bootstrap 0.00% : 0.000071s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000122s : 1: convert_after_rewriter 0.00% : 0.000296s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000389s : 1: distribtued_split 0.01% : 0.000950s : 1: eliminate_special_op_node 0.00% : 0.000126s : 1: environ_conv 0.00% : 0.000024s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000064s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.00% : 0.000008s : 1: inline 0.00% : 0.000012s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000006s : 1: label_micro_interleaved_index 0.01% : 0.001095s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000152s : 1: opt.transform.loop_unroll_optimizer 0.71% : 0.103305s : 145: opt.transform.opt_a 0.01% : 0.000791s : 1: opt.transform.opt_after_cconv 0.02% : 0.002891s : 27: opt.transform.opt_b 0.01% : 0.001280s : 4: opt.transform.opt_resolve 0.01% : 0.000905s : 1: opt.transform.opt_trans_graph 0.00% : 0.000384s : 3: opt.transform.special_op_eliminate 0.00% : 0.000636s : 4: opt.transform.symbol_engine_opt 1.80% : 0.260044s : 1: opt_a 0.01% : 0.001519s : 1: opt_after_cconv 0.02% : 0.003615s : 1: opt_b 1.91% : 0.276315s : 1: optimize 0.00% : 0.000142s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000088s : 1: order_py_execute_after_rewriter 0.00% : 0.000146s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000035s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000029s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000009s : 1: parallel-infer-symbol 0.00% : 0.000007s : 1: parallel-infer-symbol-second 0.00% : 0.000008s : 1: partial_unused_args_eliminate 0.00% : 0.000010s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.01% : 0.001256s : 1: pre_auto_parallel 0.01% : 0.001148s : 1: py_interpret_to_execute 0.00% : 0.000127s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000503s : 1: remove_dup_value 0.34% : 0.048593s : 2: renormalize.infer 0.25% : 0.036385s : 2: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000009s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001117s : 1: rewriter_after_opt_a 0.02% : 0.003045s : 1: rewriter_before_opt_a 0.00% : 0.000008s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000007s : 1: split_matmul_comm_elemetwise 0.00% : 0.000137s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000745s : 1: symbol_engine_optimizer 90.96% : 13.157182s : 1: task_emit 0.01% : 0.000943s : 1: tuple_transform 3.81% : 0.550558s : 1: type_inference 0.00% : 0.000469s : 1: validate [WARNING] ME(54208:281472863673360,MainProcess):2025-02-07-15:54:39.352.052 [mindspore/parallel/_utils.py:359] You are suggested to use mindspore.context.set_auto_parallel_context(parameter_broadcast=True) or mindspore.common.set_seed() to share parameters among multi-devices. TotalTime = 14.1919, [21] [bootstrap]: 0.00086006 [type_inference]: 0.569048 [auto_monad]: 0.00498784 [graph_reusing]: 4.80101e-05 [inline]: 2.00002e-06 [parallel-infer-symbol]: 2.49036e-06 [pre_auto_parallel]: 0.0012323 [insert-virtual-dataset]: 4.42006e-06 [parallel-infer-symbol-second]: 1.11992e-06 [dataset_repeat_opt]: 1.42027e-06 [pipeline_split]: 1.85007e-06 [optimize]: 0.345934, [52] [py_interpret_to_execute]: 0.00129587 [rewriter_before_opt_a]: 0.00314786 [opt_a]: 0.326551, [3] [Cycle 1]: 0.224859, [43] [expand_dump_flag]: 0.0001147 [switch_simplify]: 0.0026317 [loop_unroll]: 0.00135484 [a_1]: 0.0406741 [recompute_prepare]: 0.00030967 [updatestate_depend_eliminate]: 0.00087666 [updatestate_assign_eliminate]: 0.00019806 [updatestate_loads_eliminate]: 0.00061697 [parameter_eliminate]: 2.62801e-05 [a_2]: 0.00555097 [accelerated_algorithm]: 0.00035279 [shard]: 2.31992e-06 [meta_shard_fg_expand]: 0.00013093 [shard_inline]: 0.00017754 [auto_parallel]: 0.00012967 [parallel]: 2.73599e-05 [flash_sp]: 7.413e-05 [merge_comm]: 0.0001395 [allreduce_fusion]: 0.00011511 [matmul_add_comm_reduction]: 0.00021237 [allreduce_slice_to_reducescatter]: 7.69738e-07 [virtual_shard_identity]: 0.00017366 [virtual_dataset]: 0.00016534 [get_grad_eliminate_]: 0.00016266 [virtual_output]: 0.0001621 [merge_forward]: 0.00011104 [cell_reuse_recompute_pass]: 2.72971e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00030681 [before_grad]: 0.00029276 [inplace_validation]: 0.00015776 [meta_fg_expand]: 0.0501645 [inplace_validation_after_expand]: 0.0005466 [flash_sp_send_recv_attached]: 3.38098e-05 [receive_attached]: 1.866e-05 [after_resolve]: 0.00080278 [a_after_grad]: 0.00127727 [special_op_eliminate]: 0.00061653 [renormalize]: 0.0846552 [add_forward_monad_depend]: 0.00037716 [auto_monad_grad]: 0.00011025 [auto_monad_eliminator]: 0.00127373 [cse]: 0.00280772 [a_3]: 0.0261853 [Cycle 2]: 0.0836033, [43] [expand_dump_flag]: 0.00068454 [switch_simplify]: 0.00097022 [loop_unroll]: 0.00092931 [a_1]: 0.0476864 [recompute_prepare]: 0.00016004 [updatestate_depend_eliminate]: 0.00137004 [updatestate_assign_eliminate]: 0.00013915 [updatestate_loads_eliminate]: 0.0002957 [parameter_eliminate]: 1.67401e-05 [a_2]: 0.00191667 [accelerated_algorithm]: 0.00019287 [shard]: 2.44007e-06 [meta_shard_fg_expand]: 0.00013917 [shard_inline]: 0.00012228 [auto_parallel]: 0.00016497 [parallel]: 2.36998e-05 [flash_sp]: 6.50994e-06 [merge_comm]: 0.00016604 [allreduce_fusion]: 0.00014192 [matmul_add_comm_reduction]: 0.00011452 [allreduce_slice_to_reducescatter]: 5.89993e-07 [virtual_shard_identity]: 0.00012282 [virtual_dataset]: 0.00011593 [get_grad_eliminate_]: 0.00011111 [virtual_output]: 0.00011415 [merge_forward]: 7.62204e-05 [cell_reuse_recompute_pass]: 2.88012e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021474 [before_grad]: 0.00020271 [inplace_validation]: 7.00401e-05 [meta_fg_expand]: 0.0047857 [inplace_validation_after_expand]: 0.00135156 [flash_sp_send_recv_attached]: 2.62028e-06 [receive_attached]: 1.53016e-06 [after_resolve]: 0.00017134 [a_after_grad]: 0.00019845 [special_op_eliminate]: 0.00011281 [renormalize]: 0.0150669 [add_forward_monad_depend]: 6.42985e-06 [auto_monad_grad]: 3.81004e-06 [auto_monad_eliminator]: 0.00031773 [cse]: 0.00393784 [a_3]: 0.00087035 [Cycle 3]: 0.0103814, [43] [expand_dump_flag]: 2.56998e-06 [switch_simplify]: 0.00011937 [loop_unroll]: 0.00011408 [a_1]: 0.00386205 [recompute_prepare]: 0.00011646 [updatestate_depend_eliminate]: 0.00012965 [updatestate_assign_eliminate]: 8.129e-05 [updatestate_loads_eliminate]: 8.106e-05 [parameter_eliminate]: 3.43006e-06 [a_2]: 0.00185378 [accelerated_algorithm]: 0.00014046 [shard]: 2.65008e-06 [meta_shard_fg_expand]: 4.26997e-05 [shard_inline]: 0.00011849 [auto_parallel]: 9.418e-05 [parallel]: 1.05998e-05 [flash_sp]: 1.93994e-06 [merge_comm]: 8.687e-05 [allreduce_fusion]: 7.99997e-05 [matmul_add_comm_reduction]: 0.0001057 [allreduce_slice_to_reducescatter]: 7.79983e-07 [virtual_shard_identity]: 0.00011981 [virtual_dataset]: 0.00011497 [get_grad_eliminate_]: 0.00011271 [virtual_output]: 0.00011478 [merge_forward]: 8.55201e-05 [cell_reuse_recompute_pass]: 4.17e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022142 [before_grad]: 0.0002051 [inplace_validation]: 7.314e-05 [meta_fg_expand]: 9.07201e-05 [inplace_validation_after_expand]: 9.40403e-05 [flash_sp_send_recv_attached]: 2.14996e-06 [receive_attached]: 1.48965e-06 [after_resolve]: 0.00013741 [a_after_grad]: 0.00019243 [special_op_eliminate]: 0.00011352 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 2.13971e-06 [auto_monad_grad]: 2.15042e-06 [auto_monad_eliminator]: 0.00021329 [cse]: 0.00034832 [a_3]: 0.00082909 [py_interpret_to_execute_after_opt_a]: 0.00012294 [slice_cell_reuse_recomputed_activation]: 2.71015e-06 [rewriter_after_opt_a]: 0.00106524 [convert_after_rewriter]: 9.915e-05 [order_py_execute_after_rewriter]: 9.319e-05 [opt_b]: 0.00364347, [1] [Cycle 1]: 0.00363526, [7] [b_1]: 0.00282895 [b_2]: 0.00012032 [updatestate_depend_eliminate]: 0.00010274 [updatestate_assign_eliminate]: 7.40001e-05 [updatestate_loads_eliminate]: 8.60998e-05 [renormalize]: 4.59608e-07 [cse]: 0.00036508 [optimize_parallel_all_gather_comm]: 0.00015529 [overlap_param_gather]: 1.48402e-05 [cconv]: 9.98899e-05 [loop_unroll]: 0.0011278 [opt_after_cconv]: 0.00146305, [1] [Cycle 1]: 0.00145488, [7] [c_1]: 0.00074224 [parameter_eliminate]: 2.65986e-06 [updatestate_depend_eliminate]: 0.00011392 [updatestate_assign_eliminate]: 8.50898e-05 [updatestate_loads_eliminate]: 9.42098e-05 [cse]: 0.00036146 [renormalize]: 4.49829e-07 [remove_dup_value]: 0.00050215 [tuple_transform]: 0.00336309, [1] [Cycle 1]: 0.00335521, [2] [d_1]: 0.00332956 [renormalize]: 5.69969e-07 [partial_unused_args_eliminate]: 3.58e-06 [add_cache_embedding]: 0.00024871 [add_recomputation]: 0.00080957 [cse_after_recomputation]: 0.00027301, [1] [Cycle 1]: 0.00026293, [1] [cse]: 0.00025036 [environ_conv]: 0.00012228 [swap_dp_allreduce_reducescatter]: 0.00012908 [bias_add_comm_swap]: 2.88989e-06 [label_micro_interleaved_index]: 2.23005e-06 [label_fine_grained_interleaved_index]: 2.00002e-06 [merge_cast_opt]: 1.22003e-06 [slice_recompute_activation]: 1.71969e-06 [micro_interleaved_order_control]: 1.68011e-06 [assign_add_opt]: 1.27298e-05 [ForceFp32Comm]: 8.10251e-07 [remove_cast_before_assign_add]: 1.45007e-06 [full_micro_interleaved_order_control]: 1.95019e-06 [reorder_send_recv_between_fp_bp]: 2.25008e-06 [comm_op_add_attrs]: 1.16974e-06 [add_comm_op_reuse_tag]: 1.41002e-06 [interleave_split_concat_branches]: 9.09902e-07 [interleave_parallel_branches]: 8.49832e-07 [overlap_opt_shard_in_pipeline]: 3.637e-05 [overlap_opt_shard_grad_in_pipeline]: 1.8701e-06 [control_data_broadcast_order]: 1.17999e-06 [grouped_pairwise_exchange_alltoall]: 1.24983e-06 [offloading_packed_experts]: 1.27032e-06 [overlap_recompute_and_grad_model_parallel]: 2.16998e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.10133e-07 [overlap_recompute_allgather_and_fa_grad]: 1.12969e-06 [overlap_grad_ring_attention]: 1.7304e-06 [overlap_grad_flash_sp]: 0.0001697 [begin_end_overlap_inline]: 8.40053e-07 [split_matmul_comm_elemetwise]: 2.04984e-06 [split_layernorm_comm]: 2.40002e-06 [handle_group_info]: 9.69972e-07 [symbol_engine_optimizer]: 0.00074761, [1] [Cycle 1]: 0.00074075, [6] [build]: 5.67399e-05 [elim_shapecalc]: 0.00012834 [elim_not_effective]: 0.00020425 [opt_reshape]: 0.00011674 [fold_const_symbol]: 0.00019681 [renormalize]: 7.70204e-07 [pipeline_parallel_scheduler]: 2.2403e-06 [auto_monad_reorder]: 0.0003896 [get_jit_bprop_graph]: 6.3004e-07 [rewriter_after_jit_bprop_graph]: 7.59959e-07 [eliminate_special_op_node]: 0.00099512 [distribtued_split]: 0.0003707 [validate]: 0.0003158 [task_emit]: 13.2671 [execute]: 1.24299e-05 Sums bootstrap : 0.000860s : 0.01% type_inference : 0.569048s : 4.01% auto_monad : 0.004988s : 0.04% graph_reusing : 0.000048s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.001232s : 0.01% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.001296s : 0.01% optimize.rewriter_before_opt_a : 0.003148s : 0.02% optimize.opt_a.expand_dump_flag : 0.000802s : 0.01% optimize.opt_a.switch_simplify : 0.003721s : 0.03% optimize.opt_a.loop_unroll : 0.002398s : 0.02% optimize.opt_a.a_1 : 0.092223s : 0.65% optimize.opt_a.recompute_prepare : 0.000586s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.002376s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000418s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000994s : 0.01% optimize.opt_a.parameter_eliminate : 0.000046s : 0.00% optimize.opt_a.a_2 : 0.009321s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000686s : 0.00% optimize.opt_a.shard : 0.000007s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000313s : 0.00% optimize.opt_a.shard_inline : 0.000418s : 0.00% optimize.opt_a.auto_parallel : 0.000389s : 0.00% optimize.opt_a.parallel : 0.000062s : 0.00% optimize.opt_a.flash_sp : 0.000083s : 0.00% optimize.opt_a.merge_comm : 0.000392s : 0.00% optimize.opt_a.allreduce_fusion : 0.000337s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000433s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000416s : 0.00% optimize.opt_a.virtual_dataset : 0.000396s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000386s : 0.00% optimize.opt_a.virtual_output : 0.000391s : 0.00% optimize.opt_a.merge_forward : 0.000273s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000010s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000743s : 0.01% optimize.opt_a.before_grad : 0.000701s : 0.00% optimize.opt_a.inplace_validation : 0.000301s : 0.00% optimize.opt_a.meta_fg_expand : 0.055041s : 0.39% optimize.opt_a.inplace_validation_after_expand : 0.001992s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000039s : 0.00% optimize.opt_a.receive_attached : 0.000022s : 0.00% optimize.opt_a.after_resolve : 0.001112s : 0.01% optimize.opt_a.a_after_grad : 0.001668s : 0.01% optimize.opt_a.special_op_eliminate : 0.000843s : 0.01% optimize.opt_a.renormalize : 0.099722s : 0.70% optimize.opt_a.add_forward_monad_depend : 0.000386s : 0.00% optimize.opt_a.auto_monad_grad : 0.000116s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.001805s : 0.01% optimize.opt_a.cse : 0.007094s : 0.05% optimize.opt_a.a_3 : 0.027885s : 0.20% optimize.py_interpret_to_execute_after_opt_a : 0.000123s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001065s : 0.01% optimize.convert_after_rewriter : 0.000099s : 0.00% optimize.order_py_execute_after_rewriter : 0.000093s : 0.00% optimize.opt_b.b_1 : 0.002829s : 0.02% optimize.opt_b.b_2 : 0.000120s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000103s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000074s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000086s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000365s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000155s : 0.00% optimize.overlap_param_gather : 0.000015s : 0.00% optimize.cconv : 0.000100s : 0.00% optimize.loop_unroll : 0.001128s : 0.01% optimize.opt_after_cconv.c_1 : 0.000742s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000114s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000085s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000094s : 0.00% optimize.opt_after_cconv.cse : 0.000361s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000502s : 0.00% optimize.tuple_transform.d_1 : 0.003330s : 0.02% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000004s : 0.00% optimize.add_cache_embedding : 0.000249s : 0.00% optimize.add_recomputation : 0.000810s : 0.01% optimize.cse_after_recomputation.cse : 0.000250s : 0.00% optimize.environ_conv : 0.000122s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000129s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000013s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000036s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000170s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000057s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000128s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000204s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000117s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000197s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000390s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000995s : 0.01% distribtued_split : 0.000371s : 0.00% validate : 0.000316s : 0.00% task_emit : 13.267141s : 93.55% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.040208 4191 0.71% : 0.000287s : 57: substitution.arithmetic_simplify 0.14% : 0.000058s : 16: substitution.cast_eliminate 0.13% : 0.000054s : 48: substitution.depend_value_elim 0.07% : 0.000028s : 97: substitution.elim_not_effective 0.06% : 0.000022s : 16: substitution.environ_get_add_eliminate 0.02% : 0.000009s : 8: substitution.environ_get_depend_swap 0.07% : 0.000030s : 32: substitution.environ_get_eliminate 0.51% : 0.000204s : 16: substitution.environ_get_set_eliminate 0.21% : 0.000086s : 110: substitution.float_depend_g_call 0.03% : 0.000012s : 16: substitution.float_environ_get_switch 0.05% : 0.000020s : 14: substitution.float_tuple_getitem_switch 0.07% : 0.000028s : 97: substitution.fold_const_symbol 2.78% : 0.001118s : 8: substitution.getattr_setattr_resolve 0.22% : 0.000087s : 120: substitution.graph_param_transform 0.03% : 0.000012s : 20: substitution.incorporate_call 0.02% : 0.000010s : 20: substitution.incorporate_call_switch 71.54% : 0.028767s : 369: substitution.inline 0.40% : 0.000161s : 28: substitution.inline_without_move 0.33% : 0.000134s : 338: substitution.j_node_and_user_rematch 0.45% : 0.000180s : 36: substitution.less_batch_normalization 0.19% : 0.000076s : 158: substitution.load_eliminater 0.25% : 0.000100s : 72: substitution.minmaximum_grad 0.00% : 0.000001s : 1: substitution.opt_reshape 0.13% : 0.000054s : 8: substitution.partial_defer_inline 0.55% : 0.000221s : 110: substitution.partial_eliminate 0.05% : 0.000021s : 32: substitution.reduce_all_const_elim 0.44% : 0.000175s : 338: substitution.remove_not_recompute_node 12.15% : 0.004885s : 263: substitution.replace_applicator 0.17% : 0.000069s : 148: substitution.replace_old_param 0.04% : 0.000015s : 2: substitution.reshape_eliminate 0.03% : 0.000012s : 10: substitution.set_cell_output_no_recompute 0.02% : 0.000006s : 2: substitution.specialize_transform 0.96% : 0.000386s : 32: substitution.split_environ_get_set_with_tuple_value 0.24% : 0.000097s : 31: substitution.switch_simplify 0.91% : 0.000368s : 76: substitution.tuple_list_convert_item_index_to_positive 0.35% : 0.000141s : 92: substitution.tuple_list_get_item_const_eliminator 0.61% : 0.000245s : 92: substitution.tuple_list_get_item_depend_reorder 1.92% : 0.000773s : 283: substitution.tuple_list_get_item_eliminator 0.39% : 0.000158s : 92: substitution.tuple_list_get_set_item_eliminator 1.30% : 0.000521s : 416: substitution.updatestate_pure_node_eliminater 1.44% : 0.000579s : 467: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.568186 2 91.06% : 0.517392s : 1: type_inference.infer 8.94% : 0.050795s : 1: type_inference.specialize ------[replace.] 0.012030 662 0.12% : 0.000015s : 1: replace.arithmetic_simplify 0.71% : 0.000085s : 16: replace.cast_eliminate 0.24% : 0.000029s : 3: replace.depend_value_elim 5.26% : 0.000633s : 8: replace.environ_get_set_eliminate 0.95% : 0.000115s : 6: replace.getattr_setattr_resolve 41.43% : 0.004984s : 360: replace.inline 4.16% : 0.000501s : 32: replace.partial_eliminate 1.71% : 0.000206s : 9: replace.replace_applicator 3.56% : 0.000429s : 31: replace.switch_simplify 1.11% : 0.000134s : 16: replace.tuple_list_get_item_depend_reorder 40.13% : 0.004828s : 179: replace.tuple_list_get_item_eliminator 0.60% : 0.000072s : 1: replace.updatestate_useless_node_eliminater ------[match.] 0.030625 662 0.09% : 0.000027s : 1: match.arithmetic_simplify 0.16% : 0.000050s : 16: match.cast_eliminate 0.00% : 0.000001s : 3: match.depend_value_elim 0.59% : 0.000182s : 8: match.environ_get_set_eliminate 3.26% : 0.001000s : 6: match.getattr_setattr_resolve 93.14% : 0.028525s : 360: match.inline 0.48% : 0.000148s : 32: match.partial_eliminate 0.18% : 0.000056s : 9: match.replace_applicator 0.27% : 0.000083s : 31: match.switch_simplify 0.27% : 0.000082s : 16: match.tuple_list_get_item_depend_reorder 1.38% : 0.000422s : 179: match.tuple_list_get_item_eliminator 0.16% : 0.000049s : 1: match.updatestate_useless_node_eliminater ------[predicate.] 0.019208113263 1.15% : 0.000221s : 1278: predicate.accumulaten_eliminater 0.15% : 0.000029s : 120: predicate.ad_related_special_op_eliminate 0.43% : 0.000083s : 613: predicate.addn_check_dump 0.90% : 0.000174s : 1278: predicate.addn_zero_filter 0.92% : 0.000176s : 1278: predicate.adjust_all_reduce_mul_add 1.83% : 0.000351s : 1892: predicate.arithmetic_simplify 0.91% : 0.000174s : 1295: predicate.cast_eliminate 1.66% : 0.000319s : 2090: predicate.check_bprop_eliminate 0.44% : 0.000084s : 613: predicate.compare_switch_simplify 0.05% : 0.000009s : 120: predicate.const_output_eliminate 0.09% : 0.000017s : 120: predicate.convert_tensor_all_eliminate 1.39% : 0.000267s : 1498: predicate.convert_tensor_eliminate 0.86% : 0.000166s : 613: predicate.depend_value_elim 0.99% : 0.000189s : 1303: predicate.dict_get_item_const_eliminator 1.12% : 0.000214s : 1303: predicate.dict_get_item_eliminator 1.02% : 0.000195s : 1303: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 120: predicate.elim_not_effective 0.10% : 0.000019s : 120: predicate.elim_shapecalc_of_broadcastargs 0.97% : 0.000186s : 1415: predicate.environ_add_const_eliminate 0.98% : 0.000188s : 1423: predicate.environ_get_add_eliminate 1.02% : 0.000197s : 1415: predicate.environ_get_depend_swap 1.50% : 0.000288s : 2036: predicate.environ_get_eliminate 1.01% : 0.000195s : 1423: predicate.environ_get_set_eliminate 1.39% : 0.000268s : 1891: predicate.exchange_switch_depend_value 1.69% : 0.000324s : 1891: predicate.float_depend_g_call 0.44% : 0.000084s : 613: predicate.float_environ_get_switch 0.53% : 0.000101s : 733: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 120: predicate.fold_const_symbol 0.31% : 0.000060s : 424: predicate.get_grad_eliminate 0.06% : 0.000012s : 40: predicate.getattr_setattr_resolve 0.05% : 0.000010s : 120: predicate.graph_param_transform 0.43% : 0.000083s : 613: predicate.incorporate_call 0.43% : 0.000083s : 613: predicate.incorporate_call_switch 4.21% : 0.000809s : 4446: predicate.inline 0.97% : 0.000186s : 922: predicate.inline_without_move 0.16% : 0.000030s : 424: predicate.j_node_and_user_rematch 0.59% : 0.000113s : 442: predicate.less_batch_normalization 1.29% : 0.000248s : 1738: predicate.list_to_tuple_eliminator_ 2.25% : 0.000433s : 3048: predicate.load_eliminater 0.17% : 0.000032s : 120: predicate.loop_unroll_after_grad 2.33% : 0.000447s : 2580: predicate.loop_unroll_before_grad 1.15% : 0.000220s : 1559: predicate.make_slice_get_slice_eliminator 0.44% : 0.000084s : 613: predicate.merge_addn 1.77% : 0.000340s : 2054: predicate.micro_step_allgather_replace 1.98% : 0.000381s : 2054: predicate.mini_step_allgather_replace 0.88% : 0.000170s : 1279: predicate.minmaximum_grad 0.10% : 0.000019s : 120: predicate.mutable_eliminate 0.09% : 0.000017s : 120: predicate.opt_reshape 0.10% : 0.000019s : 120: predicate.parallel_virtual_node 2.68% : 0.000515s : 1891: predicate.partial_defer_inline 1.28% : 0.000246s : 1650: predicate.partial_eliminate 0.93% : 0.000179s : 1278: predicate.print_const_string_wrapper 0.50% : 0.000096s : 608: predicate.reduce_all_const_elim 1.17% : 0.000225s : 1279: predicate.reduce_eliminate 0.16% : 0.000030s : 424: predicate.remove_not_recompute_node 1.62% : 0.000310s : 3570: predicate.replace_applicator 0.35% : 0.000068s : 922: predicate.replace_old_param 0.05% : 0.000009s : 120: predicate.reset_defer_inline 0.94% : 0.000180s : 1279: predicate.reshape_eliminate 10.49% : 0.002015s : 2054: predicate.row_tensor_add_zeros_like 0.10% : 0.000019s : 120: predicate.row_tensor_eliminate 1.80% : 0.000346s : 2090: predicate.same_eliminate 0.21% : 0.000040s : 534: predicate.set_cell_output_no_recompute 0.33% : 0.000064s : 424: predicate.shard_identity_eliminate 0.88% : 0.000170s : 1042: predicate.special_op_eliminate 0.50% : 0.000097s : 613: predicate.specialize_transform 1.99% : 0.000383s : 2054: predicate.split_environ_get_set_with_tuple_value 0.74% : 0.000143s : 922: predicate.stack_unstack_eliminate 2.17% : 0.000417s : 3048: predicate.stopgrad_eliminater 0.09% : 0.000017s : 120: predicate.switch_call_monad_eliminater 1.52% : 0.000292s : 1891: predicate.switch_defer_inline 3.16% : 0.000607s : 3981: predicate.switch_layer_defer_inline 5.34% : 0.001026s : 5146: predicate.switch_simplify 0.92% : 0.000177s : 1279: predicate.tile_eliminate 0.92% : 0.000177s : 1279: predicate.transpose_eliminate 1.29% : 0.000248s : 1543: predicate.tuple_list_convert_item_index_to_positive 1.30% : 0.000250s : 1559: predicate.tuple_list_get_item_const_eliminator 1.17% : 0.000225s : 1559: predicate.tuple_list_get_item_depend_reorder 1.97% : 0.000378s : 2351: predicate.tuple_list_get_item_eliminator 1.20% : 0.000230s : 1559: predicate.tuple_list_get_set_item_eliminator 1.73% : 0.000333s : 2172: predicate.tuple_list_set_item_eliminator 1.31% : 0.000251s : 1738: predicate.tuple_to_list_eliminator_ 2.23% : 0.000428s : 3048: predicate.updatestate_pure_node_eliminater 2.75% : 0.000528s : 3662: predicate.updatestate_useless_node_eliminater 0.09% : 0.000018s : 120: predicate.value_based_eliminate 0.32% : 0.000061s : 424: predicate.virtual_dataset_eliminate 0.31% : 0.000060s : 424: predicate.virtual_output_eliminate 0.10% : 0.000019s : 120: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.048722 674 56.72% : 0.027633s : 274: func_graph_cloner_run.FuncGraphClonerGraph 0.37% : 0.000182s : 8: func_graph_cloner_run.FuncGraphClonerNode 42.91% : 0.020908s : 392: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 14.790060 263 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000258s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.01% : 0.000822s : 1: add_recomputation 0.00% : 0.000016s : 1: assign_add_opt 0.03% : 0.005016s : 1: auto_monad 0.00% : 0.000403s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.000889s : 1: bootstrap 0.00% : 0.000107s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000139s : 1: convert_after_rewriter 0.00% : 0.000278s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000387s : 1: distribtued_split 0.01% : 0.001010s : 1: eliminate_special_op_node 0.00% : 0.000131s : 1: environ_conv 0.00% : 0.000022s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000059s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000011s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000006s : 1: label_micro_interleaved_index 0.01% : 0.001139s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000150s : 1: opt.transform.loop_unroll_optimizer 0.97% : 0.143391s : 145: opt.transform.opt_a 0.00% : 0.000739s : 1: opt.transform.opt_after_cconv 0.02% : 0.002893s : 27: opt.transform.opt_b 0.01% : 0.001347s : 4: opt.transform.opt_resolve 0.02% : 0.003325s : 1: opt.transform.opt_trans_graph 0.00% : 0.000384s : 3: opt.transform.special_op_eliminate 0.00% : 0.000640s : 4: opt.transform.symbol_engine_opt 2.21% : 0.326557s : 1: opt_a 0.01% : 0.001468s : 1: opt_after_cconv 0.02% : 0.003648s : 1: opt_b 2.34% : 0.345946s : 1: optimize 0.00% : 0.000166s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000100s : 1: order_py_execute_after_rewriter 0.00% : 0.000177s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000042s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000020s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000008s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.01% : 0.001251s : 1: pre_auto_parallel 0.01% : 0.001311s : 1: py_interpret_to_execute 0.00% : 0.000131s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000516s : 1: remove_dup_value 0.38% : 0.056426s : 2: renormalize.infer 0.29% : 0.043181s : 2: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001075s : 1: rewriter_after_opt_a 0.02% : 0.003203s : 1: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000006s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000136s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000752s : 1: symbol_engine_optimizer 89.70% : 13.267193s : 1: task_emit 0.02% : 0.003370s : 1: tuple_transform 3.85% : 0.569111s : 1: type_inference 0.00% : 0.000534s : 1: validate TotalTime = 14.1972, [21] [bootstrap]: 0.00088953 [type_inference]: 0.564609 [auto_monad]: 0.00443245 [graph_reusing]: 5.36502e-05 [inline]: 2.65008e-06 [parallel-infer-symbol]: 2.60025e-06 [pre_auto_parallel]: 0.00121646 [insert-virtual-dataset]: 4.90993e-06 [parallel-infer-symbol-second]: 1.17021e-06 [dataset_repeat_opt]: 1.8198e-06 [pipeline_split]: 2.08989e-06 [optimize]: 0.284007, [52] [py_interpret_to_execute]: 0.00117156 [rewriter_before_opt_a]: 0.00311465 [opt_a]: 0.267306, [3] [Cycle 1]: 0.197777, [43] [expand_dump_flag]: 9.12403e-05 [switch_simplify]: 0.00223283 [loop_unroll]: 0.00150121 [a_1]: 0.0403165 [recompute_prepare]: 0.0003213 [updatestate_depend_eliminate]: 0.00085728 [updatestate_assign_eliminate]: 0.00017781 [updatestate_loads_eliminate]: 0.00059779 [parameter_eliminate]: 2.52696e-05 [a_2]: 0.00542694 [accelerated_algorithm]: 0.00035548 [shard]: 2.92016e-06 [meta_shard_fg_expand]: 0.00011718 [shard_inline]: 0.00018168 [auto_parallel]: 0.00013337 [parallel]: 1.14497e-05 [flash_sp]: 9.22699e-05 [merge_comm]: 0.00012986 [allreduce_fusion]: 0.00011357 [matmul_add_comm_reduction]: 0.00021511 [allreduce_slice_to_reducescatter]: 7.09668e-07 [virtual_shard_identity]: 0.00017965 [virtual_dataset]: 0.00016891 [get_grad_eliminate_]: 0.00016557 [virtual_output]: 0.00016394 [merge_forward]: 0.00011094 [cell_reuse_recompute_pass]: 2.82982e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00030548 [before_grad]: 0.00029894 [inplace_validation]: 0.00015118 [meta_fg_expand]: 0.0441433 [inplace_validation_after_expand]: 0.00059638 [flash_sp_send_recv_attached]: 6.78003e-06 [receive_attached]: 1.80299e-05 [after_resolve]: 0.00081066 [a_after_grad]: 0.00127665 [special_op_eliminate]: 0.00064255 [renormalize]: 0.0777642 [add_forward_monad_depend]: 0.00036701 [auto_monad_grad]: 0.00010245 [auto_monad_eliminator]: 0.00114698 [cse]: 0.00282727 [a_3]: 0.0130812 [Cycle 2]: 0.0523053, [43] [expand_dump_flag]: 2.95099e-05 [switch_simplify]: 0.00085343 [loop_unroll]: 0.00092142 [a_1]: 0.0233238 [recompute_prepare]: 0.00017103 [updatestate_depend_eliminate]: 0.00072763 [updatestate_assign_eliminate]: 9.58601e-05 [updatestate_loads_eliminate]: 0.00020216 [parameter_eliminate]: 6.14021e-06 [a_2]: 0.00196526 [accelerated_algorithm]: 0.0001423 [shard]: 2.54996e-06 [meta_shard_fg_expand]: 7.58599e-05 [shard_inline]: 0.00012196 [auto_parallel]: 0.00010629 [parallel]: 1.209e-05 [flash_sp]: 5.26989e-06 [merge_comm]: 9.102e-05 [allreduce_fusion]: 8.25003e-05 [matmul_add_comm_reduction]: 0.00011672 [allreduce_slice_to_reducescatter]: 7.59959e-07 [virtual_shard_identity]: 0.00012547 [virtual_dataset]: 0.00019015 [get_grad_eliminate_]: 0.00012537 [virtual_output]: 0.00012629 [merge_forward]: 7.78297e-05 [cell_reuse_recompute_pass]: 2.60025e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022458 [before_grad]: 0.00021199 [inplace_validation]: 7.54301e-05 [meta_fg_expand]: 0.0023138 [inplace_validation_after_expand]: 0.00089919 [flash_sp_send_recv_attached]: 2.88012e-06 [receive_attached]: 1.86032e-06 [after_resolve]: 0.00018893 [a_after_grad]: 0.00020255 [special_op_eliminate]: 0.00011999 [renormalize]: 0.0127508 [add_forward_monad_depend]: 8.84989e-06 [auto_monad_grad]: 3.51993e-06 [auto_monad_eliminator]: 0.00030407 [cse]: 0.00403328 [a_3]: 0.00084906 [Cycle 3]: 0.0106587, [43] [expand_dump_flag]: 3.24007e-06 [switch_simplify]: 0.00012216 [loop_unroll]: 0.00011594 [a_1]: 0.00389621 [recompute_prepare]: 0.00012018 [updatestate_depend_eliminate]: 0.00013642 [updatestate_assign_eliminate]: 8.14004e-05 [updatestate_loads_eliminate]: 8.15303e-05 [parameter_eliminate]: 4.50993e-06 [a_2]: 0.0019207 [accelerated_algorithm]: 0.00014244 [shard]: 2.95974e-06 [meta_shard_fg_expand]: 5.04702e-05 [shard_inline]: 0.00012125 [auto_parallel]: 0.00010246 [parallel]: 1.21603e-05 [flash_sp]: 2.55974e-06 [merge_comm]: 8.92202e-05 [allreduce_fusion]: 8.17203e-05 [matmul_add_comm_reduction]: 0.00011259 [allreduce_slice_to_reducescatter]: 5.49946e-07 [virtual_shard_identity]: 0.00012381 [virtual_dataset]: 0.00011859 [get_grad_eliminate_]: 0.00011576 [virtual_output]: 0.00011717 [merge_forward]: 7.96402e-05 [cell_reuse_recompute_pass]: 4.42006e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021722 [before_grad]: 0.0002099 [inplace_validation]: 7.38599e-05 [meta_fg_expand]: 9.13702e-05 [inplace_validation_after_expand]: 0.00010055 [flash_sp_send_recv_attached]: 1.91992e-06 [receive_attached]: 2.01026e-06 [after_resolve]: 0.00016426 [a_after_grad]: 0.00021392 [special_op_eliminate]: 0.0001557 [renormalize]: 7.03149e-08 [add_forward_monad_depend]: 3.26987e-06 [auto_monad_grad]: 3.18e-06 [auto_monad_eliminator]: 0.00022103 [cse]: 0.00035561 [a_3]: 0.00082291 [py_interpret_to_execute_after_opt_a]: 0.00012579 [slice_cell_reuse_recomputed_activation]: 2.61981e-06 [rewriter_after_opt_a]: 0.00114924 [convert_after_rewriter]: 0.00011744 [order_py_execute_after_rewriter]: 8.43396e-05 [opt_b]: 0.00366766, [1] [Cycle 1]: 0.00365817, [7] [b_1]: 0.00283593 [b_2]: 0.00012425 [updatestate_depend_eliminate]: 9.97502e-05 [updatestate_assign_eliminate]: 7.50199e-05 [updatestate_loads_eliminate]: 8.49501e-05 [renormalize]: 4.39584e-07 [cse]: 0.00037572 [optimize_parallel_all_gather_comm]: 0.00013946 [overlap_param_gather]: 2.76603e-05 [cconv]: 7.67699e-05 [loop_unroll]: 0.00109483 [opt_after_cconv]: 0.00148875, [1] [Cycle 1]: 0.00148047, [7] [c_1]: 0.00075217 [parameter_eliminate]: 2.78e-06 [updatestate_depend_eliminate]: 0.00011773 [updatestate_assign_eliminate]: 8.55196e-05 [updatestate_loads_eliminate]: 9.39202e-05 [cse]: 0.00037089 [renormalize]: 5.89993e-07 [remove_dup_value]: 0.00051624 [tuple_transform]: 0.00094976, [1] [Cycle 1]: 0.00094244, [2] [d_1]: 0.00092444 [renormalize]: 5.10365e-07 [partial_unused_args_eliminate]: 4.02005e-06 [add_cache_embedding]: 0.00015043 [add_recomputation]: 0.00075318 [cse_after_recomputation]: 0.00028318, [1] [Cycle 1]: 0.00027281, [1] [cse]: 0.00026004 [environ_conv]: 0.00011663 [swap_dp_allreduce_reducescatter]: 0.0001283 [bias_add_comm_swap]: 2.81027e-06 [label_micro_interleaved_index]: 2.20025e-06 [label_fine_grained_interleaved_index]: 2.28034e-06 [merge_cast_opt]: 1.34995e-06 [slice_recompute_activation]: 2.08989e-06 [micro_interleaved_order_control]: 1.82027e-06 [assign_add_opt]: 9.73977e-06 [ForceFp32Comm]: 8.89879e-07 [remove_cast_before_assign_add]: 1.17999e-06 [full_micro_interleaved_order_control]: 2.02982e-06 [reorder_send_recv_between_fp_bp]: 2.22959e-06 [comm_op_add_attrs]: 1.40024e-06 [add_comm_op_reuse_tag]: 1.5297e-06 [interleave_split_concat_branches]: 8.09785e-07 [interleave_parallel_branches]: 9.39704e-07 [overlap_opt_shard_in_pipeline]: 5.22202e-05 [overlap_opt_shard_grad_in_pipeline]: 2.41026e-06 [control_data_broadcast_order]: 1.24983e-06 [grouped_pairwise_exchange_alltoall]: 1.59023e-06 [offloading_packed_experts]: 1.30991e-06 [overlap_recompute_and_grad_model_parallel]: 2.14018e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.4017e-07 [overlap_recompute_allgather_and_fa_grad]: 1.85985e-06 [overlap_grad_ring_attention]: 1.8701e-06 [overlap_grad_flash_sp]: 0.00015222 [begin_end_overlap_inline]: 9.89996e-07 [split_matmul_comm_elemetwise]: 2.51969e-06 [split_layernorm_comm]: 2.02004e-06 [handle_group_info]: 1.02026e-06 [symbol_engine_optimizer]: 0.00076239, [1] [Cycle 1]: 0.00075468, [6] [build]: 6.02803e-05 [elim_shapecalc]: 0.00013147 [elim_not_effective]: 0.00020869 [opt_reshape]: 0.00012068 [fold_const_symbol]: 0.00019657 [renormalize]: 4.70318e-07 [pipeline_parallel_scheduler]: 2.22027e-06 [auto_monad_reorder]: 0.00035992 [get_jit_bprop_graph]: 7.5018e-07 [rewriter_after_jit_bprop_graph]: 4.49829e-07 [eliminate_special_op_node]: 0.00094123 [distribtued_split]: 0.00039547 [validate]: 0.00031117 [task_emit]: 13.3394 [execute]: 7.20005e-06 Sums bootstrap : 0.000890s : 0.01% type_inference : 0.564609s : 3.98% auto_monad : 0.004432s : 0.03% graph_reusing : 0.000054s : 0.00% inline : 0.000003s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.001216s : 0.01% insert-virtual-dataset : 0.000005s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.001172s : 0.01% optimize.rewriter_before_opt_a : 0.003115s : 0.02% optimize.opt_a.expand_dump_flag : 0.000124s : 0.00% optimize.opt_a.switch_simplify : 0.003208s : 0.02% optimize.opt_a.loop_unroll : 0.002539s : 0.02% optimize.opt_a.a_1 : 0.067536s : 0.48% optimize.opt_a.recompute_prepare : 0.000613s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.001721s : 0.01% optimize.opt_a.updatestate_assign_eliminate : 0.000355s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000881s : 0.01% optimize.opt_a.parameter_eliminate : 0.000036s : 0.00% optimize.opt_a.a_2 : 0.009313s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000640s : 0.00% optimize.opt_a.shard : 0.000008s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000244s : 0.00% optimize.opt_a.shard_inline : 0.000425s : 0.00% optimize.opt_a.auto_parallel : 0.000342s : 0.00% optimize.opt_a.parallel : 0.000036s : 0.00% optimize.opt_a.flash_sp : 0.000100s : 0.00% optimize.opt_a.merge_comm : 0.000310s : 0.00% optimize.opt_a.allreduce_fusion : 0.000278s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000444s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000429s : 0.00% optimize.opt_a.virtual_dataset : 0.000478s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000407s : 0.00% optimize.opt_a.virtual_output : 0.000407s : 0.00% optimize.opt_a.merge_forward : 0.000268s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000010s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000747s : 0.01% optimize.opt_a.before_grad : 0.000721s : 0.01% optimize.opt_a.inplace_validation : 0.000300s : 0.00% optimize.opt_a.meta_fg_expand : 0.046548s : 0.33% optimize.opt_a.inplace_validation_after_expand : 0.001596s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000012s : 0.00% optimize.opt_a.receive_attached : 0.000022s : 0.00% optimize.opt_a.after_resolve : 0.001164s : 0.01% optimize.opt_a.a_after_grad : 0.001693s : 0.01% optimize.opt_a.special_op_eliminate : 0.000918s : 0.01% optimize.opt_a.renormalize : 0.090515s : 0.64% optimize.opt_a.add_forward_monad_depend : 0.000379s : 0.00% optimize.opt_a.auto_monad_grad : 0.000109s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.001672s : 0.01% optimize.opt_a.cse : 0.007216s : 0.05% optimize.opt_a.a_3 : 0.014753s : 0.10% optimize.py_interpret_to_execute_after_opt_a : 0.000126s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001149s : 0.01% optimize.convert_after_rewriter : 0.000117s : 0.00% optimize.order_py_execute_after_rewriter : 0.000084s : 0.00% optimize.opt_b.b_1 : 0.002836s : 0.02% optimize.opt_b.b_2 : 0.000124s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000100s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000075s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000085s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000376s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000139s : 0.00% optimize.overlap_param_gather : 0.000028s : 0.00% optimize.cconv : 0.000077s : 0.00% optimize.loop_unroll : 0.001095s : 0.01% optimize.opt_after_cconv.c_1 : 0.000752s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000118s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000086s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000094s : 0.00% optimize.opt_after_cconv.cse : 0.000371s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000516s : 0.00% optimize.tuple_transform.d_1 : 0.000924s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000004s : 0.00% optimize.add_cache_embedding : 0.000150s : 0.00% optimize.add_recomputation : 0.000753s : 0.01% optimize.cse_after_recomputation.cse : 0.000260s : 0.00% optimize.environ_conv : 0.000117s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000128s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000010s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000052s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000002s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000152s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000003s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000060s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000131s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000209s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000121s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000197s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000360s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000941s : 0.01% distribtued_split : 0.000395s : 0.00% validate : 0.000311s : 0.00% task_emit : 13.339435s : 94.02% execute : 0.000007s : 0.00% Time group info: ------[substitution.] 0.017595 4191 1.49% : 0.000262s : 57: substitution.arithmetic_simplify 0.42% : 0.000074s : 16: substitution.cast_eliminate 0.31% : 0.000054s : 48: substitution.depend_value_elim 0.16% : 0.000028s : 97: substitution.elim_not_effective 0.12% : 0.000020s : 16: substitution.environ_get_add_eliminate 0.05% : 0.000009s : 8: substitution.environ_get_depend_swap 0.17% : 0.000031s : 32: substitution.environ_get_eliminate 0.32% : 0.000056s : 16: substitution.environ_get_set_eliminate 0.40% : 0.000071s : 110: substitution.float_depend_g_call 0.06% : 0.000011s : 16: substitution.float_environ_get_switch 0.12% : 0.000021s : 14: substitution.float_tuple_getitem_switch 0.16% : 0.000028s : 97: substitution.fold_const_symbol 5.88% : 0.001034s : 8: substitution.getattr_setattr_resolve 0.47% : 0.000083s : 120: substitution.graph_param_transform 0.07% : 0.000012s : 20: substitution.incorporate_call 0.05% : 0.000009s : 20: substitution.incorporate_call_switch 66.30% : 0.011666s : 369: substitution.inline 0.88% : 0.000156s : 28: substitution.inline_without_move 0.80% : 0.000141s : 338: substitution.j_node_and_user_rematch 0.96% : 0.000169s : 36: substitution.less_batch_normalization 0.44% : 0.000078s : 158: substitution.load_eliminater 0.44% : 0.000077s : 72: substitution.minmaximum_grad 0.01% : 0.000001s : 1: substitution.opt_reshape 0.34% : 0.000060s : 8: substitution.partial_defer_inline 1.26% : 0.000221s : 110: substitution.partial_eliminate 0.12% : 0.000021s : 32: substitution.reduce_all_const_elim 1.01% : 0.000178s : 338: substitution.remove_not_recompute_node 3.20% : 0.000562s : 263: substitution.replace_applicator 0.40% : 0.000071s : 148: substitution.replace_old_param 0.08% : 0.000015s : 2: substitution.reshape_eliminate 0.06% : 0.000011s : 10: substitution.set_cell_output_no_recompute 0.04% : 0.000007s : 2: substitution.specialize_transform 0.20% : 0.000036s : 32: substitution.split_environ_get_set_with_tuple_value 0.39% : 0.000068s : 31: substitution.switch_simplify 1.32% : 0.000233s : 76: substitution.tuple_list_convert_item_index_to_positive 0.70% : 0.000122s : 92: substitution.tuple_list_get_item_const_eliminator 1.21% : 0.000213s : 92: substitution.tuple_list_get_item_depend_reorder 3.92% : 0.000690s : 283: substitution.tuple_list_get_item_eliminator 0.81% : 0.000143s : 92: substitution.tuple_list_get_set_item_eliminator 2.13% : 0.000376s : 416: substitution.updatestate_pure_node_eliminater 2.70% : 0.000475s : 467: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.563862 2 92.08% : 0.519232s : 1: type_inference.infer 7.92% : 0.044630s : 1: type_inference.specialize ------[replace.] 0.006569 662 0.10% : 0.000006s : 1: replace.arithmetic_simplify 1.39% : 0.000091s : 16: replace.cast_eliminate 0.45% : 0.000029s : 3: replace.depend_value_elim 1.79% : 0.000117s : 8: replace.environ_get_set_eliminate 1.68% : 0.000111s : 6: replace.getattr_setattr_resolve 48.27% : 0.003171s : 360: replace.inline 7.51% : 0.000493s : 32: replace.partial_eliminate 2.91% : 0.000191s : 9: replace.replace_applicator 5.65% : 0.000371s : 31: replace.switch_simplify 2.00% : 0.000131s : 16: replace.tuple_list_get_item_depend_reorder 27.89% : 0.001832s : 179: replace.tuple_list_get_item_eliminator 0.39% : 0.000025s : 1: replace.updatestate_useless_node_eliminater ------[match.] 0.013230 662 0.10% : 0.000013s : 1: match.arithmetic_simplify 0.39% : 0.000052s : 16: match.cast_eliminate 0.01% : 0.000001s : 3: match.depend_value_elim 0.33% : 0.000043s : 8: match.environ_get_set_eliminate 6.97% : 0.000922s : 6: match.getattr_setattr_resolve 86.63% : 0.011461s : 360: match.inline 1.14% : 0.000151s : 32: match.partial_eliminate 0.42% : 0.000056s : 9: match.replace_applicator 0.41% : 0.000054s : 31: match.switch_simplify 0.59% : 0.000077s : 16: match.tuple_list_get_item_depend_reorder 2.93% : 0.000388s : 179: match.tuple_list_get_item_eliminator 0.09% : 0.000012s : 1: match.updatestate_useless_node_eliminater ------[predicate.] 0.017218113263 1.01% : 0.000174s : 1278: predicate.accumulaten_eliminater 0.17% : 0.000029s : 120: predicate.ad_related_special_op_eliminate 0.93% : 0.000161s : 613: predicate.addn_check_dump 1.00% : 0.000172s : 1278: predicate.addn_zero_filter 0.99% : 0.000170s : 1278: predicate.adjust_all_reduce_mul_add 1.98% : 0.000341s : 1892: predicate.arithmetic_simplify 1.04% : 0.000179s : 1295: predicate.cast_eliminate 1.98% : 0.000341s : 2090: predicate.check_bprop_eliminate 0.48% : 0.000083s : 613: predicate.compare_switch_simplify 0.05% : 0.000009s : 120: predicate.const_output_eliminate 0.10% : 0.000017s : 120: predicate.convert_tensor_all_eliminate 1.44% : 0.000249s : 1498: predicate.convert_tensor_eliminate 0.50% : 0.000087s : 613: predicate.depend_value_elim 1.09% : 0.000188s : 1303: predicate.dict_get_item_const_eliminator 1.15% : 0.000199s : 1303: predicate.dict_get_item_eliminator 1.12% : 0.000192s : 1303: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 120: predicate.elim_not_effective 0.11% : 0.000019s : 120: predicate.elim_shapecalc_of_broadcastargs 1.09% : 0.000187s : 1415: predicate.environ_add_const_eliminate 1.10% : 0.000190s : 1423: predicate.environ_get_add_eliminate 1.12% : 0.000192s : 1415: predicate.environ_get_depend_swap 1.65% : 0.000285s : 2036: predicate.environ_get_eliminate 1.10% : 0.000190s : 1423: predicate.environ_get_set_eliminate 1.53% : 0.000263s : 1891: predicate.exchange_switch_depend_value 1.88% : 0.000324s : 1891: predicate.float_depend_g_call 0.50% : 0.000085s : 613: predicate.float_environ_get_switch 0.59% : 0.000102s : 733: predicate.float_tuple_getitem_switch 0.05% : 0.000009s : 120: predicate.fold_const_symbol 0.36% : 0.000061s : 424: predicate.get_grad_eliminate 0.07% : 0.000012s : 40: predicate.getattr_setattr_resolve 0.06% : 0.000010s : 120: predicate.graph_param_transform 0.49% : 0.000084s : 613: predicate.incorporate_call 0.48% : 0.000082s : 613: predicate.incorporate_call_switch 4.65% : 0.000801s : 4446: predicate.inline 1.11% : 0.000191s : 922: predicate.inline_without_move 0.18% : 0.000031s : 424: predicate.j_node_and_user_rematch 0.42% : 0.000073s : 442: predicate.less_batch_normalization 1.42% : 0.000245s : 1738: predicate.list_to_tuple_eliminator_ 2.51% : 0.000433s : 3048: predicate.load_eliminater 0.19% : 0.000032s : 120: predicate.loop_unroll_after_grad 3.13% : 0.000538s : 2580: predicate.loop_unroll_before_grad 1.26% : 0.000217s : 1559: predicate.make_slice_get_slice_eliminator 0.49% : 0.000084s : 613: predicate.merge_addn 1.87% : 0.000322s : 2054: predicate.micro_step_allgather_replace 1.90% : 0.000328s : 2054: predicate.mini_step_allgather_replace 1.03% : 0.000177s : 1279: predicate.minmaximum_grad 0.11% : 0.000019s : 120: predicate.mutable_eliminate 0.10% : 0.000017s : 120: predicate.opt_reshape 0.11% : 0.000018s : 120: predicate.parallel_virtual_node 2.78% : 0.000479s : 1891: predicate.partial_defer_inline 1.44% : 0.000247s : 1650: predicate.partial_eliminate 1.00% : 0.000172s : 1278: predicate.print_const_string_wrapper 0.50% : 0.000086s : 608: predicate.reduce_all_const_elim 1.20% : 0.000206s : 1279: predicate.reduce_eliminate 0.17% : 0.000030s : 424: predicate.remove_not_recompute_node 1.73% : 0.000298s : 3570: predicate.replace_applicator 0.40% : 0.000069s : 922: predicate.replace_old_param 0.05% : 0.000009s : 120: predicate.reset_defer_inline 1.00% : 0.000173s : 1279: predicate.reshape_eliminate 2.26% : 0.000390s : 2054: predicate.row_tensor_add_zeros_like 0.11% : 0.000019s : 120: predicate.row_tensor_eliminate 2.10% : 0.000362s : 2090: predicate.same_eliminate 0.24% : 0.000041s : 534: predicate.set_cell_output_no_recompute 0.37% : 0.000065s : 424: predicate.shard_identity_eliminate 1.04% : 0.000180s : 1042: predicate.special_op_eliminate 0.57% : 0.000099s : 613: predicate.specialize_transform 2.04% : 0.000351s : 2054: predicate.split_environ_get_set_with_tuple_value 0.83% : 0.000143s : 922: predicate.stack_unstack_eliminate 2.43% : 0.000418s : 3048: predicate.stopgrad_eliminater 0.10% : 0.000017s : 120: predicate.switch_call_monad_eliminater 1.63% : 0.000281s : 1891: predicate.switch_defer_inline 3.55% : 0.000612s : 3981: predicate.switch_layer_defer_inline 5.24% : 0.000903s : 5146: predicate.switch_simplify 1.04% : 0.000179s : 1279: predicate.tile_eliminate 1.03% : 0.000177s : 1279: predicate.transpose_eliminate 1.35% : 0.000232s : 1543: predicate.tuple_list_convert_item_index_to_positive 1.38% : 0.000237s : 1559: predicate.tuple_list_get_item_const_eliminator 1.20% : 0.000207s : 1559: predicate.tuple_list_get_item_depend_reorder 2.08% : 0.000359s : 2351: predicate.tuple_list_get_item_eliminator 1.23% : 0.000212s : 1559: predicate.tuple_list_get_set_item_eliminator 1.85% : 0.000319s : 2172: predicate.tuple_list_set_item_eliminator 1.36% : 0.000235s : 1738: predicate.tuple_to_list_eliminator_ 2.98% : 0.000513s : 3048: predicate.updatestate_pure_node_eliminater 3.02% : 0.000519s : 3662: predicate.updatestate_useless_node_eliminater 0.10% : 0.000018s : 120: predicate.value_based_eliminate 0.36% : 0.000062s : 424: predicate.virtual_dataset_eliminate 0.35% : 0.000061s : 424: predicate.virtual_output_eliminate 0.11% : 0.000020s : 120: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.047322 674 57.95% : 0.027424s : 274: func_graph_cloner_run.FuncGraphClonerGraph 0.42% : 0.000199s : 8: func_graph_cloner_run.FuncGraphClonerNode 41.63% : 0.019699s : 392: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 14.684269 263 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000158s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.01% : 0.000766s : 1: add_recomputation 0.00% : 0.000013s : 1: assign_add_opt 0.03% : 0.004457s : 1: auto_monad 0.00% : 0.000373s : 1: auto_monad_reorder 0.00% : 0.000006s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.000925s : 1: bootstrap 0.00% : 0.000083s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.00% : 0.000125s : 1: convert_after_rewriter 0.00% : 0.000288s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000411s : 1: distribtued_split 0.01% : 0.000956s : 1: eliminate_special_op_node 0.00% : 0.000126s : 1: environ_conv 0.00% : 0.000014s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000064s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000008s : 1: inline 0.00% : 0.000013s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.001106s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000194s : 1: opt.transform.loop_unroll_optimizer 0.72% : 0.105741s : 145: opt.transform.opt_a 0.01% : 0.000750s : 1: opt.transform.opt_after_cconv 0.02% : 0.002927s : 27: opt.transform.opt_b 0.01% : 0.001252s : 4: opt.transform.opt_resolve 0.01% : 0.000921s : 1: opt.transform.opt_trans_graph 0.00% : 0.000380s : 3: opt.transform.special_op_eliminate 0.00% : 0.000651s : 4: opt.transform.symbol_engine_opt 1.82% : 0.267313s : 1: opt_a 0.01% : 0.001494s : 1: opt_after_cconv 0.03% : 0.003672s : 1: opt_b 1.93% : 0.284020s : 1: optimize 0.00% : 0.000148s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000091s : 1: order_py_execute_after_rewriter 0.00% : 0.000160s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000059s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000034s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000008s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.01% : 0.001236s : 1: pre_auto_parallel 0.01% : 0.001186s : 1: py_interpret_to_execute 0.00% : 0.000134s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000531s : 1: remove_dup_value 0.35% : 0.051104s : 2: renormalize.infer 0.27% : 0.039371s : 2: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000008s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001160s : 1: rewriter_after_opt_a 0.02% : 0.003129s : 1: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000006s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000135s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000766s : 1: symbol_engine_optimizer 90.84% : 13.339464s : 1: task_emit 0.01% : 0.000955s : 1: tuple_transform 3.85% : 0.564649s : 1: type_inference 0.00% : 0.000562s : 1: validate [WARNING] ME(54221:281472867404816,MainProcess):2025-02-07-15:54:39.440.254 [mindspore/parallel/_utils.py:359] You are suggested to use mindspore.context.set_auto_parallel_context(parameter_broadcast=True) or mindspore.common.set_seed() to share parameters among multi-devices. [WARNING] ME(54186:281473137400848,MainProcess):2025-02-07-15:54:39.440.325 [mindspore/parallel/_utils.py:359] You are suggested to use mindspore.context.set_auto_parallel_context(parameter_broadcast=True) or mindspore.common.set_seed() to share parameters among multi-devices. epoch: 1 step: 468, loss is 2.306516647338867 epoch: 1 step: 468, loss is 2.298091411590576 epoch: 1 step: 468, loss is 2.2847843170166016 epoch: 1 step: 468, loss is 2.306880474090576 epoch: 2 step: 468, loss is 2.3069849014282227 epoch: 2 step: 468, loss is 2.308450698852539 epoch: 2 step: 468, loss is 2.306143045425415 epoch: 2 step: 468, loss is 2.2997546195983887 epoch: 3 step: 468, loss is 2.306415557861328 epoch: 3 step: 468, loss is 2.2729806900024414 epoch: 3 step: 468, loss is 2.310114622116089 epoch: 3 step: 468, loss is 2.29491925239563 epoch: 4 step: 468, loss is 0.060764919966459274 epoch: 4 step: 468, loss is 0.23359327018260956 epoch: 4 step: 468, loss is 0.12016694247722626 epoch: 4 step: 468, loss is 0.3902439773082733 epoch: 5 step: 468, loss is 0.11592420935630798 epoch: 5 step: 468, loss is 0.18687066435813904 epoch: 5 step: 468, loss is 0.13159051537513733 epoch: 5 step: 468, loss is 0.10158547759056091 .... TotalTime = 0.637615, [21] [bootstrap]: 0.00058712 [type_inference]: 0.199938 [auto_monad]: 0.00125187 [graph_reusing]: 1.731e-05 [inline]: 1.96975e-06 [parallel-infer-symbol]: 2.99979e-06 [pre_auto_parallel]: 0.00028539 [insert-virtual-dataset]: 3.49013e-06 [parallel-infer-symbol-second]: 1.03004e-06 [dataset_repeat_opt]: 1.63028e-06 [pipeline_split]: 2.27988e-06 [optimize]: 0.0354506, [52] [py_interpret_to_execute]: 0.0002329 [rewriter_before_opt_a]: 0.00069104 [opt_a]: 0.0298144, [2] [Cycle 1]: 0.0227105, [43] [expand_dump_flag]: 2.15797e-05 [switch_simplify]: 0.00064066 [loop_unroll]: 0.00032933 [a_1]: 0.00900355 [recompute_prepare]: 8.999e-05 [updatestate_depend_eliminate]: 0.00032246 [updatestate_assign_eliminate]: 4.951e-05 [updatestate_loads_eliminate]: 0.00025929 [parameter_eliminate]: 4.09968e-06 [a_2]: 0.00137167 [accelerated_algorithm]: 0.00012877 [shard]: 2.35019e-06 [meta_shard_fg_expand]: 2.52002e-05 [shard_inline]: 4.149e-05 [auto_parallel]: 3.54499e-05 [parallel]: 1.06501e-05 [flash_sp]: 2.35303e-05 [merge_comm]: 2.84803e-05 [allreduce_fusion]: 2.53096e-05 [matmul_add_comm_reduction]: 3.719e-05 [allreduce_slice_to_reducescatter]: 5.49946e-07 [virtual_shard_identity]: 4.46299e-05 [virtual_dataset]: 4.12599e-05 [get_grad_eliminate_]: 4.17503e-05 [virtual_output]: 4.09503e-05 [merge_forward]: 3.23104e-05 [cell_reuse_recompute_pass]: 2.42004e-06 [cell_reuse_handle_not_recompute_node_pass]: 8.07601e-05 [before_grad]: 7.668e-05 [inplace_validation]: 2.98e-05 [meta_fg_expand]: 4.41102e-05 [inplace_validation_after_expand]: 3.11499e-05 [flash_sp_send_recv_attached]: 3.5502e-06 [receive_attached]: 2.53972e-06 [after_resolve]: 5.423e-05 [a_after_grad]: 7.42599e-05 [special_op_eliminate]: 3.94401e-05 [renormalize]: 0.0087209 [add_forward_monad_depend]: 5.07012e-06 [auto_monad_grad]: 2.88012e-06 [auto_monad_eliminator]: 0.00012998 [cse]: 0.00016525 [a_3]: 0.00028683 [Cycle 2]: 0.00358369, [43] [expand_dump_flag]: 1.79978e-06 [switch_simplify]: 4.16297e-05 [loop_unroll]: 3.86201e-05 [a_1]: 0.00132464 [recompute_prepare]: 3.79402e-05 [updatestate_depend_eliminate]: 3.11201e-05 [updatestate_assign_eliminate]: 2.61501e-05 [updatestate_loads_eliminate]: 2.73497e-05 [parameter_eliminate]: 2.86987e-06 [a_2]: 0.00060002 [accelerated_algorithm]: 5.07599e-05 [shard]: 1.53016e-06 [meta_shard_fg_expand]: 1.34199e-05 [shard_inline]: 3.93e-05 [auto_parallel]: 3.35802e-05 [parallel]: 7.20005e-06 [flash_sp]: 3.62983e-06 [merge_comm]: 2.82698e-05 [allreduce_fusion]: 2.601e-05 [matmul_add_comm_reduction]: 3.454e-05 [allreduce_slice_to_reducescatter]: 5.0012e-07 [virtual_shard_identity]: 4.18699e-05 [virtual_dataset]: 3.972e-05 [get_grad_eliminate_]: 3.793e-05 [virtual_output]: 3.84599e-05 [merge_forward]: 2.47797e-05 [cell_reuse_recompute_pass]: 2.27988e-06 [cell_reuse_handle_not_recompute_node_pass]: 7.40401e-05 [before_grad]: 6.998e-05 [inplace_validation]: 2.39699e-05 [meta_fg_expand]: 2.72999e-05 [inplace_validation_after_expand]: 2.86899e-05 [flash_sp_send_recv_attached]: 1.07987e-06 [receive_attached]: 1.05007e-06 [after_resolve]: 4.85699e-05 [a_after_grad]: 6.48396e-05 [special_op_eliminate]: 3.82396e-05 [renormalize]: 7.03149e-08 [add_forward_monad_depend]: 1.51992e-06 [auto_monad_grad]: 1.70991e-06 [auto_monad_eliminator]: 6.72298e-05 [cse]: 0.00011475 [a_3]: 0.00027419 [py_interpret_to_execute_after_opt_a]: 3.76799e-05 [slice_cell_reuse_recomputed_activation]: 2.6701e-06 [rewriter_after_opt_a]: 0.00050286 [convert_after_rewriter]: 3.454e-05 [order_py_execute_after_rewriter]: 2.30903e-05 [opt_b]: 0.0012531, [1] [Cycle 1]: 0.00124593, [7] [b_1]: 0.0009737 [b_2]: 4.408e-05 [updatestate_depend_eliminate]: 2.622e-05 [updatestate_assign_eliminate]: 2.308e-05 [updatestate_loads_eliminate]: 2.58596e-05 [renormalize]: 4.4005e-07 [cse]: 0.00011494 [optimize_parallel_all_gather_comm]: 3.41902e-05 [overlap_param_gather]: 1.68988e-06 [cconv]: 3.41902e-05 [loop_unroll]: 0.00063448 [opt_after_cconv]: 0.00050904, [1] [Cycle 1]: 0.0005026, [7] [c_1]: 0.00025298 [parameter_eliminate]: 2.42004e-06 [updatestate_depend_eliminate]: 3.94597e-05 [updatestate_assign_eliminate]: 3.024e-05 [updatestate_loads_eliminate]: 2.74801e-05 [cse]: 0.00011244 [renormalize]: 5.29923e-07 [remove_dup_value]: 0.00015033 [tuple_transform]: 0.00035978, [1] [Cycle 1]: 0.00035393, [2] [d_1]: 0.00034109 [renormalize]: 4.00003e-07 [partial_unused_args_eliminate]: 3.06033e-06 [add_cache_embedding]: 3.99197e-05 [add_recomputation]: 0.00025329 [cse_after_recomputation]: 9.68501e-05, [1] [Cycle 1]: 9.123e-05, [1] [cse]: 8.47303e-05 [environ_conv]: 3.15602e-05 [swap_dp_allreduce_reducescatter]: 3.24799e-05 [bias_add_comm_swap]: 2.54018e-06 [label_micro_interleaved_index]: 2.3297e-06 [label_fine_grained_interleaved_index]: 2.04984e-06 [merge_cast_opt]: 1.35973e-06 [slice_recompute_activation]: 1.79978e-06 [micro_interleaved_order_control]: 2.3297e-06 [assign_add_opt]: 9.1698e-06 [ForceFp32Comm]: 8.60076e-07 [remove_cast_before_assign_add]: 1.09011e-06 [full_micro_interleaved_order_control]: 2.40002e-06 [reorder_send_recv_between_fp_bp]: 2.17976e-06 [comm_op_add_attrs]: 1.05985e-06 [add_comm_op_reuse_tag]: 1.10036e-06 [interleave_split_concat_branches]: 7.80448e-07 [interleave_parallel_branches]: 9.29926e-07 [overlap_opt_shard_in_pipeline]: 5.49993e-06 [overlap_opt_shard_grad_in_pipeline]: 2.31015e-06 [control_data_broadcast_order]: 1.16974e-06 [grouped_pairwise_exchange_alltoall]: 1.33039e-06 [offloading_packed_experts]: 1.19023e-06 [overlap_recompute_and_grad_model_parallel]: 2.66964e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.17021e-06 [overlap_recompute_allgather_and_fa_grad]: 1.2801e-06 [overlap_grad_ring_attention]: 1.78022e-06 [overlap_grad_flash_sp]: 5.72498e-05 [begin_end_overlap_inline]: 6.99889e-07 [split_matmul_comm_elemetwise]: 2.6403e-06 [split_layernorm_comm]: 1.8999e-06 [handle_group_info]: 1.3602e-06 [symbol_engine_optimizer]: 0.00029312, [1] [Cycle 1]: 0.00028721, [6] [build]: 2.47201e-05 [elim_shapecalc]: 4.60399e-05 [elim_not_effective]: 7.88798e-05 [opt_reshape]: 3.98499e-05 [fold_const_symbol]: 6.90301e-05 [renormalize]: 3.49712e-07 [pipeline_parallel_scheduler]: 2.31015e-06 [auto_monad_reorder]: 0.00011808 [get_jit_bprop_graph]: 6.19795e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00068426 [distribtued_split]: 0.00012001 [validate]: 0.00011297 [task_emit]: 0.398628 [execute]: 1.25999e-05 Sums bootstrap : 0.000587s : 0.09% type_inference : 0.199938s : 31.60% auto_monad : 0.001252s : 0.20% graph_reusing : 0.000017s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000285s : 0.05% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000233s : 0.04% optimize.rewriter_before_opt_a : 0.000691s : 0.11% optimize.opt_a.expand_dump_flag : 0.000023s : 0.00% optimize.opt_a.switch_simplify : 0.000682s : 0.11% optimize.opt_a.loop_unroll : 0.000368s : 0.06% optimize.opt_a.a_1 : 0.010328s : 1.63% optimize.opt_a.recompute_prepare : 0.000128s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000354s : 0.06% optimize.opt_a.updatestate_assign_eliminate : 0.000076s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000287s : 0.05% optimize.opt_a.parameter_eliminate : 0.000007s : 0.00% optimize.opt_a.a_2 : 0.001972s : 0.31% optimize.opt_a.accelerated_algorithm : 0.000180s : 0.03% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000039s : 0.01% optimize.opt_a.shard_inline : 0.000081s : 0.01% optimize.opt_a.auto_parallel : 0.000069s : 0.01% optimize.opt_a.parallel : 0.000018s : 0.00% optimize.opt_a.flash_sp : 0.000027s : 0.00% optimize.opt_a.merge_comm : 0.000057s : 0.01% optimize.opt_a.allreduce_fusion : 0.000051s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000072s : 0.01% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000086s : 0.01% optimize.opt_a.virtual_dataset : 0.000081s : 0.01% optimize.opt_a.get_grad_eliminate_ : 0.000080s : 0.01% optimize.opt_a.virtual_output : 0.000079s : 0.01% optimize.opt_a.merge_forward : 0.000057s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000155s : 0.02% optimize.opt_a.before_grad : 0.000147s : 0.02% optimize.opt_a.inplace_validation : 0.000054s : 0.01% optimize.opt_a.meta_fg_expand : 0.000071s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000060s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000103s : 0.02% optimize.opt_a.a_after_grad : 0.000139s : 0.02% optimize.opt_a.special_op_eliminate : 0.000078s : 0.01% optimize.opt_a.renormalize : 0.008721s : 1.38% optimize.opt_a.add_forward_monad_depend : 0.000007s : 0.00% optimize.opt_a.auto_monad_grad : 0.000005s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000197s : 0.03% optimize.opt_a.cse : 0.000280s : 0.04% optimize.opt_a.a_3 : 0.000561s : 0.09% optimize.py_interpret_to_execute_after_opt_a : 0.000038s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000503s : 0.08% optimize.convert_after_rewriter : 0.000035s : 0.01% optimize.order_py_execute_after_rewriter : 0.000023s : 0.00% optimize.opt_b.b_1 : 0.000974s : 0.15% optimize.opt_b.b_2 : 0.000044s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000026s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000023s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000026s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000115s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000034s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000034s : 0.01% optimize.loop_unroll : 0.000634s : 0.10% optimize.opt_after_cconv.c_1 : 0.000253s : 0.04% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000039s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000030s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000027s : 0.00% optimize.opt_after_cconv.cse : 0.000112s : 0.02% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000150s : 0.02% optimize.tuple_transform.d_1 : 0.000341s : 0.05% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000040s : 0.01% optimize.add_recomputation : 0.000253s : 0.04% optimize.cse_after_recomputation.cse : 0.000085s : 0.01% optimize.environ_conv : 0.000032s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000032s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000009s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000005s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000003s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000057s : 0.01% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000003s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000025s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000046s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000079s : 0.01% optimize.symbol_engine_optimizer.opt_reshape : 0.000040s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000069s : 0.01% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000118s : 0.02% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000684s : 0.11% distribtued_split : 0.000120s : 0.02% validate : 0.000113s : 0.02% task_emit : 0.398628s : 63.00% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.002552 677 0.50% : 0.000013s : 6: substitution.depend_value_elim 0.47% : 0.000012s : 32: substitution.elim_not_effective 0.43% : 0.000011s : 8: substitution.float_tuple_getitem_switch 0.40% : 0.000010s : 32: substitution.fold_const_symbol 1.06% : 0.000027s : 40: substitution.graph_param_transform 74.97% : 0.001913s : 76: substitution.inline 1.01% : 0.000026s : 64: substitution.j_node_and_user_rematch 3.19% : 0.000081s : 14: substitution.less_batch_normalization 0.88% : 0.000023s : 48: substitution.load_eliminater 0.29% : 0.000007s : 4: substitution.minmaximum_grad 1.38% : 0.000035s : 64: substitution.remove_not_recompute_node 0.41% : 0.000010s : 16: substitution.replace_old_param 1.39% : 0.000035s : 15: substitution.switch_simplify 1.17% : 0.000030s : 8: substitution.tuple_list_convert_item_index_to_positive 0.53% : 0.000014s : 8: substitution.tuple_list_get_item_const_eliminator 0.67% : 0.000017s : 8: substitution.tuple_list_get_item_depend_reorder 2.20% : 0.000056s : 15: substitution.tuple_list_get_item_eliminator 0.68% : 0.000017s : 8: substitution.tuple_list_get_set_item_eliminator 3.48% : 0.000089s : 104: substitution.updatestate_pure_node_eliminater 4.89% : 0.000125s : 107: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.199696 2 94.23% : 0.188179s : 1: type_inference.infer 5.77% : 0.011517s : 1: type_inference.specialize ------[replace.] 0.000744 95 3.03% : 0.000023s : 3: replace.depend_value_elim 74.19% : 0.000552s : 76: replace.inline 21.66% : 0.000161s : 15: replace.switch_simplify 1.12% : 0.000008s : 1: replace.tuple_list_get_item_eliminator ------[match.] 0.001908 95 0.09% : 0.000002s : 3: match.depend_value_elim 98.22% : 0.001874s : 76: match.inline 1.45% : 0.000028s : 15: match.switch_simplify 0.24% : 0.000005s : 1: match.tuple_list_get_item_eliminator ------[predicate.] 0.002527 17318 1.18% : 0.000030s : 212: predicate.accumulaten_eliminater 0.42% : 0.000011s : 40: predicate.ad_related_special_op_eliminate 0.64% : 0.000016s : 125: predicate.addn_check_dump 1.11% : 0.000028s : 212: predicate.addn_zero_filter 1.11% : 0.000028s : 212: predicate.adjust_all_reduce_mul_add 2.36% : 0.000060s : 337: predicate.arithmetic_simplify 1.14% : 0.000029s : 212: predicate.cast_eliminate 0.44% : 0.000011s : 80: predicate.check_bprop_eliminate 0.65% : 0.000016s : 125: predicate.compare_switch_simplify 0.12% : 0.000003s : 40: predicate.const_output_eliminate 0.21% : 0.000005s : 40: predicate.convert_tensor_all_eliminate 1.37% : 0.000035s : 213: predicate.convert_tensor_eliminate 0.68% : 0.000017s : 125: predicate.depend_value_elim 1.23% : 0.000031s : 212: predicate.dict_get_item_const_eliminator 1.29% : 0.000033s : 212: predicate.dict_get_item_eliminator 1.25% : 0.000032s : 212: predicate.dict_set_item_eliminator 0.12% : 0.000003s : 40: predicate.elim_not_effective 0.25% : 0.000006s : 40: predicate.elim_shapecalc_of_broadcastargs 1.34% : 0.000034s : 252: predicate.environ_add_const_eliminate 1.35% : 0.000034s : 252: predicate.environ_get_add_eliminate 1.35% : 0.000034s : 252: predicate.environ_get_depend_swap 2.03% : 0.000051s : 377: predicate.environ_get_eliminate 1.32% : 0.000033s : 252: predicate.environ_get_set_eliminate 1.63% : 0.000041s : 289: predicate.exchange_switch_depend_value 1.91% : 0.000048s : 289: predicate.float_depend_g_call 0.66% : 0.000017s : 125: predicate.float_environ_get_switch 0.89% : 0.000022s : 165: predicate.float_tuple_getitem_switch 0.12% : 0.000003s : 40: predicate.fold_const_symbol 0.44% : 0.000011s : 80: predicate.get_grad_eliminate 0.14% : 0.000004s : 40: predicate.graph_param_transform 0.67% : 0.000017s : 125: predicate.incorporate_call 0.64% : 0.000016s : 125: predicate.incorporate_call_switch 5.52% : 0.000139s : 786: predicate.inline 0.56% : 0.000014s : 80: predicate.inline_without_move 0.22% : 0.000006s : 80: predicate.j_node_and_user_rematch 0.53% : 0.000013s : 80: predicate.less_batch_normalization 1.62% : 0.000041s : 293: predicate.list_to_tuple_eliminator_ 2.79% : 0.000071s : 505: predicate.load_eliminater 0.45% : 0.000011s : 40: predicate.loop_unroll_after_grad 2.80% : 0.000071s : 424: predicate.loop_unroll_before_grad 1.64% : 0.000042s : 292: predicate.make_slice_get_slice_eliminator 0.66% : 0.000017s : 125: predicate.merge_addn 0.42% : 0.000011s : 80: predicate.micro_step_allgather_replace 0.43% : 0.000011s : 80: predicate.mini_step_allgather_replace 1.12% : 0.000028s : 212: predicate.minmaximum_grad 0.27% : 0.000007s : 40: predicate.mutable_eliminate 0.22% : 0.000005s : 40: predicate.opt_reshape 0.23% : 0.000006s : 40: predicate.parallel_virtual_node 2.51% : 0.000063s : 289: predicate.partial_defer_inline 1.49% : 0.000038s : 253: predicate.partial_eliminate 1.21% : 0.000031s : 212: predicate.print_const_string_wrapper 0.63% : 0.000016s : 120: predicate.reduce_all_const_elim 1.48% : 0.000037s : 212: predicate.reduce_eliminate 0.23% : 0.000006s : 80: predicate.remove_not_recompute_node 1.09% : 0.000027s : 293: predicate.replace_applicator 0.23% : 0.000006s : 80: predicate.replace_old_param 0.12% : 0.000003s : 40: predicate.reset_defer_inline 1.19% : 0.000030s : 212: predicate.reshape_eliminate 0.43% : 0.000011s : 80: predicate.row_tensor_add_zeros_like 0.25% : 0.000006s : 40: predicate.row_tensor_eliminate 0.50% : 0.000013s : 80: predicate.same_eliminate 0.38% : 0.000010s : 127: predicate.set_cell_output_no_recompute 0.46% : 0.000012s : 80: predicate.shard_identity_eliminate 0.67% : 0.000017s : 120: predicate.special_op_eliminate 0.76% : 0.000019s : 125: predicate.specialize_transform 0.46% : 0.000012s : 80: predicate.split_environ_get_set_with_tuple_value 0.50% : 0.000013s : 80: predicate.stack_unstack_eliminate 2.71% : 0.000069s : 505: predicate.stopgrad_eliminater 0.23% : 0.000006s : 40: predicate.switch_call_monad_eliminater 1.70% : 0.000043s : 289: predicate.switch_defer_inline 2.10% : 0.000053s : 369: predicate.switch_layer_defer_inline 5.91% : 0.000149s : 868: predicate.switch_simplify 1.18% : 0.000030s : 212: predicate.tile_eliminate 1.12% : 0.000028s : 212: predicate.transpose_eliminate 1.74% : 0.000044s : 292: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000044s : 292: predicate.tuple_list_get_item_const_eliminator 1.57% : 0.000040s : 292: predicate.tuple_list_get_item_depend_reorder 2.46% : 0.000062s : 418: predicate.tuple_list_get_item_eliminator 1.66% : 0.000042s : 292: predicate.tuple_list_get_set_item_eliminator 2.37% : 0.000060s : 417: predicate.tuple_list_set_item_eliminator 1.66% : 0.000042s : 293: predicate.tuple_to_list_eliminator_ 2.89% : 0.000073s : 505: predicate.updatestate_pure_node_eliminater 3.54% : 0.000089s : 630: predicate.updatestate_useless_node_eliminater 0.23% : 0.000006s : 40: predicate.value_based_eliminate 0.44% : 0.000011s : 80: predicate.virtual_dataset_eliminate 0.44% : 0.000011s : 80: predicate.virtual_output_eliminate 0.24% : 0.000006s : 40: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.008480 114 56.29% : 0.004773s : 36: func_graph_cloner_run.FuncGraphClonerGraph 43.71% : 0.003707s : 78: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.698770 209 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000044s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.04% : 0.000259s : 1: add_recomputation 0.00% : 0.000012s : 1: assign_add_opt 0.18% : 0.001274s : 1: auto_monad 0.02% : 0.000127s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.09% : 0.000616s : 1: bootstrap 0.01% : 0.000039s : 1: cconv 0.00% : 0.000005s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000040s : 1: convert_after_rewriter 0.01% : 0.000100s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.02% : 0.000129s : 1: distribtued_split 0.10% : 0.000699s : 1: eliminate_special_op_node 0.01% : 0.000036s : 1: environ_conv 0.00% : 0.000021s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000025s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000010s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.09% : 0.000644s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.01% : 0.000054s : 1: opt.transform.loop_unroll_optimizer 2.17% : 0.015150s : 97: opt.transform.opt_a 0.04% : 0.000251s : 1: opt.transform.opt_after_cconv 0.14% : 0.000994s : 27: opt.transform.opt_b 0.05% : 0.000339s : 1: opt.transform.opt_trans_graph 0.02% : 0.000134s : 3: opt.transform.special_op_eliminate 0.03% : 0.000229s : 4: opt.transform.symbol_engine_opt 4.27% : 0.029820s : 1: opt_a 0.07% : 0.000514s : 1: opt_after_cconv 0.18% : 0.001257s : 1: opt_b 5.07% : 0.035462s : 1: optimize 0.01% : 0.000038s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000027s : 1: order_py_execute_after_rewriter 0.01% : 0.000061s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000009s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000009s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.04% : 0.000297s : 1: pre_auto_parallel 0.03% : 0.000244s : 1: py_interpret_to_execute 0.01% : 0.000043s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000156s : 1: remove_dup_value 0.57% : 0.003995s : 1: renormalize.infer 0.67% : 0.004713s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.07% : 0.000510s : 1: rewriter_after_opt_a 0.10% : 0.000701s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000036s : 1: swap_dp_allreduce_reducescatter 0.04% : 0.000297s : 1: symbol_engine_optimizer 57.05% : 0.398664s : 1: task_emit 0.05% : 0.000363s : 1: tuple_transform 28.62% : 0.199973s : 1: type_inference 0.03% : 0.000180s : 1: validate [WARNING] ME(54175:281473095994384,MainProcess):2025-02-07-15:54:44.926.385 [mindspore/parallel/_utils.py:359] You are suggested to use mindspore.context.set_auto_parallel_context(parameter_broadcast=True) or mindspore.common.set_seed() to share parameters among multi-devices. TotalTime = 0.647447, [21] [bootstrap]: 0.00061848 [type_inference]: 0.202736 [auto_monad]: 0.00128503 [graph_reusing]: 1.746e-05 [inline]: 1.73971e-06 [parallel-infer-symbol]: 2.78e-06 [pre_auto_parallel]: 0.00028572 [insert-virtual-dataset]: 3.39979e-06 [parallel-infer-symbol-second]: 9.09902e-07 [dataset_repeat_opt]: 1.50967e-06 [pipeline_split]: 1.79978e-06 [optimize]: 0.0350412, [52] [py_interpret_to_execute]: 0.00023257 [rewriter_before_opt_a]: 0.0007038 [opt_a]: 0.0293832, [2] [Cycle 1]: 0.0223846, [43] [expand_dump_flag]: 2.03303e-05 [switch_simplify]: 0.00064395 [loop_unroll]: 0.00032251 [a_1]: 0.00895541 [recompute_prepare]: 9.26498e-05 [updatestate_depend_eliminate]: 0.00032347 [updatestate_assign_eliminate]: 5.03301e-05 [updatestate_loads_eliminate]: 0.00026311 [parameter_eliminate]: 3.87989e-06 [a_2]: 0.00139357 [accelerated_algorithm]: 0.00013042 [shard]: 2.46009e-06 [meta_shard_fg_expand]: 2.43401e-05 [shard_inline]: 4.41e-05 [auto_parallel]: 3.38601e-05 [parallel]: 1.14301e-05 [flash_sp]: 2.41599e-05 [merge_comm]: 3.037e-05 [allreduce_fusion]: 2.584e-05 [matmul_add_comm_reduction]: 3.81004e-05 [allreduce_slice_to_reducescatter]: 7.00355e-07 [virtual_shard_identity]: 4.67701e-05 [virtual_dataset]: 4.20404e-05 [get_grad_eliminate_]: 4.33298e-05 [virtual_output]: 4.10401e-05 [merge_forward]: 3.37702e-05 [cell_reuse_recompute_pass]: 2.53972e-06 [cell_reuse_handle_not_recompute_node_pass]: 8.127e-05 [before_grad]: 7.54204e-05 [inplace_validation]: 3.00501e-05 [meta_fg_expand]: 4.32702e-05 [inplace_validation_after_expand]: 3.15299e-05 [flash_sp_send_recv_attached]: 4.10015e-06 [receive_attached]: 2.75997e-06 [after_resolve]: 5.642e-05 [a_after_grad]: 7.55102e-05 [special_op_eliminate]: 4.191e-05 [renormalize]: 0.00817956 [add_forward_monad_depend]: 4.9402e-06 [auto_monad_grad]: 2.21981e-06 [auto_monad_eliminator]: 0.0001559 [cse]: 0.00031381 [a_3]: 0.00029433 [Cycle 2]: 0.00364306, [43] [expand_dump_flag]: 1.62004e-06 [switch_simplify]: 4.18103e-05 [loop_unroll]: 3.98797e-05 [a_1]: 0.00132294 [recompute_prepare]: 3.86001e-05 [updatestate_depend_eliminate]: 3.13101e-05 [updatestate_assign_eliminate]: 2.57902e-05 [updatestate_loads_eliminate]: 2.69897e-05 [parameter_eliminate]: 2.25985e-06 [a_2]: 0.00063027 [accelerated_algorithm]: 5.24302e-05 [shard]: 1.24006e-06 [meta_shard_fg_expand]: 1.28304e-05 [shard_inline]: 4.29302e-05 [auto_parallel]: 3.40398e-05 [parallel]: 7.07991e-06 [flash_sp]: 3.72995e-06 [merge_comm]: 2.74498e-05 [allreduce_fusion]: 2.59397e-05 [matmul_add_comm_reduction]: 3.454e-05 [allreduce_slice_to_reducescatter]: 2.99886e-07 [virtual_shard_identity]: 4.23798e-05 [virtual_dataset]: 3.86504e-05 [get_grad_eliminate_]: 3.90802e-05 [virtual_output]: 3.81102e-05 [merge_forward]: 2.49301e-05 [cell_reuse_recompute_pass]: 2.35997e-06 [cell_reuse_handle_not_recompute_node_pass]: 7.49398e-05 [before_grad]: 6.83297e-05 [inplace_validation]: 2.37301e-05 [meta_fg_expand]: 2.65199e-05 [inplace_validation_after_expand]: 3.00999e-05 [flash_sp_send_recv_attached]: 1.19023e-06 [receive_attached]: 9.49949e-07 [after_resolve]: 4.852e-05 [a_after_grad]: 6.69202e-05 [special_op_eliminate]: 3.94001e-05 [renormalize]: 1.09896e-07 [add_forward_monad_depend]: 2.00979e-06 [auto_monad_grad]: 1.45985e-06 [auto_monad_eliminator]: 6.714e-05 [cse]: 0.00011337 [a_3]: 0.00028421 [py_interpret_to_execute_after_opt_a]: 3.633e-05 [slice_cell_reuse_recomputed_activation]: 2.63005e-06 [rewriter_after_opt_a]: 0.00050045 [convert_after_rewriter]: 3.59798e-05 [order_py_execute_after_rewriter]: 2.39098e-05 [opt_b]: 0.00123351, [1] [Cycle 1]: 0.00122655, [7] [b_1]: 0.00095233 [b_2]: 4.44399e-05 [updatestate_depend_eliminate]: 2.72198e-05 [updatestate_assign_eliminate]: 2.33301e-05 [updatestate_loads_eliminate]: 2.63001e-05 [renormalize]: 3.90224e-07 [cse]: 0.00011389 [optimize_parallel_all_gather_comm]: 3.54601e-05 [overlap_param_gather]: 1.43005e-06 [cconv]: 3.553e-05 [loop_unroll]: 0.00061496 [opt_after_cconv]: 0.00052399, [1] [Cycle 1]: 0.0005176, [7] [c_1]: 0.00025958 [parameter_eliminate]: 2.50014e-06 [updatestate_depend_eliminate]: 4.12399e-05 [updatestate_assign_eliminate]: 2.938e-05 [updatestate_loads_eliminate]: 2.85399e-05 [cse]: 0.00011475 [renormalize]: 4.09782e-07 [remove_dup_value]: 0.00015263 [tuple_transform]: 0.00035237, [1] [Cycle 1]: 0.00034644, [2] [d_1]: 0.00033292 [renormalize]: 4.09782e-07 [partial_unused_args_eliminate]: 3.47989e-06 [add_cache_embedding]: 4.214e-05 [add_recomputation]: 0.00025148 [cse_after_recomputation]: 0.00010245, [1] [Cycle 1]: 9.66899e-05, [1] [cse]: 8.87401e-05 [environ_conv]: 3.161e-05 [swap_dp_allreduce_reducescatter]: 3.34699e-05 [bias_add_comm_swap]: 2.56998e-06 [label_micro_interleaved_index]: 2.46009e-06 [label_fine_grained_interleaved_index]: 2.16998e-06 [merge_cast_opt]: 1.5297e-06 [slice_recompute_activation]: 1.97999e-06 [micro_interleaved_order_control]: 1.68011e-06 [assign_add_opt]: 1.00699e-05 [ForceFp32Comm]: 1.04029e-06 [remove_cast_before_assign_add]: 9.89996e-07 [full_micro_interleaved_order_control]: 2.35997e-06 [reorder_send_recv_between_fp_bp]: 2.2403e-06 [comm_op_add_attrs]: 1.02026e-06 [add_comm_op_reuse_tag]: 1.09989e-06 [interleave_split_concat_branches]: 1.13994e-06 [interleave_parallel_branches]: 1.17021e-06 [overlap_opt_shard_in_pipeline]: 5.88968e-06 [overlap_opt_shard_grad_in_pipeline]: 2.73995e-06 [control_data_broadcast_order]: 1.11992e-06 [grouped_pairwise_exchange_alltoall]: 1.72015e-06 [offloading_packed_experts]: 1.09011e-06 [overlap_recompute_and_grad_model_parallel]: 2.42004e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.00006e-07 [overlap_recompute_allgather_and_fa_grad]: 1.23028e-06 [overlap_grad_ring_attention]: 2.11038e-06 [overlap_grad_flash_sp]: 5.526e-05 [begin_end_overlap_inline]: 7.90227e-07 [split_matmul_comm_elemetwise]: 2.27988e-06 [split_layernorm_comm]: 2.00979e-06 [handle_group_info]: 9.89996e-07 [symbol_engine_optimizer]: 0.00029811, [1] [Cycle 1]: 0.00029199, [6] [build]: 2.582e-05 [elim_shapecalc]: 4.77801e-05 [elim_not_effective]: 7.63899e-05 [opt_reshape]: 4.18401e-05 [fold_const_symbol]: 6.80299e-05 [renormalize]: 3.39933e-07 [pipeline_parallel_scheduler]: 1.60001e-06 [auto_monad_reorder]: 0.00012278 [get_jit_bprop_graph]: 5.29923e-07 [rewriter_after_jit_bprop_graph]: 4.29805e-07 [eliminate_special_op_node]: 0.00066022 [distribtued_split]: 0.00012111 [validate]: 0.00011783 [task_emit]: 0.406056 [execute]: 1.53799e-05 Sums bootstrap : 0.000618s : 0.10% type_inference : 0.202736s : 31.54% auto_monad : 0.001285s : 0.20% graph_reusing : 0.000017s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000286s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000233s : 0.04% optimize.rewriter_before_opt_a : 0.000704s : 0.11% optimize.opt_a.expand_dump_flag : 0.000022s : 0.00% optimize.opt_a.switch_simplify : 0.000686s : 0.11% optimize.opt_a.loop_unroll : 0.000362s : 0.06% optimize.opt_a.a_1 : 0.010278s : 1.60% optimize.opt_a.recompute_prepare : 0.000131s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000355s : 0.06% optimize.opt_a.updatestate_assign_eliminate : 0.000076s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000290s : 0.05% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.002024s : 0.31% optimize.opt_a.accelerated_algorithm : 0.000183s : 0.03% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000037s : 0.01% optimize.opt_a.shard_inline : 0.000087s : 0.01% optimize.opt_a.auto_parallel : 0.000068s : 0.01% optimize.opt_a.parallel : 0.000019s : 0.00% optimize.opt_a.flash_sp : 0.000028s : 0.00% optimize.opt_a.merge_comm : 0.000058s : 0.01% optimize.opt_a.allreduce_fusion : 0.000052s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000073s : 0.01% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000089s : 0.01% optimize.opt_a.virtual_dataset : 0.000081s : 0.01% optimize.opt_a.get_grad_eliminate_ : 0.000082s : 0.01% optimize.opt_a.virtual_output : 0.000079s : 0.01% optimize.opt_a.merge_forward : 0.000059s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000156s : 0.02% optimize.opt_a.before_grad : 0.000144s : 0.02% optimize.opt_a.inplace_validation : 0.000054s : 0.01% optimize.opt_a.meta_fg_expand : 0.000070s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000062s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000105s : 0.02% optimize.opt_a.a_after_grad : 0.000142s : 0.02% optimize.opt_a.special_op_eliminate : 0.000081s : 0.01% optimize.opt_a.renormalize : 0.008180s : 1.27% optimize.opt_a.add_forward_monad_depend : 0.000007s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000223s : 0.03% optimize.opt_a.cse : 0.000427s : 0.07% optimize.opt_a.a_3 : 0.000579s : 0.09% optimize.py_interpret_to_execute_after_opt_a : 0.000036s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000500s : 0.08% optimize.convert_after_rewriter : 0.000036s : 0.01% optimize.order_py_execute_after_rewriter : 0.000024s : 0.00% optimize.opt_b.b_1 : 0.000952s : 0.15% optimize.opt_b.b_2 : 0.000044s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000027s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000023s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000026s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000114s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000035s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000036s : 0.01% optimize.loop_unroll : 0.000615s : 0.10% optimize.opt_after_cconv.c_1 : 0.000260s : 0.04% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000041s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000029s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000029s : 0.00% optimize.opt_after_cconv.cse : 0.000115s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000153s : 0.02% optimize.tuple_transform.d_1 : 0.000333s : 0.05% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000042s : 0.01% optimize.add_recomputation : 0.000251s : 0.04% optimize.cse_after_recomputation.cse : 0.000089s : 0.01% optimize.environ_conv : 0.000032s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000033s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000010s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000006s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000055s : 0.01% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000026s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000048s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000076s : 0.01% optimize.symbol_engine_optimizer.opt_reshape : 0.000042s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000068s : 0.01% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000123s : 0.02% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000660s : 0.10% distribtued_split : 0.000121s : 0.02% validate : 0.000118s : 0.02% task_emit : 0.406056s : 63.18% execute : 0.000015s : 0.00% Time group info: ------[substitution.] 0.002455 677 0.45% : 0.000011s : 6: substitution.depend_value_elim 0.46% : 0.000011s : 32: substitution.elim_not_effective 0.47% : 0.000012s : 8: substitution.float_tuple_getitem_switch 0.41% : 0.000010s : 32: substitution.fold_const_symbol 1.11% : 0.000027s : 40: substitution.graph_param_transform 73.84% : 0.001813s : 76: substitution.inline 1.05% : 0.000026s : 64: substitution.j_node_and_user_rematch 3.26% : 0.000080s : 14: substitution.less_batch_normalization 0.94% : 0.000023s : 48: substitution.load_eliminater 0.30% : 0.000007s : 4: substitution.minmaximum_grad 1.45% : 0.000036s : 64: substitution.remove_not_recompute_node 0.40% : 0.000010s : 16: substitution.replace_old_param 1.43% : 0.000035s : 15: substitution.switch_simplify 1.21% : 0.000030s : 8: substitution.tuple_list_convert_item_index_to_positive 0.54% : 0.000013s : 8: substitution.tuple_list_get_item_const_eliminator 0.71% : 0.000018s : 8: substitution.tuple_list_get_item_depend_reorder 2.27% : 0.000056s : 15: substitution.tuple_list_get_item_eliminator 0.74% : 0.000018s : 8: substitution.tuple_list_get_set_item_eliminator 4.08% : 0.000100s : 104: substitution.updatestate_pure_node_eliminater 4.89% : 0.000120s : 107: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.202505 2 94.45% : 0.191262s : 1: type_inference.infer 5.55% : 0.011244s : 1: type_inference.specialize ------[replace.] 0.000725 95 2.90% : 0.000021s : 3: replace.depend_value_elim 71.93% : 0.000521s : 76: replace.inline 23.99% : 0.000174s : 15: replace.switch_simplify 1.18% : 0.000009s : 1: replace.tuple_list_get_item_eliminator ------[match.] 0.001804 95 0.07% : 0.000001s : 3: match.depend_value_elim 98.08% : 0.001769s : 76: match.inline 1.53% : 0.000028s : 15: match.switch_simplify 0.32% : 0.000006s : 1: match.tuple_list_get_item_eliminator ------[predicate.] 0.002590 17318 1.20% : 0.000031s : 212: predicate.accumulaten_eliminater 0.41% : 0.000011s : 40: predicate.ad_related_special_op_eliminate 0.63% : 0.000016s : 125: predicate.addn_check_dump 1.17% : 0.000030s : 212: predicate.addn_zero_filter 1.10% : 0.000029s : 212: predicate.adjust_all_reduce_mul_add 2.42% : 0.000063s : 337: predicate.arithmetic_simplify 1.11% : 0.000029s : 212: predicate.cast_eliminate 0.42% : 0.000011s : 80: predicate.check_bprop_eliminate 0.65% : 0.000017s : 125: predicate.compare_switch_simplify 0.12% : 0.000003s : 40: predicate.const_output_eliminate 0.22% : 0.000006s : 40: predicate.convert_tensor_all_eliminate 1.49% : 0.000039s : 213: predicate.convert_tensor_eliminate 0.67% : 0.000017s : 125: predicate.depend_value_elim 1.35% : 0.000035s : 212: predicate.dict_get_item_const_eliminator 1.30% : 0.000034s : 212: predicate.dict_get_item_eliminator 1.23% : 0.000032s : 212: predicate.dict_set_item_eliminator 0.12% : 0.000003s : 40: predicate.elim_not_effective 0.24% : 0.000006s : 40: predicate.elim_shapecalc_of_broadcastargs 1.30% : 0.000034s : 252: predicate.environ_add_const_eliminate 1.28% : 0.000033s : 252: predicate.environ_get_add_eliminate 1.31% : 0.000034s : 252: predicate.environ_get_depend_swap 2.02% : 0.000052s : 377: predicate.environ_get_eliminate 1.28% : 0.000033s : 252: predicate.environ_get_set_eliminate 1.55% : 0.000040s : 289: predicate.exchange_switch_depend_value 1.87% : 0.000048s : 289: predicate.float_depend_g_call 0.66% : 0.000017s : 125: predicate.float_environ_get_switch 0.89% : 0.000023s : 165: predicate.float_tuple_getitem_switch 0.11% : 0.000003s : 40: predicate.fold_const_symbol 0.43% : 0.000011s : 80: predicate.get_grad_eliminate 0.14% : 0.000004s : 40: predicate.graph_param_transform 0.64% : 0.000017s : 125: predicate.incorporate_call 0.63% : 0.000016s : 125: predicate.incorporate_call_switch 5.58% : 0.000145s : 786: predicate.inline 0.57% : 0.000015s : 80: predicate.inline_without_move 0.21% : 0.000006s : 80: predicate.j_node_and_user_rematch 0.56% : 0.000014s : 80: predicate.less_batch_normalization 1.60% : 0.000041s : 293: predicate.list_to_tuple_eliminator_ 2.80% : 0.000073s : 505: predicate.load_eliminater 0.47% : 0.000012s : 40: predicate.loop_unroll_after_grad 2.67% : 0.000069s : 424: predicate.loop_unroll_before_grad 1.59% : 0.000041s : 292: predicate.make_slice_get_slice_eliminator 0.64% : 0.000017s : 125: predicate.merge_addn 0.43% : 0.000011s : 80: predicate.micro_step_allgather_replace 0.43% : 0.000011s : 80: predicate.mini_step_allgather_replace 1.10% : 0.000028s : 212: predicate.minmaximum_grad 0.26% : 0.000007s : 40: predicate.mutable_eliminate 0.22% : 0.000006s : 40: predicate.opt_reshape 0.23% : 0.000006s : 40: predicate.parallel_virtual_node 2.39% : 0.000062s : 289: predicate.partial_defer_inline 1.45% : 0.000038s : 253: predicate.partial_eliminate 1.17% : 0.000030s : 212: predicate.print_const_string_wrapper 0.64% : 0.000017s : 120: predicate.reduce_all_const_elim 1.41% : 0.000037s : 212: predicate.reduce_eliminate 0.23% : 0.000006s : 80: predicate.remove_not_recompute_node 1.02% : 0.000026s : 293: predicate.replace_applicator 0.23% : 0.000006s : 80: predicate.replace_old_param 0.12% : 0.000003s : 40: predicate.reset_defer_inline 1.10% : 0.000029s : 212: predicate.reshape_eliminate 0.43% : 0.000011s : 80: predicate.row_tensor_add_zeros_like 0.24% : 0.000006s : 40: predicate.row_tensor_eliminate 0.50% : 0.000013s : 80: predicate.same_eliminate 0.37% : 0.000009s : 127: predicate.set_cell_output_no_recompute 0.48% : 0.000012s : 80: predicate.shard_identity_eliminate 0.67% : 0.000017s : 120: predicate.special_op_eliminate 0.76% : 0.000020s : 125: predicate.specialize_transform 0.45% : 0.000012s : 80: predicate.split_environ_get_set_with_tuple_value 0.50% : 0.000013s : 80: predicate.stack_unstack_eliminate 2.87% : 0.000074s : 505: predicate.stopgrad_eliminater 0.22% : 0.000006s : 40: predicate.switch_call_monad_eliminater 1.74% : 0.000045s : 289: predicate.switch_defer_inline 2.12% : 0.000055s : 369: predicate.switch_layer_defer_inline 5.64% : 0.000146s : 868: predicate.switch_simplify 1.12% : 0.000029s : 212: predicate.tile_eliminate 1.10% : 0.000029s : 212: predicate.transpose_eliminate 1.71% : 0.000044s : 292: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000045s : 292: predicate.tuple_list_get_item_const_eliminator 1.53% : 0.000040s : 292: predicate.tuple_list_get_item_depend_reorder 2.44% : 0.000063s : 418: predicate.tuple_list_get_item_eliminator 1.59% : 0.000041s : 292: predicate.tuple_list_get_set_item_eliminator 2.36% : 0.000061s : 417: predicate.tuple_list_set_item_eliminator 1.64% : 0.000042s : 293: predicate.tuple_to_list_eliminator_ 3.58% : 0.000093s : 505: predicate.updatestate_pure_node_eliminater 3.75% : 0.000097s : 630: predicate.updatestate_useless_node_eliminater 0.23% : 0.000006s : 40: predicate.value_based_eliminate 0.43% : 0.000011s : 80: predicate.virtual_dataset_eliminate 0.42% : 0.000011s : 80: predicate.virtual_output_eliminate 0.24% : 0.000006s : 40: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.008294 114 56.09% : 0.004652s : 36: func_graph_cloner_run.FuncGraphClonerGraph 43.91% : 0.003642s : 78: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.707741 209 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000047s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.04% : 0.000258s : 1: add_recomputation 0.00% : 0.000014s : 1: assign_add_opt 0.18% : 0.001309s : 1: auto_monad 0.02% : 0.000132s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.09% : 0.000651s : 1: bootstrap 0.01% : 0.000040s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000042s : 1: convert_after_rewriter 0.01% : 0.000106s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.02% : 0.000131s : 1: distribtued_split 0.10% : 0.000675s : 1: eliminate_special_op_node 0.01% : 0.000037s : 1: environ_conv 0.00% : 0.000025s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000026s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000011s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.09% : 0.000626s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.01% : 0.000058s : 1: opt.transform.loop_unroll_optimizer 2.15% : 0.015192s : 97: opt.transform.opt_a 0.04% : 0.000258s : 1: opt.transform.opt_after_cconv 0.14% : 0.000973s : 27: opt.transform.opt_b 0.05% : 0.000331s : 1: opt.transform.opt_trans_graph 0.02% : 0.000138s : 3: opt.transform.special_op_eliminate 0.03% : 0.000229s : 4: opt.transform.symbol_engine_opt 4.15% : 0.029388s : 1: opt_a 0.07% : 0.000529s : 1: opt_after_cconv 0.17% : 0.001237s : 1: opt_b 4.95% : 0.035051s : 1: optimize 0.01% : 0.000040s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000028s : 1: order_py_execute_after_rewriter 0.01% : 0.000060s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000009s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.04% : 0.000299s : 1: pre_auto_parallel 0.04% : 0.000254s : 1: py_interpret_to_execute 0.01% : 0.000041s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000159s : 1: remove_dup_value 0.53% : 0.003774s : 1: renormalize.infer 0.62% : 0.004395s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.07% : 0.000509s : 1: rewriter_after_opt_a 0.10% : 0.000718s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000006s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000038s : 1: swap_dp_allreduce_reducescatter 0.04% : 0.000302s : 1: symbol_engine_optimizer 57.38% : 0.406094s : 1: task_emit 0.05% : 0.000357s : 1: tuple_transform 28.65% : 0.202769s : 1: type_inference 0.03% : 0.000190s : 1: validate [WARNING] ME(54208:281472863673360,MainProcess):2025-02-07-15:54:44.964.803 [mindspore/parallel/_utils.py:359] You are suggested to use mindspore.context.set_auto_parallel_context(parameter_broadcast=True) or mindspore.common.set_seed() to share parameters among multi-devices. TotalTime = 0.649134, [21] [bootstrap]: 0.00056352 [type_inference]: 0.205437 [auto_monad]: 0.0013345 [graph_reusing]: 1.60499e-05 [inline]: 1.60979e-06 [parallel-infer-symbol]: 2.54996e-06 [pre_auto_parallel]: 0.00028003 [insert-virtual-dataset]: 3.74997e-06 [parallel-infer-symbol-second]: 9.90462e-07 [dataset_repeat_opt]: 1.28988e-06 [pipeline_split]: 1.62004e-06 [optimize]: 0.0360447, [52] [py_interpret_to_execute]: 0.00022036 [rewriter_before_opt_a]: 0.00069044 [opt_a]: 0.0303784, [2] [Cycle 1]: 0.0232773, [43] [expand_dump_flag]: 2.05999e-05 [switch_simplify]: 0.0006423 [loop_unroll]: 0.0003223 [a_1]: 0.00897277 [recompute_prepare]: 9.22e-05 [updatestate_depend_eliminate]: 0.00032723 [updatestate_assign_eliminate]: 4.67501e-05 [updatestate_loads_eliminate]: 0.00027047 [parameter_eliminate]: 4.1998e-06 [a_2]: 0.00136754 [accelerated_algorithm]: 0.00012897 [shard]: 1.90968e-06 [meta_shard_fg_expand]: 2.24998e-05 [shard_inline]: 4.16497e-05 [auto_parallel]: 3.538e-05 [parallel]: 1.03102e-05 [flash_sp]: 2.23899e-05 [merge_comm]: 3.02098e-05 [allreduce_fusion]: 2.62903e-05 [matmul_add_comm_reduction]: 3.753e-05 [allreduce_slice_to_reducescatter]: 6.10016e-07 [virtual_shard_identity]: 4.627e-05 [virtual_dataset]: 4.195e-05 [get_grad_eliminate_]: 4.20501e-05 [virtual_output]: 4.12799e-05 [merge_forward]: 3.22796e-05 [cell_reuse_recompute_pass]: 2.50991e-06 [cell_reuse_handle_not_recompute_node_pass]: 8.16598e-05 [before_grad]: 7.52299e-05 [inplace_validation]: 3.01697e-05 [meta_fg_expand]: 4.17302e-05 [inplace_validation_after_expand]: 3.16501e-05 [flash_sp_send_recv_attached]: 3.78955e-06 [receive_attached]: 2.44984e-06 [after_resolve]: 5.55599e-05 [a_after_grad]: 7.33002e-05 [special_op_eliminate]: 4.01698e-05 [renormalize]: 0.00911812 [add_forward_monad_depend]: 5.32996e-06 [auto_monad_grad]: 2.52994e-06 [auto_monad_eliminator]: 0.00030166 [cse]: 0.00014022 [a_3]: 0.0002882 [Cycle 2]: 0.0036218, [43] [expand_dump_flag]: 1.6503e-06 [switch_simplify]: 4.16799e-05 [loop_unroll]: 3.932e-05 [a_1]: 0.00129576 [recompute_prepare]: 3.85102e-05 [updatestate_depend_eliminate]: 3.024e-05 [updatestate_assign_eliminate]: 2.57599e-05 [updatestate_loads_eliminate]: 2.67201e-05 [parameter_eliminate]: 2.40002e-06 [a_2]: 0.00060284 [accelerated_algorithm]: 5.17401e-05 [shard]: 1.4999e-06 [meta_shard_fg_expand]: 1.37202e-05 [shard_inline]: 4.00604e-05 [auto_parallel]: 3.652e-05 [parallel]: 8.55979e-06 [flash_sp]: 3.53996e-06 [merge_comm]: 2.85301e-05 [allreduce_fusion]: 2.601e-05 [matmul_add_comm_reduction]: 3.44799e-05 [allreduce_slice_to_reducescatter]: 3.49712e-07 [virtual_shard_identity]: 4.12003e-05 [virtual_dataset]: 3.915e-05 [get_grad_eliminate_]: 3.82201e-05 [virtual_output]: 3.84203e-05 [merge_forward]: 2.51299e-05 [cell_reuse_recompute_pass]: 3.05008e-06 [cell_reuse_handle_not_recompute_node_pass]: 7.44099e-05 [before_grad]: 6.99903e-05 [inplace_validation]: 2.61399e-05 [meta_fg_expand]: 2.87504e-05 [inplace_validation_after_expand]: 3.04203e-05 [flash_sp_send_recv_attached]: 1.30991e-06 [receive_attached]: 1.13994e-06 [after_resolve]: 5.01201e-05 [a_after_grad]: 6.499e-05 [special_op_eliminate]: 3.86597e-05 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 1.97021e-06 [auto_monad_grad]: 1.53016e-06 [auto_monad_eliminator]: 6.63898e-05 [cse]: 0.00011272 [a_3]: 0.00027613 [py_interpret_to_execute_after_opt_a]: 3.71798e-05 [slice_cell_reuse_recomputed_activation]: 2.8396e-06 [rewriter_after_opt_a]: 0.00051128 [convert_after_rewriter]: 3.56198e-05 [order_py_execute_after_rewriter]: 2.33799e-05 [opt_b]: 0.0012206, [1] [Cycle 1]: 0.00121388, [7] [b_1]: 0.00094268 [b_2]: 4.44301e-05 [updatestate_depend_eliminate]: 2.60202e-05 [updatestate_assign_eliminate]: 2.268e-05 [updatestate_loads_eliminate]: 2.51401e-05 [renormalize]: 4.00003e-07 [cse]: 0.00011371 [optimize_parallel_all_gather_comm]: 3.39397e-05 [overlap_param_gather]: 1.61026e-06 [cconv]: 3.41102e-05 [loop_unroll]: 0.00064471 [opt_after_cconv]: 0.00055579, [1] [Cycle 1]: 0.00054932, [7] [c_1]: 0.00025206 [parameter_eliminate]: 2.63983e-06 [updatestate_depend_eliminate]: 3.63998e-05 [updatestate_assign_eliminate]: 2.641e-05 [updatestate_loads_eliminate]: 2.75201e-05 [cse]: 0.00011345 [renormalize]: 2.99886e-07 [remove_dup_value]: 0.00015561 [tuple_transform]: 0.00034909, [1] [Cycle 1]: 0.00034312, [2] [d_1]: 0.00033054 [renormalize]: 2.90107e-07 [partial_unused_args_eliminate]: 3.22005e-06 [add_cache_embedding]: 4.43696e-05 [add_recomputation]: 0.00025322 [cse_after_recomputation]: 9.92101e-05, [1] [Cycle 1]: 9.32198e-05, [1] [cse]: 8.60002e-05 [environ_conv]: 3.16896e-05 [swap_dp_allreduce_reducescatter]: 3.355e-05 [bias_add_comm_swap]: 2.54996e-06 [label_micro_interleaved_index]: 2.16998e-06 [label_fine_grained_interleaved_index]: 1.87987e-06 [merge_cast_opt]: 1.29035e-06 [slice_recompute_activation]: 1.62004e-06 [micro_interleaved_order_control]: 1.78022e-06 [assign_add_opt]: 1.00601e-05 [ForceFp32Comm]: 9.4017e-07 [remove_cast_before_assign_add]: 1.07009e-06 [full_micro_interleaved_order_control]: 2.33017e-06 [reorder_send_recv_between_fp_bp]: 2.50991e-06 [comm_op_add_attrs]: 1.09011e-06 [add_comm_op_reuse_tag]: 1.11014e-06 [interleave_split_concat_branches]: 9.49949e-07 [interleave_parallel_branches]: 8.10251e-07 [overlap_opt_shard_in_pipeline]: 6.05034e-06 [overlap_opt_shard_grad_in_pipeline]: 2.19001e-06 [control_data_broadcast_order]: 1.14972e-06 [grouped_pairwise_exchange_alltoall]: 1.30991e-06 [offloading_packed_experts]: 1.04029e-06 [overlap_recompute_and_grad_model_parallel]: 2.25008e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.5018e-07 [overlap_recompute_allgather_and_fa_grad]: 1.14972e-06 [overlap_grad_ring_attention]: 1.81003e-06 [overlap_grad_flash_sp]: 6.219e-05 [begin_end_overlap_inline]: 9.09902e-07 [split_matmul_comm_elemetwise]: 1.8198e-06 [split_layernorm_comm]: 2.01957e-06 [handle_group_info]: 8.99658e-07 [symbol_engine_optimizer]: 0.00029295, [1] [Cycle 1]: 0.00028704, [6] [build]: 2.62298e-05 [elim_shapecalc]: 4.70597e-05 [elim_not_effective]: 7.58497e-05 [opt_reshape]: 4.077e-05 [fold_const_symbol]: 6.756e-05 [renormalize]: 3.69735e-07 [pipeline_parallel_scheduler]: 1.67964e-06 [auto_monad_reorder]: 0.00012075 [get_jit_bprop_graph]: 5.20144e-07 [rewriter_after_jit_bprop_graph]: 5.0012e-07 [eliminate_special_op_node]: 0.00065065 [distribtued_split]: 0.0001176 [validate]: 0.00012118 [task_emit]: 0.404077 [execute]: 1.10799e-05 Sums bootstrap : 0.000564s : 0.09% type_inference : 0.205437s : 31.89% auto_monad : 0.001335s : 0.21% graph_reusing : 0.000016s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000280s : 0.04% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000220s : 0.03% optimize.rewriter_before_opt_a : 0.000690s : 0.11% optimize.opt_a.expand_dump_flag : 0.000022s : 0.00% optimize.opt_a.switch_simplify : 0.000684s : 0.11% optimize.opt_a.loop_unroll : 0.000362s : 0.06% optimize.opt_a.a_1 : 0.010269s : 1.59% optimize.opt_a.recompute_prepare : 0.000131s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000357s : 0.06% optimize.opt_a.updatestate_assign_eliminate : 0.000073s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000297s : 0.05% optimize.opt_a.parameter_eliminate : 0.000007s : 0.00% optimize.opt_a.a_2 : 0.001970s : 0.31% optimize.opt_a.accelerated_algorithm : 0.000181s : 0.03% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000036s : 0.01% optimize.opt_a.shard_inline : 0.000082s : 0.01% optimize.opt_a.auto_parallel : 0.000072s : 0.01% optimize.opt_a.parallel : 0.000019s : 0.00% optimize.opt_a.flash_sp : 0.000026s : 0.00% optimize.opt_a.merge_comm : 0.000059s : 0.01% optimize.opt_a.allreduce_fusion : 0.000052s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000072s : 0.01% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000087s : 0.01% optimize.opt_a.virtual_dataset : 0.000081s : 0.01% optimize.opt_a.get_grad_eliminate_ : 0.000080s : 0.01% optimize.opt_a.virtual_output : 0.000080s : 0.01% optimize.opt_a.merge_forward : 0.000057s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000006s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000156s : 0.02% optimize.opt_a.before_grad : 0.000145s : 0.02% optimize.opt_a.inplace_validation : 0.000056s : 0.01% optimize.opt_a.meta_fg_expand : 0.000070s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000062s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000106s : 0.02% optimize.opt_a.a_after_grad : 0.000138s : 0.02% optimize.opt_a.special_op_eliminate : 0.000079s : 0.01% optimize.opt_a.renormalize : 0.009118s : 1.42% optimize.opt_a.add_forward_monad_depend : 0.000007s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000368s : 0.06% optimize.opt_a.cse : 0.000253s : 0.04% optimize.opt_a.a_3 : 0.000564s : 0.09% optimize.py_interpret_to_execute_after_opt_a : 0.000037s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000511s : 0.08% optimize.convert_after_rewriter : 0.000036s : 0.01% optimize.order_py_execute_after_rewriter : 0.000023s : 0.00% optimize.opt_b.b_1 : 0.000943s : 0.15% optimize.opt_b.b_2 : 0.000044s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000026s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000023s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000025s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000114s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000034s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000034s : 0.01% optimize.loop_unroll : 0.000645s : 0.10% optimize.opt_after_cconv.c_1 : 0.000252s : 0.04% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000036s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000026s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000028s : 0.00% optimize.opt_after_cconv.cse : 0.000113s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000156s : 0.02% optimize.tuple_transform.d_1 : 0.000331s : 0.05% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000044s : 0.01% optimize.add_recomputation : 0.000253s : 0.04% optimize.cse_after_recomputation.cse : 0.000086s : 0.01% optimize.environ_conv : 0.000032s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000034s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000010s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000006s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000062s : 0.01% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000026s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000047s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000076s : 0.01% optimize.symbol_engine_optimizer.opt_reshape : 0.000041s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000068s : 0.01% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000121s : 0.02% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000651s : 0.10% distribtued_split : 0.000118s : 0.02% validate : 0.000121s : 0.02% task_emit : 0.404077s : 62.72% execute : 0.000011s : 0.00% Time group info: ------[substitution.] 0.002532 677 0.46% : 0.000012s : 6: substitution.depend_value_elim 0.44% : 0.000011s : 32: substitution.elim_not_effective 0.46% : 0.000012s : 8: substitution.float_tuple_getitem_switch 0.39% : 0.000010s : 32: substitution.fold_const_symbol 1.07% : 0.000027s : 40: substitution.graph_param_transform 74.72% : 0.001892s : 76: substitution.inline 1.03% : 0.000026s : 64: substitution.j_node_and_user_rematch 3.18% : 0.000080s : 14: substitution.less_batch_normalization 0.90% : 0.000023s : 48: substitution.load_eliminater 0.29% : 0.000007s : 4: substitution.minmaximum_grad 1.40% : 0.000035s : 64: substitution.remove_not_recompute_node 0.41% : 0.000010s : 16: substitution.replace_old_param 1.43% : 0.000036s : 15: substitution.switch_simplify 1.16% : 0.000029s : 8: substitution.tuple_list_convert_item_index_to_positive 0.52% : 0.000013s : 8: substitution.tuple_list_get_item_const_eliminator 0.69% : 0.000018s : 8: substitution.tuple_list_get_item_depend_reorder 2.23% : 0.000056s : 15: substitution.tuple_list_get_item_eliminator 0.68% : 0.000017s : 8: substitution.tuple_list_get_set_item_eliminator 3.70% : 0.000094s : 104: substitution.updatestate_pure_node_eliminater 4.85% : 0.000123s : 107: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.205192 2 94.15% : 0.193186s : 1: type_inference.infer 5.85% : 0.012006s : 1: type_inference.specialize ------[replace.] 0.000714 95 3.03% : 0.000022s : 3: replace.depend_value_elim 73.25% : 0.000523s : 76: replace.inline 22.58% : 0.000161s : 15: replace.switch_simplify 1.14% : 0.000008s : 1: replace.tuple_list_get_item_eliminator ------[match.] 0.001885 95 0.07% : 0.000001s : 3: match.depend_value_elim 98.23% : 0.001852s : 76: match.inline 1.45% : 0.000027s : 15: match.switch_simplify 0.25% : 0.000005s : 1: match.tuple_list_get_item_eliminator ------[predicate.] 0.002556 17318 1.31% : 0.000034s : 212: predicate.accumulaten_eliminater 0.41% : 0.000011s : 40: predicate.ad_related_special_op_eliminate 0.63% : 0.000016s : 125: predicate.addn_check_dump 1.13% : 0.000029s : 212: predicate.addn_zero_filter 1.12% : 0.000029s : 212: predicate.adjust_all_reduce_mul_add 2.38% : 0.000061s : 337: predicate.arithmetic_simplify 1.18% : 0.000030s : 212: predicate.cast_eliminate 0.42% : 0.000011s : 80: predicate.check_bprop_eliminate 0.64% : 0.000016s : 125: predicate.compare_switch_simplify 0.11% : 0.000003s : 40: predicate.const_output_eliminate 0.22% : 0.000006s : 40: predicate.convert_tensor_all_eliminate 1.44% : 0.000037s : 213: predicate.convert_tensor_eliminate 0.67% : 0.000017s : 125: predicate.depend_value_elim 1.21% : 0.000031s : 212: predicate.dict_get_item_const_eliminator 1.34% : 0.000034s : 212: predicate.dict_get_item_eliminator 1.21% : 0.000031s : 212: predicate.dict_set_item_eliminator 0.12% : 0.000003s : 40: predicate.elim_not_effective 0.25% : 0.000006s : 40: predicate.elim_shapecalc_of_broadcastargs 1.37% : 0.000035s : 252: predicate.environ_add_const_eliminate 1.34% : 0.000034s : 252: predicate.environ_get_add_eliminate 1.36% : 0.000035s : 252: predicate.environ_get_depend_swap 2.00% : 0.000051s : 377: predicate.environ_get_eliminate 1.37% : 0.000035s : 252: predicate.environ_get_set_eliminate 1.55% : 0.000040s : 289: predicate.exchange_switch_depend_value 1.95% : 0.000050s : 289: predicate.float_depend_g_call 0.66% : 0.000017s : 125: predicate.float_environ_get_switch 0.90% : 0.000023s : 165: predicate.float_tuple_getitem_switch 0.12% : 0.000003s : 40: predicate.fold_const_symbol 0.44% : 0.000011s : 80: predicate.get_grad_eliminate 0.14% : 0.000004s : 40: predicate.graph_param_transform 0.65% : 0.000017s : 125: predicate.incorporate_call 0.64% : 0.000016s : 125: predicate.incorporate_call_switch 5.55% : 0.000142s : 786: predicate.inline 0.56% : 0.000014s : 80: predicate.inline_without_move 0.23% : 0.000006s : 80: predicate.j_node_and_user_rematch 0.58% : 0.000015s : 80: predicate.less_batch_normalization 1.61% : 0.000041s : 293: predicate.list_to_tuple_eliminator_ 2.86% : 0.000073s : 505: predicate.load_eliminater 0.44% : 0.000011s : 40: predicate.loop_unroll_after_grad 2.60% : 0.000066s : 424: predicate.loop_unroll_before_grad 1.62% : 0.000041s : 292: predicate.make_slice_get_slice_eliminator 0.65% : 0.000017s : 125: predicate.merge_addn 0.41% : 0.000011s : 80: predicate.micro_step_allgather_replace 0.42% : 0.000011s : 80: predicate.mini_step_allgather_replace 1.12% : 0.000029s : 212: predicate.minmaximum_grad 0.25% : 0.000006s : 40: predicate.mutable_eliminate 0.22% : 0.000006s : 40: predicate.opt_reshape 0.24% : 0.000006s : 40: predicate.parallel_virtual_node 2.49% : 0.000064s : 289: predicate.partial_defer_inline 1.48% : 0.000038s : 253: predicate.partial_eliminate 1.19% : 0.000030s : 212: predicate.print_const_string_wrapper 0.62% : 0.000016s : 120: predicate.reduce_all_const_elim 1.53% : 0.000039s : 212: predicate.reduce_eliminate 0.23% : 0.000006s : 80: predicate.remove_not_recompute_node 1.05% : 0.000027s : 293: predicate.replace_applicator 0.24% : 0.000006s : 80: predicate.replace_old_param 0.12% : 0.000003s : 40: predicate.reset_defer_inline 1.13% : 0.000029s : 212: predicate.reshape_eliminate 0.42% : 0.000011s : 80: predicate.row_tensor_add_zeros_like 0.25% : 0.000006s : 40: predicate.row_tensor_eliminate 0.49% : 0.000012s : 80: predicate.same_eliminate 0.39% : 0.000010s : 127: predicate.set_cell_output_no_recompute 0.46% : 0.000012s : 80: predicate.shard_identity_eliminate 0.68% : 0.000017s : 120: predicate.special_op_eliminate 0.74% : 0.000019s : 125: predicate.specialize_transform 0.46% : 0.000012s : 80: predicate.split_environ_get_set_with_tuple_value 0.50% : 0.000013s : 80: predicate.stack_unstack_eliminate 2.76% : 0.000071s : 505: predicate.stopgrad_eliminater 0.22% : 0.000006s : 40: predicate.switch_call_monad_eliminater 1.74% : 0.000045s : 289: predicate.switch_defer_inline 2.11% : 0.000054s : 369: predicate.switch_layer_defer_inline 5.65% : 0.000144s : 868: predicate.switch_simplify 1.14% : 0.000029s : 212: predicate.tile_eliminate 1.11% : 0.000028s : 212: predicate.transpose_eliminate 1.73% : 0.000044s : 292: predicate.tuple_list_convert_item_index_to_positive 1.72% : 0.000044s : 292: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000040s : 292: predicate.tuple_list_get_item_depend_reorder 2.53% : 0.000065s : 418: predicate.tuple_list_get_item_eliminator 1.67% : 0.000043s : 292: predicate.tuple_list_get_set_item_eliminator 2.44% : 0.000062s : 417: predicate.tuple_list_set_item_eliminator 1.61% : 0.000041s : 293: predicate.tuple_to_list_eliminator_ 2.85% : 0.000073s : 505: predicate.updatestate_pure_node_eliminater 3.68% : 0.000094s : 630: predicate.updatestate_useless_node_eliminater 0.22% : 0.000006s : 40: predicate.value_based_eliminate 0.44% : 0.000011s : 80: predicate.virtual_dataset_eliminate 0.45% : 0.000011s : 80: predicate.virtual_output_eliminate 0.25% : 0.000006s : 40: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.008653 114 56.02% : 0.004848s : 36: func_graph_cloner_run.FuncGraphClonerGraph 43.98% : 0.003806s : 78: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.711247 209 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000049s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.04% : 0.000259s : 1: add_recomputation 0.00% : 0.000014s : 1: assign_add_opt 0.19% : 0.001358s : 1: auto_monad 0.02% : 0.000130s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.08% : 0.000594s : 1: bootstrap 0.01% : 0.000039s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000041s : 1: convert_after_rewriter 0.01% : 0.000103s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.02% : 0.000127s : 1: distribtued_split 0.09% : 0.000665s : 1: eliminate_special_op_node 0.01% : 0.000036s : 1: environ_conv 0.00% : 0.000020s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000024s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000008s : 1: inline 0.00% : 0.000011s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.09% : 0.000655s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.01% : 0.000056s : 1: opt.transform.loop_unroll_optimizer 2.12% : 0.015093s : 97: opt.transform.opt_a 0.04% : 0.000250s : 1: opt.transform.opt_after_cconv 0.14% : 0.000962s : 27: opt.transform.opt_b 0.05% : 0.000328s : 1: opt.transform.opt_trans_graph 0.02% : 0.000137s : 3: opt.transform.special_op_eliminate 0.03% : 0.000226s : 4: opt.transform.symbol_engine_opt 4.27% : 0.030383s : 1: opt_a 0.08% : 0.000560s : 1: opt_after_cconv 0.17% : 0.001224s : 1: opt_b 5.07% : 0.036054s : 1: optimize 0.01% : 0.000039s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000027s : 1: order_py_execute_after_rewriter 0.01% : 0.000066s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000009s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.04% : 0.000293s : 1: pre_auto_parallel 0.03% : 0.000231s : 1: py_interpret_to_execute 0.01% : 0.000042s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000163s : 1: remove_dup_value 0.60% : 0.004276s : 1: renormalize.infer 0.68% : 0.004829s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.07% : 0.000520s : 1: rewriter_after_opt_a 0.10% : 0.000702s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000037s : 1: swap_dp_allreduce_reducescatter 0.04% : 0.000297s : 1: symbol_engine_optimizer 56.82% : 0.404109s : 1: task_emit 0.05% : 0.000353s : 1: tuple_transform 28.89% : 0.205474s : 1: type_inference 0.03% : 0.000192s : 1: validate TotalTime = 0.648474, [21] [bootstrap]: 0.00069485 [type_inference]: 0.206612 [auto_monad]: 0.00133522 [graph_reusing]: 1.67e-05 [inline]: 1.53016e-06 [parallel-infer-symbol]: 2.65008e-06 [pre_auto_parallel]: 0.00029418 [insert-virtual-dataset]: 3.47011e-06 [parallel-infer-symbol-second]: 8.50298e-07 [dataset_repeat_opt]: 1.43005e-06 [pipeline_split]: 1.83005e-06 [optimize]: 0.0355181, [52] [py_interpret_to_execute]: 0.00022846 [rewriter_before_opt_a]: 0.00068547 [opt_a]: 0.0298322, [2] [Cycle 1]: 0.0227863, [43] [expand_dump_flag]: 2.07899e-05 [switch_simplify]: 0.0006179 [loop_unroll]: 0.00032226 [a_1]: 0.00891895 [recompute_prepare]: 8.97301e-05 [updatestate_depend_eliminate]: 0.00033314 [updatestate_assign_eliminate]: 4.99599e-05 [updatestate_loads_eliminate]: 0.0002647 [parameter_eliminate]: 3.9502e-06 [a_2]: 0.00134054 [accelerated_algorithm]: 0.00012366 [shard]: 1.79e-06 [meta_shard_fg_expand]: 2.33701e-05 [shard_inline]: 4.23198e-05 [auto_parallel]: 3.51998e-05 [parallel]: 7.90972e-06 [flash_sp]: 1.90502e-05 [merge_comm]: 2.997e-05 [allreduce_fusion]: 2.7e-05 [matmul_add_comm_reduction]: 3.57199e-05 [allreduce_slice_to_reducescatter]: 5.39701e-07 [virtual_shard_identity]: 4.233e-05 [virtual_dataset]: 4.13503e-05 [get_grad_eliminate_]: 4.10303e-05 [virtual_output]: 4.10802e-05 [merge_forward]: 3.001e-05 [cell_reuse_recompute_pass]: 2.21981e-06 [cell_reuse_handle_not_recompute_node_pass]: 8.31196e-05 [before_grad]: 7.42101e-05 [inplace_validation]: 2.90703e-05 [meta_fg_expand]: 4.33503e-05 [inplace_validation_after_expand]: 3.10801e-05 [flash_sp_send_recv_attached]: 2.79024e-06 [receive_attached]: 2.01026e-06 [after_resolve]: 5.45001e-05 [a_after_grad]: 7.32504e-05 [special_op_eliminate]: 3.976e-05 [renormalize]: 0.00890836 [add_forward_monad_depend]: 5.32018e-06 [auto_monad_grad]: 2.54018e-06 [auto_monad_eliminator]: 0.00019273 [cse]: 0.000139 [a_3]: 0.00029082 [Cycle 2]: 0.00360809, [43] [expand_dump_flag]: 1.87987e-06 [switch_simplify]: 4.19198e-05 [loop_unroll]: 3.831e-05 [a_1]: 0.00129353 [recompute_prepare]: 3.82699e-05 [updatestate_depend_eliminate]: 3.062e-05 [updatestate_assign_eliminate]: 2.55797e-05 [updatestate_loads_eliminate]: 2.70898e-05 [parameter_eliminate]: 2.46987e-06 [a_2]: 0.00059785 [accelerated_algorithm]: 5.13201e-05 [shard]: 1.53016e-06 [meta_shard_fg_expand]: 1.34101e-05 [shard_inline]: 3.97102e-05 [auto_parallel]: 3.673e-05 [parallel]: 8.04989e-06 [flash_sp]: 4.14997e-06 [merge_comm]: 2.91201e-05 [allreduce_fusion]: 2.67103e-05 [matmul_add_comm_reduction]: 3.637e-05 [allreduce_slice_to_reducescatter]: 5.0012e-07 [virtual_shard_identity]: 4.13801e-05 [virtual_dataset]: 3.854e-05 [get_grad_eliminate_]: 3.78201e-05 [virtual_output]: 3.80501e-05 [merge_forward]: 2.546e-05 [cell_reuse_recompute_pass]: 2.70968e-06 [cell_reuse_handle_not_recompute_node_pass]: 7.41999e-05 [before_grad]: 6.75702e-05 [inplace_validation]: 2.46898e-05 [meta_fg_expand]: 2.71797e-05 [inplace_validation_after_expand]: 3.00198e-05 [flash_sp_send_recv_attached]: 1.17999e-06 [receive_attached]: 1.49012e-06 [after_resolve]: 4.888e-05 [a_after_grad]: 6.48899e-05 [special_op_eliminate]: 3.83998e-05 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 1.56974e-06 [auto_monad_grad]: 1.72993e-06 [auto_monad_eliminator]: 6.79204e-05 [cse]: 0.0001138 [a_3]: 0.00032218 [py_interpret_to_execute_after_opt_a]: 4.14997e-05 [slice_cell_reuse_recomputed_activation]: 2.45962e-06 [rewriter_after_opt_a]: 0.00049483 [convert_after_rewriter]: 3.517e-05 [order_py_execute_after_rewriter]: 2.367e-05 [opt_b]: 0.0012254, [1] [Cycle 1]: 0.00121841, [7] [b_1]: 0.00094719 [b_2]: 4.30299e-05 [updatestate_depend_eliminate]: 2.645e-05 [updatestate_assign_eliminate]: 2.325e-05 [updatestate_loads_eliminate]: 2.63099e-05 [renormalize]: 4.90341e-07 [cse]: 0.00011327 [optimize_parallel_all_gather_comm]: 3.43798e-05 [overlap_param_gather]: 1.34017e-06 [cconv]: 3.51602e-05 [loop_unroll]: 0.00068003 [opt_after_cconv]: 0.00050827, [1] [Cycle 1]: 0.00050174, [7] [c_1]: 0.00025227 [parameter_eliminate]: 2.69013e-06 [updatestate_depend_eliminate]: 3.94797e-05 [updatestate_assign_eliminate]: 2.879e-05 [updatestate_loads_eliminate]: 2.83304e-05 [cse]: 0.00011203 [renormalize]: 4.10248e-07 [remove_dup_value]: 0.00015565 [tuple_transform]: 0.00034791, [1] [Cycle 1]: 0.00034236, [2] [d_1]: 0.00032993 [renormalize]: 4.00003e-07 [partial_unused_args_eliminate]: 3.18e-06 [add_cache_embedding]: 4.41698e-05 [add_recomputation]: 0.00029361 [cse_after_recomputation]: 0.00010089, [1] [Cycle 1]: 9.498e-05, [1] [cse]: 8.76398e-05 [environ_conv]: 3.26801e-05 [swap_dp_allreduce_reducescatter]: 3.28501e-05 [bias_add_comm_swap]: 2.70037e-06 [label_micro_interleaved_index]: 2.16998e-06 [label_fine_grained_interleaved_index]: 2.37999e-06 [merge_cast_opt]: 1.33971e-06 [slice_recompute_activation]: 1.4496e-06 [micro_interleaved_order_control]: 1.62004e-06 [assign_add_opt]: 9.46969e-06 [ForceFp32Comm]: 8.29808e-07 [remove_cast_before_assign_add]: 1.01002e-06 [full_micro_interleaved_order_control]: 2.37022e-06 [reorder_send_recv_between_fp_bp]: 1.85007e-06 [comm_op_add_attrs]: 8.69855e-07 [add_comm_op_reuse_tag]: 8.801e-07 [interleave_split_concat_branches]: 8.69855e-07 [interleave_parallel_branches]: 8.60076e-07 [overlap_opt_shard_in_pipeline]: 5.5097e-06 [overlap_opt_shard_grad_in_pipeline]: 2.37999e-06 [control_data_broadcast_order]: 1.15996e-06 [grouped_pairwise_exchange_alltoall]: 1.15018e-06 [offloading_packed_experts]: 1.29966e-06 [overlap_recompute_and_grad_model_parallel]: 1.95997e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.29808e-07 [overlap_recompute_allgather_and_fa_grad]: 1.24983e-06 [overlap_grad_ring_attention]: 1.70991e-06 [overlap_grad_flash_sp]: 6.03399e-05 [begin_end_overlap_inline]: 8.2003e-07 [split_matmul_comm_elemetwise]: 2.2701e-06 [split_layernorm_comm]: 1.93994e-06 [handle_group_info]: 1.13016e-06 [symbol_engine_optimizer]: 0.00028785, [1] [Cycle 1]: 0.00028208, [6] [build]: 2.27704e-05 [elim_shapecalc]: 4.68199e-05 [elim_not_effective]: 7.57496e-05 [opt_reshape]: 3.94597e-05 [fold_const_symbol]: 6.737e-05 [renormalize]: 3.70201e-07 [pipeline_parallel_scheduler]: 2.44984e-06 [auto_monad_reorder]: 0.00012072 [get_jit_bprop_graph]: 5.40167e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00064618 [distribtued_split]: 0.00011849 [validate]: 0.00011994 [task_emit]: 0.402654 [execute]: 9.14e-06 Sums bootstrap : 0.000695s : 0.11% type_inference : 0.206612s : 32.09% auto_monad : 0.001335s : 0.21% graph_reusing : 0.000017s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000294s : 0.05% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000228s : 0.04% optimize.rewriter_before_opt_a : 0.000685s : 0.11% optimize.opt_a.expand_dump_flag : 0.000023s : 0.00% optimize.opt_a.switch_simplify : 0.000660s : 0.10% optimize.opt_a.loop_unroll : 0.000361s : 0.06% optimize.opt_a.a_1 : 0.010212s : 1.59% optimize.opt_a.recompute_prepare : 0.000128s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000364s : 0.06% optimize.opt_a.updatestate_assign_eliminate : 0.000076s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000292s : 0.05% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.001938s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000175s : 0.03% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000037s : 0.01% optimize.opt_a.shard_inline : 0.000082s : 0.01% optimize.opt_a.auto_parallel : 0.000072s : 0.01% optimize.opt_a.parallel : 0.000016s : 0.00% optimize.opt_a.flash_sp : 0.000023s : 0.00% optimize.opt_a.merge_comm : 0.000059s : 0.01% optimize.opt_a.allreduce_fusion : 0.000054s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000072s : 0.01% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000084s : 0.01% optimize.opt_a.virtual_dataset : 0.000080s : 0.01% optimize.opt_a.get_grad_eliminate_ : 0.000079s : 0.01% optimize.opt_a.virtual_output : 0.000079s : 0.01% optimize.opt_a.merge_forward : 0.000055s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000157s : 0.02% optimize.opt_a.before_grad : 0.000142s : 0.02% optimize.opt_a.inplace_validation : 0.000054s : 0.01% optimize.opt_a.meta_fg_expand : 0.000071s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000061s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000103s : 0.02% optimize.opt_a.a_after_grad : 0.000138s : 0.02% optimize.opt_a.special_op_eliminate : 0.000078s : 0.01% optimize.opt_a.renormalize : 0.008908s : 1.38% optimize.opt_a.add_forward_monad_depend : 0.000007s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000261s : 0.04% optimize.opt_a.cse : 0.000253s : 0.04% optimize.opt_a.a_3 : 0.000613s : 0.10% optimize.py_interpret_to_execute_after_opt_a : 0.000041s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000495s : 0.08% optimize.convert_after_rewriter : 0.000035s : 0.01% optimize.order_py_execute_after_rewriter : 0.000024s : 0.00% optimize.opt_b.b_1 : 0.000947s : 0.15% optimize.opt_b.b_2 : 0.000043s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000026s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000023s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000026s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000113s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000034s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000035s : 0.01% optimize.loop_unroll : 0.000680s : 0.11% optimize.opt_after_cconv.c_1 : 0.000252s : 0.04% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000039s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000029s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000028s : 0.00% optimize.opt_after_cconv.cse : 0.000112s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000156s : 0.02% optimize.tuple_transform.d_1 : 0.000330s : 0.05% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000044s : 0.01% optimize.add_recomputation : 0.000294s : 0.05% optimize.cse_after_recomputation.cse : 0.000088s : 0.01% optimize.environ_conv : 0.000033s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000033s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000009s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000006s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000060s : 0.01% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000023s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000047s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000076s : 0.01% optimize.symbol_engine_optimizer.opt_reshape : 0.000039s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000067s : 0.01% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000121s : 0.02% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000646s : 0.10% distribtued_split : 0.000118s : 0.02% validate : 0.000120s : 0.02% task_emit : 0.402654s : 62.54% execute : 0.000009s : 0.00% Time group info: ------[substitution.] 0.002477 677 0.41% : 0.000010s : 6: substitution.depend_value_elim 0.47% : 0.000012s : 32: substitution.elim_not_effective 0.42% : 0.000010s : 8: substitution.float_tuple_getitem_switch 0.40% : 0.000010s : 32: substitution.fold_const_symbol 1.07% : 0.000026s : 40: substitution.graph_param_transform 74.89% : 0.001855s : 76: substitution.inline 1.03% : 0.000025s : 64: substitution.j_node_and_user_rematch 3.11% : 0.000077s : 14: substitution.less_batch_normalization 0.91% : 0.000023s : 48: substitution.load_eliminater 0.30% : 0.000008s : 4: substitution.minmaximum_grad 1.42% : 0.000035s : 64: substitution.remove_not_recompute_node 0.40% : 0.000010s : 16: substitution.replace_old_param 1.33% : 0.000033s : 15: substitution.switch_simplify 1.14% : 0.000028s : 8: substitution.tuple_list_convert_item_index_to_positive 0.54% : 0.000013s : 8: substitution.tuple_list_get_item_const_eliminator 0.67% : 0.000017s : 8: substitution.tuple_list_get_item_depend_reorder 2.17% : 0.000054s : 15: substitution.tuple_list_get_item_eliminator 0.71% : 0.000018s : 8: substitution.tuple_list_get_set_item_eliminator 3.74% : 0.000093s : 104: substitution.updatestate_pure_node_eliminater 4.88% : 0.000121s : 107: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.206377 2 94.35% : 0.194710s : 1: type_inference.infer 5.65% : 0.011666s : 1: type_inference.specialize ------[replace.] 0.000702 95 3.14% : 0.000022s : 3: replace.depend_value_elim 73.77% : 0.000518s : 76: replace.inline 21.95% : 0.000154s : 15: replace.switch_simplify 1.14% : 0.000008s : 1: replace.tuple_list_get_item_eliminator ------[match.] 0.001848 95 0.07% : 0.000001s : 3: match.depend_value_elim 98.30% : 0.001816s : 76: match.inline 1.39% : 0.000026s : 15: match.switch_simplify 0.24% : 0.000004s : 1: match.tuple_list_get_item_eliminator ------[predicate.] 0.002542 17318 1.14% : 0.000029s : 212: predicate.accumulaten_eliminater 0.41% : 0.000010s : 40: predicate.ad_related_special_op_eliminate 0.65% : 0.000016s : 125: predicate.addn_check_dump 1.16% : 0.000030s : 212: predicate.addn_zero_filter 1.11% : 0.000028s : 212: predicate.adjust_all_reduce_mul_add 2.38% : 0.000061s : 337: predicate.arithmetic_simplify 1.20% : 0.000031s : 212: predicate.cast_eliminate 0.43% : 0.000011s : 80: predicate.check_bprop_eliminate 0.65% : 0.000017s : 125: predicate.compare_switch_simplify 0.12% : 0.000003s : 40: predicate.const_output_eliminate 0.22% : 0.000006s : 40: predicate.convert_tensor_all_eliminate 1.44% : 0.000037s : 213: predicate.convert_tensor_eliminate 0.68% : 0.000017s : 125: predicate.depend_value_elim 1.27% : 0.000032s : 212: predicate.dict_get_item_const_eliminator 1.25% : 0.000032s : 212: predicate.dict_get_item_eliminator 1.23% : 0.000031s : 212: predicate.dict_set_item_eliminator 0.12% : 0.000003s : 40: predicate.elim_not_effective 0.25% : 0.000006s : 40: predicate.elim_shapecalc_of_broadcastargs 1.37% : 0.000035s : 252: predicate.environ_add_const_eliminate 1.34% : 0.000034s : 252: predicate.environ_get_add_eliminate 1.32% : 0.000033s : 252: predicate.environ_get_depend_swap 2.10% : 0.000053s : 377: predicate.environ_get_eliminate 1.39% : 0.000035s : 252: predicate.environ_get_set_eliminate 1.56% : 0.000040s : 289: predicate.exchange_switch_depend_value 1.88% : 0.000048s : 289: predicate.float_depend_g_call 0.66% : 0.000017s : 125: predicate.float_environ_get_switch 0.88% : 0.000022s : 165: predicate.float_tuple_getitem_switch 0.12% : 0.000003s : 40: predicate.fold_const_symbol 0.43% : 0.000011s : 80: predicate.get_grad_eliminate 0.14% : 0.000004s : 40: predicate.graph_param_transform 0.65% : 0.000017s : 125: predicate.incorporate_call 0.63% : 0.000016s : 125: predicate.incorporate_call_switch 5.47% : 0.000139s : 786: predicate.inline 0.57% : 0.000014s : 80: predicate.inline_without_move 0.23% : 0.000006s : 80: predicate.j_node_and_user_rematch 0.56% : 0.000014s : 80: predicate.less_batch_normalization 1.65% : 0.000042s : 293: predicate.list_to_tuple_eliminator_ 2.77% : 0.000071s : 505: predicate.load_eliminater 0.46% : 0.000012s : 40: predicate.loop_unroll_after_grad 2.71% : 0.000069s : 424: predicate.loop_unroll_before_grad 1.60% : 0.000041s : 292: predicate.make_slice_get_slice_eliminator 0.65% : 0.000017s : 125: predicate.merge_addn 0.43% : 0.000011s : 80: predicate.micro_step_allgather_replace 0.43% : 0.000011s : 80: predicate.mini_step_allgather_replace 1.10% : 0.000028s : 212: predicate.minmaximum_grad 0.26% : 0.000007s : 40: predicate.mutable_eliminate 0.23% : 0.000006s : 40: predicate.opt_reshape 0.24% : 0.000006s : 40: predicate.parallel_virtual_node 2.61% : 0.000066s : 289: predicate.partial_defer_inline 1.49% : 0.000038s : 253: predicate.partial_eliminate 1.14% : 0.000029s : 212: predicate.print_const_string_wrapper 0.63% : 0.000016s : 120: predicate.reduce_all_const_elim 1.36% : 0.000035s : 212: predicate.reduce_eliminate 0.23% : 0.000006s : 80: predicate.remove_not_recompute_node 1.06% : 0.000027s : 293: predicate.replace_applicator 0.23% : 0.000006s : 80: predicate.replace_old_param 0.12% : 0.000003s : 40: predicate.reset_defer_inline 1.21% : 0.000031s : 212: predicate.reshape_eliminate 0.44% : 0.000011s : 80: predicate.row_tensor_add_zeros_like 0.24% : 0.000006s : 40: predicate.row_tensor_eliminate 0.52% : 0.000013s : 80: predicate.same_eliminate 0.38% : 0.000010s : 127: predicate.set_cell_output_no_recompute 0.46% : 0.000012s : 80: predicate.shard_identity_eliminate 0.69% : 0.000017s : 120: predicate.special_op_eliminate 0.76% : 0.000019s : 125: predicate.specialize_transform 0.47% : 0.000012s : 80: predicate.split_environ_get_set_with_tuple_value 0.50% : 0.000013s : 80: predicate.stack_unstack_eliminate 2.69% : 0.000068s : 505: predicate.stopgrad_eliminater 0.22% : 0.000006s : 40: predicate.switch_call_monad_eliminater 1.78% : 0.000045s : 289: predicate.switch_defer_inline 2.13% : 0.000054s : 369: predicate.switch_layer_defer_inline 5.80% : 0.000148s : 868: predicate.switch_simplify 1.14% : 0.000029s : 212: predicate.tile_eliminate 1.12% : 0.000029s : 212: predicate.transpose_eliminate 1.75% : 0.000044s : 292: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000043s : 292: predicate.tuple_list_get_item_const_eliminator 1.54% : 0.000039s : 292: predicate.tuple_list_get_item_depend_reorder 2.54% : 0.000065s : 418: predicate.tuple_list_get_item_eliminator 1.60% : 0.000041s : 292: predicate.tuple_list_get_set_item_eliminator 2.40% : 0.000061s : 417: predicate.tuple_list_set_item_eliminator 1.59% : 0.000040s : 293: predicate.tuple_to_list_eliminator_ 2.88% : 0.000073s : 505: predicate.updatestate_pure_node_eliminater 3.62% : 0.000092s : 630: predicate.updatestate_useless_node_eliminater 0.24% : 0.000006s : 40: predicate.value_based_eliminate 0.44% : 0.000011s : 80: predicate.virtual_dataset_eliminate 0.45% : 0.000011s : 80: predicate.virtual_output_eliminate 0.24% : 0.000006s : 40: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.008550 114 55.30% : 0.004728s : 36: func_graph_cloner_run.FuncGraphClonerGraph 44.70% : 0.003821s : 78: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.709775 209 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000049s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.04% : 0.000300s : 1: add_recomputation 0.00% : 0.000013s : 1: assign_add_opt 0.19% : 0.001356s : 1: auto_monad 0.02% : 0.000130s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.10% : 0.000726s : 1: bootstrap 0.01% : 0.000040s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000041s : 1: convert_after_rewriter 0.01% : 0.000105s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.02% : 0.000129s : 1: distribtued_split 0.09% : 0.000659s : 1: eliminate_special_op_node 0.01% : 0.000038s : 1: environ_conv 0.00% : 0.000017s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000024s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000006s : 1: inline 0.00% : 0.000010s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.10% : 0.000689s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.01% : 0.000056s : 1: opt.transform.loop_unroll_optimizer 2.12% : 0.015012s : 97: opt.transform.opt_a 0.04% : 0.000251s : 1: opt.transform.opt_after_cconv 0.14% : 0.000965s : 27: opt.transform.opt_b 0.05% : 0.000328s : 1: opt.transform.opt_trans_graph 0.02% : 0.000135s : 3: opt.transform.special_op_eliminate 0.03% : 0.000224s : 4: opt.transform.symbol_engine_opt 4.20% : 0.029837s : 1: opt_a 0.07% : 0.000513s : 1: opt_after_cconv 0.17% : 0.001229s : 1: opt_b 5.01% : 0.035529s : 1: optimize 0.01% : 0.000039s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000028s : 1: order_py_execute_after_rewriter 0.01% : 0.000065s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000009s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.04% : 0.000307s : 1: pre_auto_parallel 0.04% : 0.000249s : 1: py_interpret_to_execute 0.01% : 0.000046s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000162s : 1: remove_dup_value 0.57% : 0.004051s : 1: renormalize.infer 0.68% : 0.004845s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.07% : 0.000503s : 1: rewriter_after_opt_a 0.10% : 0.000697s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000037s : 1: swap_dp_allreduce_reducescatter 0.04% : 0.000292s : 1: symbol_engine_optimizer 56.73% : 0.402683s : 1: task_emit 0.05% : 0.000352s : 1: tuple_transform 29.11% : 0.206639s : 1: type_inference 0.03% : 0.000185s : 1: validate [WARNING] ME(54186:281473137400848,MainProcess):2025-02-07-15:54:44.977.935 [mindspore/parallel/_utils.py:359] You are suggested to use mindspore.context.set_auto_parallel_context(parameter_broadcast=True) or mindspore.common.set_seed() to share parameters among multi-devices. [WARNING] ME(54221:281472867404816,MainProcess):2025-02-07-15:54:44.980.362 [mindspore/parallel/_utils.py:359] You are suggested to use mindspore.context.set_auto_parallel_context(parameter_broadcast=True) or mindspore.common.set_seed() to share parameters among multi-devices. =====Accuracy===== 0.9751602564102564 =====Accuracy===== 0.9667467948717948 =====Accuracy===== 0.9639423076923077 =====Accuracy===== 0.9703525641025641 [WARNING] DEVICE(54186,ffff925e5c10,python):2025-02-07-15:54:47.286.975 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x1d92a320 is not exist. [WARNING] DEVICE(54186,ffff925e5c10,python):2025-02-07-15:54:47.288.506 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2ebdb4a0 is not exist. [WARNING] DEVICE(54175,ffff8fe68c10,python):2025-02-07-15:54:51.522.923 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3cd55910 is not exist. [WARNING] DEVICE(54175,ffff8fe68c10,python):2025-02-07-15:54:51.525.710 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x4e0071e0 is not exist. [WARNING] DEVICE(54221,ffff82468c10,python):2025-02-07-15:54:51.622.416 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3795e910 is not exist. [WARNING] DEVICE(54221,ffff82468c10,python):2025-02-07-15:54:51.624.143 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x48c0e2f0 is not exist. [WARNING] DEVICE(54208,ffff820d9c10,python):2025-02-07-15:54:51.669.359 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x1c605fe0 is not exist. [WARNING] DEVICE(54208,ffff820d9c10,python):2025-02-07-15:54:51.672.387 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2d8b4d20 is not exist. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 60.21s (0:01:00) =================== ff8c39f2e51611efac92c4447d93fe45/pass/test_parameter_broadcast_test_parameter_broadcast.log0000644000175400017540000351003514751343157031307 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collected 1 item test_parameter_broadcast.py ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 -- /usr/local/python/python375/bin/python3.7 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 -- /usr/local/python/python375/bin/python3.7 cachedir: .pytest_cache rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collecting ... cachedir: .pytest_cache rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collecting ... ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 -- /usr/local/python/python375/bin/python3.7 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 -- /usr/local/python/python375/bin/python3.7 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 -- /usr/local/python/python375/bin/python3.7 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 -- /usr/local/python/python375/bin/python3.7 cachedir: .pytest_cache rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collecting ... ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 -- /usr/local/python/python375/bin/python3.7 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 -- /usr/local/python/python375/bin/python3.7 cachedir: .pytest_cache rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collecting ... cachedir: .pytest_cache rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collecting ... cachedir: .pytest_cache rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collecting ... cachedir: .pytest_cache rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collecting ... cachedir: .pytest_cache rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collecting ... [WARNING] DISTRIBUTED(163839,ffff88d43c10,python3.7):2025-02-07-13:53:55.491.574 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163840,ffff92fd9c10,python3.7):2025-02-07-13:53:55.663.689 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163843,ffffaa156c10,python3.7):2025-02-07-13:53:55.779.159 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163841,ffff99d3ac10,python3.7):2025-02-07-13:53:55.852.443 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163846,ffff97644c10,python3.7):2025-02-07-13:53:55.871.466 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163842,ffff8ac54c10,python3.7):2025-02-07-13:53:55.932.381 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163844,ffffae145c10,python3.7):2025-02-07-13:53:55.985.324 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163845,ffff83f78c10,python3.7):2025-02-07-13:53:55.988.647 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:53:55.994.293 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:53:55.994.300 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:53:55.994.309 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:53:55.994.307 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:53:55.994.366 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:53:55.994.350 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:53:55.994.316 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:53:55.994.342 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(163844,fffeaaffd0f0,python3.7):2025-02-07-13:53:55.994.513 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163840,fffe7ffff0f0,python3.7):2025-02-07-13:53:55.994.542 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163842,fffe7f7fe0f0,python3.7):2025-02-07-13:53:55.994.588 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163841,fffe967fc0f0,python3.7):2025-02-07-13:53:55.994.560 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163839,fffe73fff0f0,python3.7):2025-02-07-13:53:55.994.560 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163845,fffe897fa0f0,python3.7):2025-02-07-13:53:55.994.641 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163843,fffea6ffd0f0,python3.7):2025-02-07-13:53:55.994.647 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163846,fffea4ff90f0,python3.7):2025-02-07-13:53:55.994.612 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163839,fffe73fff0f0,python3.7):2025-02-07-13:53:56.288.546 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:53:56.289.151 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group distribute network.  collecting 1 item  collected 1 item  op_parallel.py::test_param_broadcast distribute network shard. distribute network create dataset. distribute network train. [WARNING] DEVICE(163842,fffe7f7fe0f0,python3.7):2025-02-07-13:53:56.396.572 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DEVICE(163843,fffea6ffd0f0,python3.7):2025-02-07-13:53:56.396.951 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:53:56.397.164 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:53:56.398.844 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group distribute network. distribute network.  collecting 1 item  collected 1 item   collecting 1 item  collected 1 item  op_parallel.py::test_param_broadcast op_parallel.py::test_param_broadcast distribute network shard. distribute network shard. distribute network create dataset. [WARNING] DEVICE(163840,fffe7ffff0f0,python3.7):2025-02-07-13:53:56.416.240 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:53:56.416.857 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group distribute network create dataset. distribute network.  collecting 1 item  collected 1 item  distribute network train. op_parallel.py::test_param_broadcast distribute network train. distribute network shard. distribute network create dataset. distribute network train. [WARNING] DEVICE(163841,fffe967fc0f0,python3.7):2025-02-07-13:53:56.447.204 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DEVICE(163845,fffe897fa0f0,python3.7):2025-02-07-13:53:56.447.320 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DEVICE(163844,fffeaaffd0f0,python3.7):2025-02-07-13:53:56.447.317 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:53:56.447.422 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:53:56.447.550 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:53:56.447.612 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group distribute network. distribute network. distribute network.  collecting 1 item  collecting 1 item  collected 1 item  collecting 1 item   collected 1 item   collected 1 item  op_parallel.py::test_param_broadcast op_parallel.py::test_param_broadcast op_parallel.py::test_param_broadcast distribute network shard. distribute network shard. distribute network shard. [WARNING] DEVICE(163846,fffea4ff90f0,python3.7):2025-02-07-13:53:56.463.748 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:53:56.463.970 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group distribute network create dataset. distribute network create dataset. distribute network. distribute network create dataset.  collecting 1 item  collected 1 item  distribute network train. op_parallel.py::test_param_broadcast distribute network train. distribute network shard. distribute network train. distribute network create dataset. distribute network train. [WARNING] PARALLEL(163839,ffff88d43c10,python3.7):2025-02-07-13:53:57.533.560 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] DISTRIBUTED(163839,ffff88d43c10,python3.7):2025-02-07-13:53:57.542.323 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-6301172352641561019 [const vector]{0, 1, 2, 3}, async: 0, submit_now: 0 [WARNING] PARALLEL(163840,ffff92fd9c10,python3.7):2025-02-07-13:53:57.612.377 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] DISTRIBUTED(163840,ffff92fd9c10,python3.7):2025-02-07-13:53:57.621.019 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-6301172352641561019 [const vector]{0, 1, 2, 3}, async: 0, submit_now: 0 [WARNING] PARALLEL(163843,ffffaa156c10,python3.7):2025-02-07-13:53:57.623.247 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] DISTRIBUTED(163843,ffffaa156c10,python3.7):2025-02-07-13:53:57.631.903 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-15700679239691767905 [const vector]{4, 5, 6, 7}, async: 0, submit_now: 0 [WARNING] PARALLEL(163844,ffffae145c10,python3.7):2025-02-07-13:53:57.668.812 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] DISTRIBUTED(163844,ffffae145c10,python3.7):2025-02-07-13:53:57.677.304 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-15700679239691767905 [const vector]{4, 5, 6, 7}, async: 0, submit_now: 0 [WARNING] PARALLEL(163842,ffff8ac54c10,python3.7):2025-02-07-13:53:57.689.295 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(163841,ffff99d3ac10,python3.7):2025-02-07-13:53:57.690.031 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] DISTRIBUTED(163842,ffff8ac54c10,python3.7):2025-02-07-13:53:57.697.983 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-6301172352641561019 [const vector]{0, 1, 2, 3}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163841,ffff99d3ac10,python3.7):2025-02-07-13:53:57.698.813 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-6301172352641561019 [const vector]{0, 1, 2, 3}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163839,ffff88d43c10,python3.7):2025-02-07-13:53:57.699.143 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-16453000547691086251 [const vector]{0, 4}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163841,ffff99d3ac10,python3.7):2025-02-07-13:53:57.699.140 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-511848487187618470 [const vector]{2, 6}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163842,ffff8ac54c10,python3.7):2025-02-07-13:53:57.699.146 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5488101015797526856 [const vector]{3, 7}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163840,ffff92fd9c10,python3.7):2025-02-07-13:53:57.699.155 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-12944936785892925600 [const vector]{1, 5}, async: 0, submit_now: 0 [WARNING] PARALLEL(163846,ffff97644c10,python3.7):2025-02-07-13:53:57.704.504 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] DISTRIBUTED(163846,ffff97644c10,python3.7):2025-02-07-13:53:57.713.524 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-15700679239691767905 [const vector]{4, 5, 6, 7}, async: 0, submit_now: 0 [WARNING] PARALLEL(163845,ffff83f78c10,python3.7):2025-02-07-13:53:57.720.603 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] DISTRIBUTED(163845,ffff83f78c10,python3.7):2025-02-07-13:53:57.730.853 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-15700679239691767905 [const vector]{4, 5, 6, 7}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163844,ffffae145c10,python3.7):2025-02-07-13:53:57.731.154 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-12944936785892925600 [const vector]{1, 5}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163846,ffff97644c10,python3.7):2025-02-07-13:53:57.731.162 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5488101015797526856 [const vector]{3, 7}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163843,ffffaa156c10,python3.7):2025-02-07-13:53:57.731.154 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-16453000547691086251 [const vector]{0, 4}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163845,ffff83f78c10,python3.7):2025-02-07-13:53:57.731.164 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-511848487187618470 [const vector]{2, 6}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163843,ffffaa156c10,python3.7):2025-02-07-13:53:57.731.362 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-6541264347459079684 [const vector]{4, 5}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163844,ffffae145c10,python3.7):2025-02-07-13:53:57.731.368 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-6541264347459079684 [const vector]{4, 5}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163845,ffff83f78c10,python3.7):2025-02-07-13:53:57.731.397 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-6853331267304275293 [const vector]{6, 7}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163842,ffff8ac54c10,python3.7):2025-02-07-13:53:57.731.392 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-3358271254418797552 [const vector]{2, 3}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163839,ffff88d43c10,python3.7):2025-02-07-13:53:57.731.389 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5004544844489628105 [const vector]{0, 1}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163846,ffff97644c10,python3.7):2025-02-07-13:53:57.731.386 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-6853331267304275293 [const vector]{6, 7}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163841,ffff99d3ac10,python3.7):2025-02-07-13:53:57.731.425 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-3358271254418797552 [const vector]{2, 3}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163840,ffff92fd9c10,python3.7):2025-02-07-13:53:57.731.417 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5004544844489628105 [const vector]{0, 1}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163844,ffffae145c10,python3.7):2025-02-07-13:53:57.738.397 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-16057586909177180503 [const vector]{5, 7}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163843,ffffaa156c10,python3.7):2025-02-07-13:53:57.738.410 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5435772415009061329 [const vector]{4, 6}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163839,ffff88d43c10,python3.7):2025-02-07-13:53:57.738.694 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5208665662337742843 [const vector]{0, 2}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163841,ffff99d3ac10,python3.7):2025-02-07-13:53:57.738.705 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5208665662337742843 [const vector]{0, 2}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163846,ffff97644c10,python3.7):2025-02-07-13:53:57.738.744 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-16057586909177180503 [const vector]{5, 7}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163842,ffff8ac54c10,python3.7):2025-02-07-13:53:57.738.810 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-4190060298023907007 [const vector]{1, 3}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163840,ffff92fd9c10,python3.7):2025-02-07-13:53:57.738.969 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-4190060298023907007 [const vector]{1, 3}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163845,ffff83f78c10,python3.7):2025-02-07-13:53:57.739.360 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5435772415009061329 [const vector]{4, 6}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163844,ffffae145c10,python3.7):2025-02-07-13:53:57.739.735 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-2688051859485673701 [const vector]{1, 3, 5, 7}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163839,ffff88d43c10,python3.7):2025-02-07-13:53:57.739.767 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-5226697808808137312 [const vector]{0, 2, 4, 6}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163841,ffff99d3ac10,python3.7):2025-02-07-13:53:57.739.790 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-5226697808808137312 [const vector]{0, 2, 4, 6}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163846,ffff97644c10,python3.7):2025-02-07-13:53:57.739.809 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-2688051859485673701 [const vector]{1, 3, 5, 7}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163840,ffff92fd9c10,python3.7):2025-02-07-13:53:57.740.079 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-2688051859485673701 [const vector]{1, 3, 5, 7}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163842,ffff8ac54c10,python3.7):2025-02-07-13:53:57.740.097 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-2688051859485673701 [const vector]{1, 3, 5, 7}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163843,ffffaa156c10,python3.7):2025-02-07-13:53:57.740.383 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-5226697808808137312 [const vector]{0, 2, 4, 6}, async: 0, submit_now: 0 [WARNING] DISTRIBUTED(163845,ffff83f78c10,python3.7):2025-02-07-13:53:57.740.471 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 4-5226697808808137312 [const vector]{0, 2, 4, 6}, async: 0, submit_now: 0 [WARNING] PARALLEL(163844,ffffae145c10,python3.7):2025-02-07-13:53:58.396.068 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163843,ffffaa156c10,python3.7):2025-02-07-13:53:58.396.928 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163839,ffff88d43c10,python3.7):2025-02-07-13:53:58.413.765 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163846,ffff97644c10,python3.7):2025-02-07-13:53:58.413.769 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163841,ffff99d3ac10,python3.7):2025-02-07-13:53:58.414.799 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163840,ffff92fd9c10,python3.7):2025-02-07-13:53:58.420.600 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163842,ffff8ac54c10,python3.7):2025-02-07-13:53:58.423.673 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:53:58.431.201 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-6301172352641561019 [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:53:58.431.197 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-6301172352641561019 [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:53:58.431.201 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-6301172352641561019 [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:53:58.431.212 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-6301172352641561019 [WARNING] DEVICE(163839,fffd31ffb0f0,python3.7):2025-02-07-13:53:58.431.393 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-6301172352641561019, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163840,fffd427fc0f0,python3.7):2025-02-07-13:53:58.431.426 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-6301172352641561019, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163841,fffd54ff90f0,python3.7):2025-02-07-13:53:58.431.450 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-6301172352641561019, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163842,fffd3dffb0f0,python3.7):2025-02-07-13:53:58.431.477 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-6301172352641561019, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] PARALLEL(163845,ffff83f78c10,python3.7):2025-02-07-13:53:58.455.580 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:53:58.463.665 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-15700679239691767905 [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:53:58.463.663 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-15700679239691767905 [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:53:58.463.659 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-15700679239691767905 [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:53:58.463.669 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-15700679239691767905 [WARNING] DEVICE(163843,fffd437fe0f0,python3.7):2025-02-07-13:53:58.463.885 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-15700679239691767905, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163844,fffd657fa0f0,python3.7):2025-02-07-13:53:58.463.942 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-15700679239691767905, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163846,fffd5affd0f0,python3.7):2025-02-07-13:53:58.463.921 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-15700679239691767905, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163845,fffd2b7fe0f0,python3.7):2025-02-07-13:53:58.463.925 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-15700679239691767905, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163839,fffd31ffb0f0,python3.7):2025-02-07-13:53:58.502.457 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-6301172352641561019 [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:53:58.502.524 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-6301172352641561019 [WARNING] DEVICE(163843,fffd437fe0f0,python3.7):2025-02-07-13:53:58.534.626 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-15700679239691767905 [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:53:58.534.683 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-15700679239691767905 [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:53:58.534.826 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-16453000547691086251 [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:53:58.534.831 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-16453000547691086251 [WARNING] DEVICE(163843,fffd437fe0f0,python3.7):2025-02-07-13:53:58.534.945 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-16453000547691086251, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163839,fffd317fa0f0,python3.7):2025-02-07-13:53:58.534.957 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-16453000547691086251, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163843,fffd437fe0f0,python3.7):2025-02-07-13:53:58.585.628 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-16453000547691086251 [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:53:58.585.683 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-16453000547691086251 [WARNING] DEVICE(163839,fffd317fa0f0,python3.7):2025-02-07-13:53:58.585.717 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-16453000547691086251 [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:53:58.585.763 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-16453000547691086251 [WARNING] DEVICE(163841,fffd54ff90f0,python3.7):2025-02-07-13:53:58.682.974 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-6301172352641561019 [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:53:58.683.051 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-6301172352641561019 [WARNING] DEVICE(163842,fffd3dffb0f0,python3.7):2025-02-07-13:53:58.683.279 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-6301172352641561019 [WARNING] DEVICE(163840,fffd427fc0f0,python3.7):2025-02-07-13:53:58.683.337 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-6301172352641561019 [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:53:58.683.358 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-6301172352641561019 [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:53:58.683.402 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-6301172352641561019 [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:53:58.689.269 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5004544844489628105 [WARNING] DEVICE(163839,fffd30ff90f0,python3.7):2025-02-07-13:53:58.689.436 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5004544844489628105, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163845,fffd2b7fe0f0,python3.7):2025-02-07-13:53:58.714.867 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-15700679239691767905 [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:53:58.714.942 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-15700679239691767905 [WARNING] DEVICE(163846,fffd5affd0f0,python3.7):2025-02-07-13:53:58.715.008 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-15700679239691767905 [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:53:58.715.075 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-15700679239691767905 [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:53:58.715.134 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-511848487187618470 [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:53:58.715.125 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-511848487187618470 [WARNING] DEVICE(163844,fffd657fa0f0,python3.7):2025-02-07-13:53:58.715.241 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-15700679239691767905 [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:53:58.715.246 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5488101015797526856 [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:53:58.715.259 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5488101015797526856 [WARNING] DEVICE(163845,fffd2b7fe0f0,python3.7):2025-02-07-13:53:58.715.285 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-511848487187618470, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:53:58.715.316 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-15700679239691767905 [WARNING] DEVICE(163841,fffd36ffd0f0,python3.7):2025-02-07-13:53:58.715.345 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-511848487187618470, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163846,fffd5affd0f0,python3.7):2025-02-07-13:53:58.715.386 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5488101015797526856, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163842,fffd1ffff0f0,python3.7):2025-02-07-13:53:58.715.446 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5488101015797526856, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:53:58.715.526 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-6541264347459079684 [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:53:58.715.566 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-12944936785892925600 [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:53:58.715.568 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-12944936785892925600 [WARNING] DEVICE(163843,fffd42ffd0f0,python3.7):2025-02-07-13:53:58.715.690 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-6541264347459079684, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163844,fffd657fa0f0,python3.7):2025-02-07-13:53:58.715.708 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-12944936785892925600, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163840,fffd40ff90f0,python3.7):2025-02-07-13:53:58.715.757 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-12944936785892925600, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163840,fffd40ff90f0,python3.7):2025-02-07-13:53:58.781.530 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-12944936785892925600 [WARNING] DEVICE(163841,fffd36ffd0f0,python3.7):2025-02-07-13:53:58.781.563 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-511848487187618470 [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:53:58.781.581 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-12944936785892925600 [WARNING] DEVICE(163842,fffd1ffff0f0,python3.7):2025-02-07-13:53:58.781.610 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5488101015797526856 [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:53:58.781.623 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-511848487187618470 [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:53:58.781.675 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5004544844489628105 [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:53:58.781.673 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5488101015797526856 [WARNING] DEVICE(163840,fffd40ff90f0,python3.7):2025-02-07-13:53:58.781.795 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5004544844489628105, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:53:58.781.984 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-3358271254418797552 [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:53:58.781.987 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-3358271254418797552 [WARNING] DEVICE(163841,fffd367fc0f0,python3.7):2025-02-07-13:53:58.782.114 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-3358271254418797552, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163842,fffd1ffff0f0,python3.7):2025-02-07-13:53:58.782.121 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-3358271254418797552, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163840,fffd40ff90f0,python3.7):2025-02-07-13:53:58.841.305 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5004544844489628105 [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:53:58.841.349 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5004544844489628105 [WARNING] DEVICE(163839,fffd30ff90f0,python3.7):2025-02-07-13:53:58.841.565 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5004544844489628105 [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:53:58.841.627 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5004544844489628105 [WARNING] DEVICE(163842,fffd1ffff0f0,python3.7):2025-02-07-13:53:58.849.576 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-3358271254418797552 [WARNING] DEVICE(163841,fffd367fc0f0,python3.7):2025-02-07-13:53:58.849.603 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-3358271254418797552 [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:53:58.849.636 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-3358271254418797552 [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:53:58.849.661 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-3358271254418797552 [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:53:58.849.744 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-4190060298023907007 [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:53:58.849.743 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-4190060298023907007 [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:53:58.849.764 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5208665662337742843 [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:53:58.849.768 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5208665662337742843 [WARNING] DEVICE(163840,fffd23fff0f0,python3.7):2025-02-07-13:53:58.849.873 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-4190060298023907007, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163842,fffd1ffff0f0,python3.7):2025-02-07-13:53:58.849.877 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-4190060298023907007, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163841,fffd367fc0f0,python3.7):2025-02-07-13:53:58.849.894 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5208665662337742843, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163839,fffd17fff0f0,python3.7):2025-02-07-13:53:58.849.921 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5208665662337742843, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163841,fffd367fc0f0,python3.7):2025-02-07-13:53:58.913.502 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5208665662337742843 [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:53:58.913.559 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5208665662337742843 [WARNING] DEVICE(163839,fffd17fff0f0,python3.7):2025-02-07-13:53:58.913.592 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5208665662337742843 [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:53:58.913.658 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5208665662337742843 [WARNING] DEVICE(163840,fffd23fff0f0,python3.7):2025-02-07-13:53:58.921.335 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-4190060298023907007 [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:53:58.921.380 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-4190060298023907007 [WARNING] DEVICE(163842,fffd1ffff0f0,python3.7):2025-02-07-13:53:58.921.621 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-4190060298023907007 [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:53:58.921.689 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-4190060298023907007 [WARNING] DEVICE(163845,fffd2b7fe0f0,python3.7):2025-02-07-13:53:58.962.306 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-511848487187618470 [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:53:58.962.375 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-511848487187618470 [WARNING] DEVICE(163846,fffd5affd0f0,python3.7):2025-02-07-13:53:58.962.530 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5488101015797526856 [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:53:58.962.594 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5488101015797526856 [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:53:58.964.985 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-6853331267304275293 [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:53:58.964.994 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-6853331267304275293 [WARNING] DEVICE(163846,fffd5affd0f0,python3.7):2025-02-07-13:53:58.965.108 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-6853331267304275293, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163845,fffd29ffb0f0,python3.7):2025-02-07-13:53:58.965.202 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-6853331267304275293, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163844,fffd657fa0f0,python3.7):2025-02-07-13:53:58.966.059 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-12944936785892925600 [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:53:58.966.113 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-12944936785892925600 [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:53:58.966.274 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-6541264347459079684 [WARNING] DEVICE(163844,fffd657fa0f0,python3.7):2025-02-07-13:53:58.966.404 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-6541264347459079684, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163845,fffd29ffb0f0,python3.7):2025-02-07-13:53:59.025.524 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-6853331267304275293 [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:53:59.025.589 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-6853331267304275293 [WARNING] DEVICE(163843,fffd42ffd0f0,python3.7):2025-02-07-13:53:59.029.557 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-6541264347459079684 [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:53:59.029.627 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-6541264347459079684 [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:53:59.030.007 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5435772415009061329 [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:53:59.030.015 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5435772415009061329 [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:53:59.030.019 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-5226697808808137312 [WARNING] DEVICE(163839,fffd177fe0f0,python3.7):2025-02-07-13:53:59.030.201 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-5226697808808137312, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163845,fffd29ffb0f0,python3.7):2025-02-07-13:53:59.030.212 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5435772415009061329, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163843,fffd427fc0f0,python3.7):2025-02-07-13:53:59.030.210 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5435772415009061329, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163845,fffd29ffb0f0,python3.7):2025-02-07-13:53:59.085.495 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5435772415009061329 [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:53:59.085.555 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5435772415009061329 [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:53:59.085.734 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-5226697808808137312 [WARNING] DEVICE(163845,fffd29ffb0f0,python3.7):2025-02-07-13:53:59.085.861 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-5226697808808137312, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163843,fffd427fc0f0,python3.7):2025-02-07-13:53:59.089.463 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5435772415009061329 [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:53:59.089.548 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5435772415009061329 [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:53:59.089.724 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-5226697808808137312 [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:53:59.089.729 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-5226697808808137312 [WARNING] DEVICE(163841,fffd367fc0f0,python3.7):2025-02-07-13:53:59.089.858 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-5226697808808137312, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163843,fffd427fc0f0,python3.7):2025-02-07-13:53:59.089.857 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-5226697808808137312, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163841,fffd367fc0f0,python3.7):2025-02-07-13:53:59.141.726 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-5226697808808137312 [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:53:59.141.782 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-5226697808808137312 [WARNING] DEVICE(163839,fffd177fe0f0,python3.7):2025-02-07-13:53:59.145.869 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-5226697808808137312 [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:53:59.145.919 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-5226697808808137312 [WARNING] DEVICE(163845,fffd29ffb0f0,python3.7):2025-02-07-13:53:59.149.816 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-5226697808808137312 [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:53:59.149.866 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-5226697808808137312 [WARNING] DEVICE(163843,fffd427fc0f0,python3.7):2025-02-07-13:53:59.149.938 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-5226697808808137312 [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:53:59.149.990 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-5226697808808137312 [WARNING] DEVICE(163846,fffd5affd0f0,python3.7):2025-02-07-13:53:59.205.922 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-6853331267304275293 [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:53:59.205.973 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-6853331267304275293 [WARNING] DEVICE(163844,fffd657fa0f0,python3.7):2025-02-07-13:53:59.205.959 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-6541264347459079684 [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:53:59.206.016 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-6541264347459079684 [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:53:59.208.456 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-16057586909177180503 [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:53:59.208.462 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-16057586909177180503 [WARNING] DEVICE(163846,fffd5affd0f0,python3.7):2025-02-07-13:53:59.208.594 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-16057586909177180503, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163844,fffd4f7fe0f0,python3.7):2025-02-07-13:53:59.208.662 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-16057586909177180503, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163844,fffd4f7fe0f0,python3.7):2025-02-07-13:53:59.265.548 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-16057586909177180503 [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:53:59.265.600 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-16057586909177180503 [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:53:59.265.822 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-2688051859485673701 [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:53:59.265.827 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-2688051859485673701 [WARNING] DEVICE(163844,fffd4f7fe0f0,python3.7):2025-02-07-13:53:59.265.950 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-2688051859485673701, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163842,fffd1ffff0f0,python3.7):2025-02-07-13:53:59.265.965 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-2688051859485673701, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163846,fffd5affd0f0,python3.7):2025-02-07-13:53:59.449.868 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-16057586909177180503 [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:53:59.449.927 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-16057586909177180503 [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:53:59.450.225 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-2688051859485673701 [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:53:59.450.232 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 4-2688051859485673701 [WARNING] DEVICE(163846,fffd5affd0f0,python3.7):2025-02-07-13:53:59.450.347 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-2688051859485673701, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163840,fffd237fe0f0,python3.7):2025-02-07-13:53:59.450.378 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 4-2688051859485673701, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163844,fffd4f7fe0f0,python3.7):2025-02-07-13:53:59.505.819 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-2688051859485673701 [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:53:59.505.875 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-2688051859485673701 [WARNING] DEVICE(163840,fffd237fe0f0,python3.7):2025-02-07-13:53:59.505.942 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-2688051859485673701 [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:53:59.505.989 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-2688051859485673701 [WARNING] DEVICE(163842,fffd1ffff0f0,python3.7):2025-02-07-13:53:59.509.683 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-2688051859485673701 [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:53:59.509.755 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-2688051859485673701 [WARNING] DEVICE(163846,fffd5affd0f0,python3.7):2025-02-07-13:53:59.690.308 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 4-2688051859485673701 [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:53:59.690.360 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 4-2688051859485673701 TotalTime = 15.107, [21] [bootstrap]: 0.00131808 [type_inference]: 0.7202 [auto_monad]: 0.00188953 [graph_reusing]: 2.753e-05 [inline]: 0.0425068, [2] [rewriter_before_opt_a]: 0.00148385 [a1a2]: 0.0409846, [2] [Cycle 1]: 0.0280067, [11] [expand_dump_flag]: 3.35e-05 [switch_simplify]: 0.00104837 [loop_unroll]: 0.00067373 [a_1]: 0.0217843 [recompute_prepare]: 0.00016228 [updatestate_depend_eliminate]: 0.00035806 [updatestate_assign_eliminate]: 8.896e-05 [updatestate_loads_eliminate]: 0.00019826 [parameter_eliminate]: 6.39999e-06 [a_2]: 0.00338475 [parallel_inline_pass]: 0.00010329 [Cycle 2]: 0.00545479, [11] [expand_dump_flag]: 1.19001e-06 [switch_simplify]: 9.274e-05 [loop_unroll]: 9.159e-05 [a_1]: 0.00312598 [recompute_prepare]: 9.659e-05 [updatestate_depend_eliminate]: 0.00025341 [updatestate_assign_eliminate]: 6.377e-05 [updatestate_loads_eliminate]: 6.32e-05 [parameter_eliminate]: 3.44e-06 [a_2]: 0.00149003 [parallel_inline_pass]: 0.00010073 [parallel-infer-symbol]: 0.00017948 [pre_auto_parallel]: 9.116e-05 [insert-virtual-dataset]: 0.00115069 [parallel-infer-symbol-second]: 2.62e-06 [dataset_repeat_opt]: 7.124e-05 [pipeline_split]: 9.629e-05 [optimize]: 0.881506, [52] [py_interpret_to_execute]: 0.0001185 [rewriter_before_opt_a]: 0.00027044 [opt_a]: 0.863031, [3] [Cycle 1]: 0.760495, [46] [expand_dump_flag]: 2.03001e-06 [switch_simplify]: 0.00011232 [loop_unroll]: 9.656e-05 [a_1]: 0.00328576 [recompute_prepare]: 0.00010282 [updatestate_depend_eliminate]: 0.00010208 [updatestate_assign_eliminate]: 6.241e-05 [updatestate_loads_eliminate]: 6.863e-05 [parameter_eliminate]: 3.44e-06 [a_2]: 0.00164433 [accelerated_algorithm]: 0.00031242 [shard]: 2.3e-06 [meta_shard_fg_expand]: 4.969e-05 [shard_inline]: 0.00010892 [auto_parallel]: 8.203e-05 [parallel]: 0.208698 [flash_sp]: 7.886e-05 [merge_comm]: 0.00017531 [allreduce_fusion]: 9.814e-05 [matmul_add_comm_reduction]: 0.0001219 [allreduce_slice_to_reducescatter]: 6.00005e-07 [virtual_shard_identity]: 0.00015642 [virtual_dataset]: 0.00019096 [get_grad_eliminate_]: 0.00014581 [virtual_output]: 0.00016979 [merge_forward]: 9.476e-05 [cell_reuse_recompute_pass]: 2.91999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025259 [before_grad]: 0.0002376 [inplace_validation]: 0.00016038 [parallel_renormalize]: 0.0217688 [update_top_fg]: 6.39993e-07 [cast_eliminate]: 0.00020903 [meta_fg_expand]: 0.28814 [inplace_validation_after_expand]: 0.00191218 [flash_sp_send_recv_attached]: 0.00147923 [receive_attached]: 8.065e-05 [after_resolve]: 0.00243418 [a_after_grad]: 0.0046482 [special_op_eliminate]: 0.00222534 [renormalize]: 0.180454 [add_forward_monad_depend]: 0.00038814 [auto_monad_grad]: 0.00026338 [auto_monad_eliminator]: 0.00222097 [cse]: 0.00545909 [a_3]: 0.031635 [Cycle 2]: 0.0866942, [46] [expand_dump_flag]: 6.586e-05 [switch_simplify]: 0.00231334 [loop_unroll]: 0.0019589 [a_1]: 0.0385679 [recompute_prepare]: 0.00024401 [updatestate_depend_eliminate]: 0.00037586 [updatestate_assign_eliminate]: 0.00013785 [updatestate_loads_eliminate]: 0.00019373 [parameter_eliminate]: 4.22e-06 [a_2]: 0.00586126 [accelerated_algorithm]: 0.00021874 [shard]: 2.68e-06 [meta_shard_fg_expand]: 9.541e-05 [shard_inline]: 0.0001955 [auto_parallel]: 0.00014282 [parallel]: 1.378e-05 [flash_sp]: 0.0001522 [merge_comm]: 0.00014007 [allreduce_fusion]: 0.0001184 [matmul_add_comm_reduction]: 0.00013851 [allreduce_slice_to_reducescatter]: 5.3001e-07 [virtual_shard_identity]: 0.00019394 [virtual_dataset]: 0.00019835 [get_grad_eliminate_]: 0.00018275 [virtual_output]: 0.00018529 [merge_forward]: 0.00011825 [cell_reuse_recompute_pass]: 2.80999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00032686 [before_grad]: 0.00031068 [inplace_validation]: 0.00011032 [parallel_renormalize]: 1.09998e-07 [update_top_fg]: 6.50005e-07 [cast_eliminate]: 0.00020673 [meta_fg_expand]: 0.00037447 [inplace_validation_after_expand]: 0.0002335 [flash_sp_send_recv_attached]: 2.56e-06 [receive_attached]: 1.64e-06 [after_resolve]: 0.00021526 [a_after_grad]: 0.00031997 [special_op_eliminate]: 0.00018726 [renormalize]: 0.0220178 [add_forward_monad_depend]: 6.45001e-06 [auto_monad_grad]: 2.33999e-06 [auto_monad_eliminator]: 0.00037327 [cse]: 0.00841932 [a_3]: 0.0013141 [Cycle 3]: 0.015818, [46] [expand_dump_flag]: 2.55e-06 [switch_simplify]: 0.00018125 [loop_unroll]: 0.00017664 [a_1]: 0.00573749 [recompute_prepare]: 0.00018816 [updatestate_depend_eliminate]: 0.00018991 [updatestate_assign_eliminate]: 0.0001243 [updatestate_loads_eliminate]: 0.0001203 [parameter_eliminate]: 3.20999e-06 [a_2]: 0.00284101 [accelerated_algorithm]: 0.00020616 [shard]: 2.09e-06 [meta_shard_fg_expand]: 6.55e-05 [shard_inline]: 0.00018356 [auto_parallel]: 0.00014324 [parallel]: 1.134e-05 [flash_sp]: 2.03999e-06 [merge_comm]: 0.00013592 [allreduce_fusion]: 0.00012448 [matmul_add_comm_reduction]: 0.00015196 [allreduce_slice_to_reducescatter]: 5.00004e-07 [virtual_shard_identity]: 0.00018826 [virtual_dataset]: 0.00018134 [get_grad_eliminate_]: 0.00017316 [virtual_output]: 0.00017844 [merge_forward]: 0.00012226 [cell_reuse_recompute_pass]: 2.92e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00031867 [before_grad]: 0.00030165 [inplace_validation]: 0.00011829 [parallel_renormalize]: 8.9989e-08 [update_top_fg]: 5.10001e-07 [cast_eliminate]: 0.00019693 [meta_fg_expand]: 0.00018241 [inplace_validation_after_expand]: 0.00016326 [flash_sp_send_recv_attached]: 2.39001e-06 [receive_attached]: 1.62001e-06 [after_resolve]: 0.00019973 [a_after_grad]: 0.00030271 [special_op_eliminate]: 0.00017835 [renormalize]: 1.00001e-07 [add_forward_monad_depend]: 2.58001e-06 [auto_monad_grad]: 1.73e-06 [auto_monad_eliminator]: 0.00020978 [cse]: 0.00056515 [a_3]: 0.00131249 [py_interpret_to_execute_after_opt_a]: 0.00018432 [slice_cell_reuse_recomputed_activation]: 2.44001e-06 [rewriter_after_opt_a]: 0.00118511 [convert_after_rewriter]: 0.00014309 [order_py_execute_after_rewriter]: 0.00010432 [opt_b]: 0.00537469, [1] [Cycle 1]: 0.00536342, [7] [b_1]: 0.00420084 [b_2]: 0.00018625 [updatestate_depend_eliminate]: 0.00012592 [updatestate_assign_eliminate]: 0.00011504 [updatestate_loads_eliminate]: 0.00011922 [renormalize]: 5.60001e-07 [cse]: 0.00055417 [optimize_parallel_all_gather_comm]: 0.00018377 [overlap_param_gather]: 1.23e-06 [cconv]: 8.412e-05 [loop_unroll]: 0.00097172 [opt_after_cconv]: 0.00212225, [1] [Cycle 1]: 0.00211272, [7] [c_1]: 0.00109065 [parameter_eliminate]: 2.79999e-06 [updatestate_depend_eliminate]: 0.00016655 [updatestate_assign_eliminate]: 0.00012076 [updatestate_loads_eliminate]: 0.00012259 [cse]: 0.00054787 [renormalize]: 7.10002e-07 [remove_dup_value]: 0.00090052 [tuple_transform]: 0.00119206, [1] [Cycle 1]: 0.00118368, [2] [d_1]: 0.00116486 [renormalize]: 4.39992e-07 [partial_unused_args_eliminate]: 3.23e-06 [add_cache_embedding]: 0.00019239 [add_recomputation]: 0.00089998 [cse_after_recomputation]: 0.00040625, [1] [Cycle 1]: 0.00039808, [1] [cse]: 0.00038252 [environ_conv]: 0.00011424 [swap_dp_allreduce_reducescatter]: 0.00016913 [bias_add_comm_swap]: 2.64999e-06 [label_micro_interleaved_index]: 1.71e-06 [label_fine_grained_interleaved_index]: 0.00070233 [merge_cast_opt]: 1.5e-06 [slice_recompute_activation]: 0.00019011 [micro_interleaved_order_control]: 1.5e-06 [assign_add_opt]: 0.00047845 [ForceFp32Comm]: 1.67001e-06 [remove_cast_before_assign_add]: 0.00013296 [full_micro_interleaved_order_control]: 2.23001e-06 [reorder_send_recv_between_fp_bp]: 1.97999e-06 [comm_op_add_attrs]: 0.00020718 [add_comm_op_reuse_tag]: 0.00019911 [interleave_split_concat_branches]: 9.70002e-07 [interleave_parallel_branches]: 7.79997e-07 [overlap_opt_shard_in_pipeline]: 1.141e-05 [overlap_opt_shard_grad_in_pipeline]: 3.29e-06 [control_data_broadcast_order]: 1.10001e-06 [grouped_pairwise_exchange_alltoall]: 1.104e-05 [offloading_packed_experts]: 2.21e-06 [overlap_recompute_and_grad_model_parallel]: 1.81e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.79997e-07 [overlap_recompute_allgather_and_fa_grad]: 5.73e-05 [overlap_grad_ring_attention]: 0.00017928 [overlap_grad_flash_sp]: 0.0001447 [begin_end_overlap_inline]: 1.01e-06 [split_matmul_comm_elemetwise]: 2.76e-06 [split_layernorm_comm]: 1.94e-06 [handle_group_info]: 1.03e-05 [symbol_engine_optimizer]: 0.00099783, [1] [Cycle 1]: 0.0009911, [6] [build]: 6.234e-05 [elim_shapecalc]: 0.00018609 [elim_not_effective]: 0.00027354 [opt_reshape]: 0.00016819 [fold_const_symbol]: 0.00026094 [renormalize]: 4.90007e-07 [pipeline_parallel_scheduler]: 3.83001e-06 [auto_monad_reorder]: 0.0003693 [get_jit_bprop_graph]: 4.1e-07 [rewriter_after_jit_bprop_graph]: 4.20012e-07 [eliminate_special_op_node]: 0.00157193 [distribtued_split]: 1.41001e-06 [validate]: 0.00036983 [task_emit]: 13.454 [execute]: 1.159e-05 Sums bootstrap : 0.001318s : 0.01% type_inference : 0.720200s : 4.77% auto_monad : 0.001890s : 0.01% graph_reusing : 0.000028s : 0.00% inline.rewriter_before_opt_a : 0.001484s : 0.01% inline.a1a2.expand_dump_flag : 0.000035s : 0.00% inline.a1a2.switch_simplify : 0.001141s : 0.01% inline.a1a2.loop_unroll : 0.000765s : 0.01% inline.a1a2.a_1 : 0.024910s : 0.17% inline.a1a2.recompute_prepare : 0.000259s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000611s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000153s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000261s : 0.00% inline.a1a2.parameter_eliminate : 0.000010s : 0.00% inline.a1a2.a_2 : 0.004875s : 0.03% inline.a1a2.parallel_inline_pass : 0.000204s : 0.00% parallel-infer-symbol : 0.000179s : 0.00% pre_auto_parallel : 0.000091s : 0.00% insert-virtual-dataset : 0.001151s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000071s : 0.00% pipeline_split : 0.000096s : 0.00% optimize.py_interpret_to_execute : 0.000119s : 0.00% optimize.rewriter_before_opt_a : 0.000270s : 0.00% optimize.opt_a.expand_dump_flag : 0.000070s : 0.00% optimize.opt_a.switch_simplify : 0.002607s : 0.02% optimize.opt_a.loop_unroll : 0.002232s : 0.01% optimize.opt_a.a_1 : 0.047591s : 0.32% optimize.opt_a.recompute_prepare : 0.000535s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000668s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000325s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000383s : 0.00% optimize.opt_a.parameter_eliminate : 0.000011s : 0.00% optimize.opt_a.a_2 : 0.010347s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000737s : 0.00% optimize.opt_a.shard : 0.000007s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000211s : 0.00% optimize.opt_a.shard_inline : 0.000488s : 0.00% optimize.opt_a.auto_parallel : 0.000368s : 0.00% optimize.opt_a.parallel : 0.208723s : 1.38% optimize.opt_a.flash_sp : 0.000233s : 0.00% optimize.opt_a.merge_comm : 0.000451s : 0.00% optimize.opt_a.allreduce_fusion : 0.000341s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000412s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000539s : 0.00% optimize.opt_a.virtual_dataset : 0.000571s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000502s : 0.00% optimize.opt_a.virtual_output : 0.000534s : 0.00% optimize.opt_a.merge_forward : 0.000335s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000898s : 0.01% optimize.opt_a.before_grad : 0.000850s : 0.01% optimize.opt_a.inplace_validation : 0.000389s : 0.00% optimize.opt_a.parallel_renormalize : 0.021769s : 0.14% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000613s : 0.00% optimize.opt_a.meta_fg_expand : 0.288697s : 1.91% optimize.opt_a.inplace_validation_after_expand : 0.002309s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.001484s : 0.01% optimize.opt_a.receive_attached : 0.000084s : 0.00% optimize.opt_a.after_resolve : 0.002849s : 0.02% optimize.opt_a.a_after_grad : 0.005271s : 0.03% optimize.opt_a.special_op_eliminate : 0.002591s : 0.02% optimize.opt_a.renormalize : 0.202472s : 1.34% optimize.opt_a.add_forward_monad_depend : 0.000397s : 0.00% optimize.opt_a.auto_monad_grad : 0.000267s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002804s : 0.02% optimize.opt_a.cse : 0.014444s : 0.10% optimize.opt_a.a_3 : 0.034262s : 0.23% optimize.py_interpret_to_execute_after_opt_a : 0.000184s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.001185s : 0.01% optimize.convert_after_rewriter : 0.000143s : 0.00% optimize.order_py_execute_after_rewriter : 0.000104s : 0.00% optimize.opt_b.b_1 : 0.004201s : 0.03% optimize.opt_b.b_2 : 0.000186s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000126s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000115s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000119s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000554s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000184s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000084s : 0.00% optimize.loop_unroll : 0.000972s : 0.01% optimize.opt_after_cconv.c_1 : 0.001091s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000167s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000121s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000123s : 0.00% optimize.opt_after_cconv.cse : 0.000548s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000901s : 0.01% optimize.tuple_transform.d_1 : 0.001165s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000192s : 0.00% optimize.add_recomputation : 0.000900s : 0.01% optimize.cse_after_recomputation.cse : 0.000383s : 0.00% optimize.environ_conv : 0.000114s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000169s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000702s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000190s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000478s : 0.00% optimize.ForceFp32Comm : 0.000002s : 0.00% optimize.remove_cast_before_assign_add : 0.000133s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000207s : 0.00% optimize.add_comm_op_reuse_tag : 0.000199s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000011s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000057s : 0.00% optimize.overlap_grad_ring_attention : 0.000179s : 0.00% optimize.overlap_grad_flash_sp : 0.000145s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000003s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000010s : 0.00% optimize.symbol_engine_optimizer.build : 0.000062s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000186s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000274s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000168s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000261s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000369s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001572s : 0.01% distribtued_split : 0.000001s : 0.00% validate : 0.000370s : 0.00% task_emit : 13.453970s : 89.13% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.053409 4899 0.03% : 0.000018s : 4: substitution.ad_related_special_op_eliminate 0.04% : 0.000023s : 9: substitution.addn_check_dump 0.10% : 0.000053s : 7: substitution.addn_zero_filter 0.03% : 0.000016s : 7: substitution.adjust_all_reduce_mul_add 0.61% : 0.000327s : 71: substitution.arithmetic_simplify 0.10% : 0.000054s : 10: substitution.cast_eliminate 0.10% : 0.000055s : 47: substitution.depend_value_elim 0.07% : 0.000035s : 127: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000024s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000019s : 12: substitution.environ_get_depend_swap 0.06% : 0.000031s : 27: substitution.environ_get_eliminate 0.07% : 0.000036s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000024s : 28: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.02% : 0.000012s : 10: substitution.float_tuple_getitem_switch 0.07% : 0.000035s : 127: substitution.fold_const_symbol 63.78% : 0.034064s : 290: substitution.getattr_setattr_resolve 0.20% : 0.000105s : 162: substitution.graph_param_transform 0.02% : 0.000008s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 24.10% : 0.012874s : 363: substitution.inline 1.48% : 0.000791s : 127: substitution.inline_without_move 0.27% : 0.000144s : 361: substitution.j_node_and_user_rematch 0.36% : 0.000190s : 40: substitution.less_batch_normalization 0.09% : 0.000046s : 90: substitution.load_eliminater 0.11% : 0.000059s : 10: substitution.merge_addn 0.23% : 0.000122s : 115: substitution.minmaximum_grad 0.01% : 0.000003s : 10: substitution.opt_reshape 0.03% : 0.000014s : 1: substitution.partial_defer_inline 0.13% : 0.000071s : 28: substitution.partial_eliminate 0.04% : 0.000021s : 35: substitution.reduce_all_const_elim 0.06% : 0.000033s : 15: substitution.reduce_eliminate 0.34% : 0.000184s : 361: substitution.remove_not_recompute_node 2.24% : 0.001196s : 612: substitution.replace_applicator 0.26% : 0.000137s : 324: substitution.replace_old_param 0.20% : 0.000107s : 31: substitution.reshape_eliminate 0.02% : 0.000013s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000012s : 4: substitution.specialize_transform 0.03% : 0.000018s : 12: substitution.split_environ_get_set_with_tuple_value 0.17% : 0.000092s : 36: substitution.switch_simplify 0.06% : 0.000033s : 11: substitution.tile_eliminate 0.53% : 0.000284s : 115: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000147s : 121: substitution.tuple_list_get_item_const_eliminator 0.43% : 0.000232s : 121: substitution.tuple_list_get_item_depend_reorder 1.64% : 0.000878s : 356: substitution.tuple_list_get_item_eliminator 0.37% : 0.000199s : 121: substitution.tuple_list_get_set_item_eliminator 0.36% : 0.000192s : 210: substitution.updatestate_pure_node_eliminater 0.65% : 0.000348s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000013s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.719741 2 96.60% : 0.695274s : 1: type_inference.infer 3.40% : 0.024467s : 1: type_inference.specialize ------[replace.] 0.010973 851 0.33% : 0.000036s : 4: replace.ad_related_special_op_eliminate 0.05% : 0.000006s : 1: replace.arithmetic_simplify 0.46% : 0.000051s : 7: replace.depend_value_elim 0.38% : 0.000042s : 3: replace.environ_get_set_eliminate 28.57% : 0.003135s : 189: replace.getattr_setattr_resolve 30.31% : 0.003325s : 342: replace.inline 0.22% : 0.000024s : 1: replace.merge_addn 1.04% : 0.000114s : 7: replace.partial_eliminate 3.97% : 0.000436s : 28: replace.replace_applicator 3.59% : 0.000394s : 36: replace.switch_simplify 0.46% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 30.28% : 0.003322s : 225: replace.tuple_list_get_item_eliminator 0.16% : 0.000017s : 1: replace.updatestate_useless_node_eliminater 0.18% : 0.000020s : 1: replace.virtual_dataset_eliminate ------[match.] 0.042728 851 0.03% : 0.000015s : 4: match.ad_related_special_op_eliminate 0.03% : 0.000012s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000018s : 3: match.environ_get_set_eliminate 68.50% : 0.029267s : 189: match.getattr_setattr_resolve 29.56% : 0.012630s : 342: match.inline 0.07% : 0.000028s : 1: match.merge_addn 0.09% : 0.000038s : 7: match.partial_eliminate 0.26% : 0.000110s : 28: match.replace_applicator 0.17% : 0.000072s : 36: match.switch_simplify 0.07% : 0.000031s : 6: match.tuple_list_get_item_depend_reorder 1.13% : 0.000482s : 225: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.03% : 0.000012s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.024844157866 0.71% : 0.000176s : 1384: predicate.accumulaten_eliminater 0.28% : 0.000069s : 325: predicate.ad_related_special_op_eliminate 0.55% : 0.000136s : 1000: predicate.addn_check_dump 0.76% : 0.000188s : 1384: predicate.addn_zero_filter 0.69% : 0.000172s : 1384: predicate.adjust_all_reduce_mul_add 1.67% : 0.000415s : 2385: predicate.arithmetic_simplify 1.16% : 0.000288s : 1911: predicate.cast_eliminate 3.44% : 0.000855s : 4408: predicate.check_bprop_eliminate 0.56% : 0.000139s : 1000: predicate.compare_switch_simplify 0.06% : 0.000015s : 186: predicate.const_output_eliminate 0.18% : 0.000046s : 316: predicate.convert_tensor_all_eliminate 1.17% : 0.000290s : 1619: predicate.convert_tensor_eliminate 0.56% : 0.000139s : 1003: predicate.depend_value_elim 0.82% : 0.000204s : 1388: predicate.dict_get_item_const_eliminator 0.79% : 0.000197s : 1388: predicate.dict_get_item_eliminator 0.78% : 0.000195s : 1388: predicate.dict_set_item_eliminator 0.05% : 0.000012s : 162: predicate.elim_not_effective 0.12% : 0.000029s : 162: predicate.elim_shapecalc_of_broadcastargs 0.80% : 0.000198s : 1571: predicate.environ_add_const_eliminate 0.81% : 0.000201s : 1574: predicate.environ_get_add_eliminate 0.81% : 0.000201s : 1571: predicate.environ_get_depend_swap 1.40% : 0.000347s : 2574: predicate.environ_get_eliminate 0.83% : 0.000206s : 1574: predicate.environ_get_set_eliminate 1.07% : 0.000265s : 1969: predicate.exchange_switch_depend_value 1.36% : 0.000337s : 1969: predicate.float_depend_g_call 0.55% : 0.000136s : 1000: predicate.float_environ_get_switch 0.65% : 0.000162s : 1186: predicate.float_tuple_getitem_switch 0.05% : 0.000013s : 162: predicate.fold_const_symbol 0.32% : 0.000079s : 534: predicate.get_grad_eliminate 2.08% : 0.000517s : 2090: predicate.getattr_setattr_resolve 0.06% : 0.000014s : 162: predicate.graph_param_transform 0.55% : 0.000136s : 1000: predicate.incorporate_call 0.54% : 0.000133s : 1000: predicate.incorporate_call_switch 3.84% : 0.000954s : 5415: predicate.inline 2.38% : 0.000591s : 2686: predicate.inline_without_move 0.17% : 0.000041s : 534: predicate.j_node_and_user_rematch 0.35% : 0.000086s : 496: predicate.less_batch_normalization 1.14% : 0.000282s : 1967: predicate.list_to_tuple_eliminator_ 1.81% : 0.000449s : 3382: predicate.load_eliminater 0.20% : 0.000049s : 186: predicate.loop_unroll_after_grad 2.32% : 0.000576s : 3094: predicate.loop_unroll_before_grad 0.95% : 0.000237s : 1766: predicate.make_slice_get_slice_eliminator 0.61% : 0.000150s : 1002: predicate.merge_addn 3.32% : 0.000824s : 4292: predicate.micro_step_allgather_replace 3.40% : 0.000844s : 4292: predicate.mini_step_allgather_replace 0.72% : 0.000179s : 1385: predicate.minmaximum_grad 0.19% : 0.000047s : 316: predicate.mutable_eliminate 0.10% : 0.000024s : 162: predicate.opt_reshape 0.11% : 0.000028s : 186: predicate.parallel_virtual_node 1.94% : 0.000483s : 1969: predicate.partial_defer_inline 1.28% : 0.000319s : 1812: predicate.partial_eliminate 0.72% : 0.000178s : 1384: predicate.print_const_string_wrapper 0.55% : 0.000138s : 989: predicate.reduce_all_const_elim 0.93% : 0.000232s : 1385: predicate.reduce_eliminate 0.16% : 0.000040s : 534: predicate.remove_not_recompute_node 2.06% : 0.000511s : 5967: predicate.replace_applicator 0.95% : 0.000236s : 2686: predicate.replace_old_param 0.06% : 0.000015s : 186: predicate.reset_defer_inline 0.75% : 0.000186s : 1385: predicate.reshape_eliminate 3.34% : 0.000830s : 4292: predicate.row_tensor_add_zeros_like 0.12% : 0.000029s : 186: predicate.row_tensor_eliminate 3.49% : 0.000866s : 4408: predicate.same_eliminate 0.23% : 0.000058s : 741: predicate.set_cell_output_no_recompute 0.32% : 0.000079s : 534: predicate.shard_identity_eliminate 2.18% : 0.000542s : 2872: predicate.special_op_eliminate 0.62% : 0.000154s : 1002: predicate.specialize_transform 3.66% : 0.000910s : 4292: predicate.split_environ_get_set_with_tuple_value 1.72% : 0.000426s : 2686: predicate.stack_unstack_eliminate 1.90% : 0.000472s : 3382: predicate.stopgrad_eliminater 0.10% : 0.000026s : 186: predicate.switch_call_monad_eliminater 1.17% : 0.000292s : 1969: predicate.switch_defer_inline 4.53% : 0.001125s : 6377: predicate.switch_layer_defer_inline 4.19% : 0.001040s : 6137: predicate.switch_simplify 0.85% : 0.000211s : 1385: predicate.tile_eliminate 0.72% : 0.000179s : 1385: predicate.transpose_eliminate 1.01% : 0.000251s : 1736: predicate.tuple_list_convert_item_index_to_positive 1.10% : 0.000272s : 1742: predicate.tuple_list_get_item_const_eliminator 0.93% : 0.000231s : 1742: predicate.tuple_list_get_item_depend_reorder 1.90% : 0.000472s : 2967: predicate.tuple_list_get_item_eliminator 0.97% : 0.000241s : 1742: predicate.tuple_list_get_set_item_eliminator 1.63% : 0.000405s : 2742: predicate.tuple_list_set_item_eliminator 1.09% : 0.000272s : 1967: predicate.tuple_to_list_eliminator_ 1.79% : 0.000445s : 3382: predicate.updatestate_pure_node_eliminater 2.38% : 0.000592s : 4383: predicate.updatestate_useless_node_eliminater 0.11% : 0.000028s : 186: predicate.value_based_eliminate 0.32% : 0.000079s : 536: predicate.virtual_dataset_eliminate 0.31% : 0.000077s : 534: predicate.virtual_output_eliminate 0.12% : 0.000030s : 186: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.069379 841 68.31% : 0.047395s : 383: func_graph_cloner_run.FuncGraphClonerGraph 2.16% : 0.001497s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.53% : 0.020487s : 436: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 16.449925 350 0.00% : 0.000006s : 1: ForceFp32Comm 0.25% : 0.040989s : 1: a1a2 0.00% : 0.000200s : 1: add_cache_embedding 0.00% : 0.000207s : 1: add_comm_op_reuse_tag 0.01% : 0.000914s : 1: add_recomputation 0.00% : 0.000489s : 1: assign_add_opt 0.01% : 0.001913s : 1: auto_monad 0.00% : 0.000383s : 1: auto_monad_reorder 0.00% : 0.000007s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001365s : 1: bootstrap 0.00% : 0.000091s : 1: cconv 0.00% : 0.000216s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000151s : 1: convert_after_rewriter 0.00% : 0.000412s : 1: cse_after_recomputation 0.00% : 0.000079s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.01% : 0.001588s : 1: eliminate_special_op_node 0.00% : 0.000123s : 1: environ_conv 0.00% : 0.000021s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000036s : 1: graph_reusing 0.00% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000013s : 1: handle_group_info 0.26% : 0.042515s : 1: inline 0.01% : 0.001173s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000713s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000983s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.20% : 0.032082s : 61: opt.transform.a1a2 0.00% : 0.000233s : 1: opt.transform.loop_unroll_optimizer 0.69% : 0.113730s : 148: opt.transform.opt_a 0.01% : 0.001087s : 1: opt.transform.opt_after_cconv 0.03% : 0.004351s : 27: opt.transform.opt_b 0.25% : 0.040428s : 20: opt.transform.opt_resolve 0.01% : 0.001162s : 1: opt.transform.opt_trans_graph 0.01% : 0.001041s : 6: opt.transform.special_op_eliminate 0.01% : 0.000882s : 4: opt.transform.symbol_engine_opt 5.25% : 0.863037s : 1: opt_a 0.01% : 0.002128s : 1: opt_after_cconv 0.03% : 0.005380s : 1: opt_b 5.36% : 0.881521s : 1: optimize 0.00% : 0.000193s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000110s : 1: order_py_execute_after_rewriter 0.00% : 0.000149s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000185s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000015s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000062s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000192s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000106s : 1: pipeline_split 0.00% : 0.000100s : 1: pre_auto_parallel 0.00% : 0.000126s : 1: py_interpret_to_execute 0.00% : 0.000194s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000139s : 1: remove_cast_before_assign_add 0.01% : 0.000916s : 1: remove_dup_value 0.94% : 0.155020s : 3: renormalize.infer 0.42% : 0.069174s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001198s : 1: rewriter_after_opt_a 0.01% : 0.001777s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000197s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000176s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.001002s : 1: symbol_engine_optimizer 81.79% : 13.454007s : 1: task_emit 0.01% : 0.001197s : 1: tuple_transform 4.38% : 0.720235s : 1: type_inference 0.01% : 0.001663s : 1: validate TotalTime = 15.1488, [21] [bootstrap]: 0.00127991 [type_inference]: 0.721969 [auto_monad]: 0.00190067 [graph_reusing]: 2.56e-05 [inline]: 0.0452707, [2] [rewriter_before_opt_a]: 0.00144525 [a1a2]: 0.0437895, [2] [Cycle 1]: 0.0310958, [11] [expand_dump_flag]: 3.207e-05 [switch_simplify]: 0.00105896 [loop_unroll]: 0.00066899 [a_1]: 0.021997 [recompute_prepare]: 0.00015942 [updatestate_depend_eliminate]: 0.00036963 [updatestate_assign_eliminate]: 8.879e-05 [updatestate_loads_eliminate]: 0.0001983 [parameter_eliminate]: 5.95e-06 [a_2]: 0.00623548 [parallel_inline_pass]: 0.00010346 [Cycle 2]: 0.00530483, [11] [expand_dump_flag]: 3.23e-06 [switch_simplify]: 9.381e-05 [loop_unroll]: 9.251e-05 [a_1]: 0.0031587 [recompute_prepare]: 0.00010116 [updatestate_depend_eliminate]: 8.227e-05 [updatestate_assign_eliminate]: 6.07e-05 [updatestate_loads_eliminate]: 6.332e-05 [parameter_eliminate]: 4.24001e-06 [a_2]: 0.00147913 [parallel_inline_pass]: 9.936e-05 [parallel-infer-symbol]: 0.00019147 [pre_auto_parallel]: 0.00010239 [insert-virtual-dataset]: 0.00140429 [parallel-infer-symbol-second]: 2.70001e-06 [dataset_repeat_opt]: 8.706e-05 [pipeline_split]: 0.00011823 [optimize]: 0.726006, [52] [py_interpret_to_execute]: 0.00014602 [rewriter_before_opt_a]: 0.0002837 [opt_a]: 0.707304, [3] [Cycle 1]: 0.602416, [46] [expand_dump_flag]: 1.99e-06 [switch_simplify]: 0.00011297 [loop_unroll]: 9.7e-05 [a_1]: 0.0033363 [recompute_prepare]: 0.00010556 [updatestate_depend_eliminate]: 0.00010262 [updatestate_assign_eliminate]: 6.355e-05 [updatestate_loads_eliminate]: 6.568e-05 [parameter_eliminate]: 3.64e-06 [a_2]: 0.00156756 [accelerated_algorithm]: 0.00023902 [shard]: 2.12999e-06 [meta_shard_fg_expand]: 5.587e-05 [shard_inline]: 0.00010611 [auto_parallel]: 7.462e-05 [parallel]: 0.0524556 [flash_sp]: 7.715e-05 [merge_comm]: 0.00017012 [allreduce_fusion]: 9.733e-05 [matmul_add_comm_reduction]: 0.00012078 [allreduce_slice_to_reducescatter]: 4.69998e-07 [virtual_shard_identity]: 0.00015759 [virtual_dataset]: 0.00019 [get_grad_eliminate_]: 0.00019578 [virtual_output]: 0.00014307 [merge_forward]: 9.281e-05 [cell_reuse_recompute_pass]: 3.24e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024979 [before_grad]: 0.00023377 [inplace_validation]: 0.00015813 [parallel_renormalize]: 0.0227152 [update_top_fg]: 9.79999e-07 [cast_eliminate]: 0.00024972 [meta_fg_expand]: 0.290498 [inplace_validation_after_expand]: 0.00189724 [flash_sp_send_recv_attached]: 0.0014786 [receive_attached]: 9.643e-05 [after_resolve]: 0.00243152 [a_after_grad]: 0.00463963 [special_op_eliminate]: 0.00221998 [renormalize]: 0.174935 [add_forward_monad_depend]: 0.00037107 [auto_monad_grad]: 0.00026255 [auto_monad_eliminator]: 0.00223513 [cse]: 0.0053935 [a_3]: 0.0321679 [Cycle 2]: 0.0888724, [46] [expand_dump_flag]: 6.376e-05 [switch_simplify]: 0.00234053 [loop_unroll]: 0.0019484 [a_1]: 0.0389222 [recompute_prepare]: 0.00024432 [updatestate_depend_eliminate]: 0.00034295 [updatestate_assign_eliminate]: 0.00014029 [updatestate_loads_eliminate]: 0.00020304 [parameter_eliminate]: 5.17e-06 [a_2]: 0.00597094 [accelerated_algorithm]: 0.00021642 [shard]: 2.2e-06 [meta_shard_fg_expand]: 0.00010028 [shard_inline]: 0.000192 [auto_parallel]: 0.00014403 [parallel]: 1.341e-05 [flash_sp]: 0.00015246 [merge_comm]: 0.00014077 [allreduce_fusion]: 0.00011702 [matmul_add_comm_reduction]: 0.00014192 [allreduce_slice_to_reducescatter]: 5.29995e-07 [virtual_shard_identity]: 0.00019593 [virtual_dataset]: 0.00018733 [get_grad_eliminate_]: 0.0001812 [virtual_output]: 0.00018444 [merge_forward]: 0.00011739 [cell_reuse_recompute_pass]: 2.48e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00032146 [before_grad]: 0.00031981 [inplace_validation]: 0.00011078 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 6.50005e-07 [cast_eliminate]: 0.00020405 [meta_fg_expand]: 0.00037169 [inplace_validation_after_expand]: 0.00023612 [flash_sp_send_recv_attached]: 2.44001e-06 [receive_attached]: 1.67999e-06 [after_resolve]: 0.00021405 [a_after_grad]: 0.00031382 [special_op_eliminate]: 0.00018646 [renormalize]: 0.0232106 [add_forward_monad_depend]: 6.87e-06 [auto_monad_grad]: 3.17e-06 [auto_monad_eliminator]: 0.00037239 [cse]: 0.00893261 [a_3]: 0.00132897 [Cycle 3]: 0.0159911, [46] [expand_dump_flag]: 3.04999e-06 [switch_simplify]: 0.00022588 [loop_unroll]: 0.00017737 [a_1]: 0.00572349 [recompute_prepare]: 0.00018799 [updatestate_depend_eliminate]: 0.0001993 [updatestate_assign_eliminate]: 0.00012764 [updatestate_loads_eliminate]: 0.00012695 [parameter_eliminate]: 4.17999e-06 [a_2]: 0.00288383 [accelerated_algorithm]: 0.00020659 [shard]: 2.2e-06 [meta_shard_fg_expand]: 7.113e-05 [shard_inline]: 0.0001827 [auto_parallel]: 0.00014859 [parallel]: 1.42e-05 [flash_sp]: 3.20999e-06 [merge_comm]: 0.00013906 [allreduce_fusion]: 0.00012451 [matmul_add_comm_reduction]: 0.00015836 [allreduce_slice_to_reducescatter]: 5.69999e-07 [virtual_shard_identity]: 0.00019023 [virtual_dataset]: 0.00018099 [get_grad_eliminate_]: 0.00017378 [virtual_output]: 0.00017597 [merge_forward]: 0.00012543 [cell_reuse_recompute_pass]: 4.33999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00032205 [before_grad]: 0.0003562 [inplace_validation]: 0.00012249 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 5.29995e-07 [cast_eliminate]: 0.00019874 [meta_fg_expand]: 0.00015459 [inplace_validation_after_expand]: 0.00016699 [flash_sp_send_recv_attached]: 2.99e-06 [receive_attached]: 1.91999e-06 [after_resolve]: 0.00020109 [a_after_grad]: 0.00029966 [special_op_eliminate]: 0.0001787 [renormalize]: 7.99919e-08 [add_forward_monad_depend]: 3.27999e-06 [auto_monad_grad]: 2.27e-06 [auto_monad_eliminator]: 0.00021851 [cse]: 0.00056914 [a_3]: 0.00129332 [py_interpret_to_execute_after_opt_a]: 0.00025113 [slice_cell_reuse_recomputed_activation]: 2.71e-06 [rewriter_after_opt_a]: 0.00110854 [convert_after_rewriter]: 0.00014266 [order_py_execute_after_rewriter]: 0.00010288 [opt_b]: 0.00539224, [1] [Cycle 1]: 0.00538299, [7] [b_1]: 0.00421879 [b_2]: 0.00018558 [updatestate_depend_eliminate]: 0.00012922 [updatestate_assign_eliminate]: 0.00011759 [updatestate_loads_eliminate]: 0.00011938 [renormalize]: 5.60001e-07 [cse]: 0.00054855 [optimize_parallel_all_gather_comm]: 0.00019007 [overlap_param_gather]: 1.19999e-06 [cconv]: 9.028e-05 [loop_unroll]: 0.00111019 [opt_after_cconv]: 0.00206495, [1] [Cycle 1]: 0.00205691, [7] [c_1]: 0.00103586 [parameter_eliminate]: 2.84999e-06 [updatestate_depend_eliminate]: 0.00016418 [updatestate_assign_eliminate]: 0.00012391 [updatestate_loads_eliminate]: 0.00012176 [cse]: 0.00054541 [renormalize]: 5.09986e-07 [remove_dup_value]: 0.00089976 [tuple_transform]: 0.0012644, [1] [Cycle 1]: 0.0012569, [2] [d_1]: 0.00123657 [renormalize]: 5.10001e-07 [partial_unused_args_eliminate]: 3.51999e-06 [add_cache_embedding]: 0.00018713 [add_recomputation]: 0.00088087 [cse_after_recomputation]: 0.00041763, [1] [Cycle 1]: 0.00040841, [1] [cse]: 0.00039301 [environ_conv]: 0.00012722 [swap_dp_allreduce_reducescatter]: 0.00017484 [bias_add_comm_swap]: 3.21001e-06 [label_micro_interleaved_index]: 1.86e-06 [label_fine_grained_interleaved_index]: 0.00067774 [merge_cast_opt]: 1.71999e-06 [slice_recompute_activation]: 0.00018909 [micro_interleaved_order_control]: 2.19001e-06 [assign_add_opt]: 0.00051303 [ForceFp32Comm]: 1.64e-06 [remove_cast_before_assign_add]: 0.00019629 [full_micro_interleaved_order_control]: 2.26e-06 [reorder_send_recv_between_fp_bp]: 1.96e-06 [comm_op_add_attrs]: 0.00020743 [add_comm_op_reuse_tag]: 0.00020801 [interleave_split_concat_branches]: 9.20001e-07 [interleave_parallel_branches]: 8.89995e-07 [overlap_opt_shard_in_pipeline]: 1.487e-05 [overlap_opt_shard_grad_in_pipeline]: 3.35e-06 [control_data_broadcast_order]: 1.18e-06 [grouped_pairwise_exchange_alltoall]: 1.242e-05 [offloading_packed_experts]: 2.78e-06 [overlap_recompute_and_grad_model_parallel]: 2.70001e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.39995e-07 [overlap_recompute_allgather_and_fa_grad]: 5.335e-05 [overlap_grad_ring_attention]: 0.00017951 [overlap_grad_flash_sp]: 0.0001464 [begin_end_overlap_inline]: 1.10001e-06 [split_matmul_comm_elemetwise]: 1.76e-06 [split_layernorm_comm]: 1.88001e-06 [handle_group_info]: 7.01001e-06 [symbol_engine_optimizer]: 0.00100863, [1] [Cycle 1]: 0.00100159, [6] [build]: 6.425e-05 [elim_shapecalc]: 0.00018631 [elim_not_effective]: 0.0002748 [opt_reshape]: 0.00017266 [fold_const_symbol]: 0.00026358 [renormalize]: 5.29995e-07 [pipeline_parallel_scheduler]: 3.78999e-06 [auto_monad_reorder]: 0.00036932 [get_jit_bprop_graph]: 8.00006e-07 [rewriter_after_jit_bprop_graph]: 4.30009e-07 [eliminate_special_op_node]: 0.00164399 [distribtued_split]: 1.49e-06 [validate]: 0.00037597 [task_emit]: 13.6466 [execute]: 1.24e-05 Sums bootstrap : 0.001280s : 0.01% type_inference : 0.721969s : 4.77% auto_monad : 0.001901s : 0.01% graph_reusing : 0.000026s : 0.00% inline.rewriter_before_opt_a : 0.001445s : 0.01% inline.a1a2.expand_dump_flag : 0.000035s : 0.00% inline.a1a2.switch_simplify : 0.001153s : 0.01% inline.a1a2.loop_unroll : 0.000761s : 0.01% inline.a1a2.a_1 : 0.025156s : 0.17% inline.a1a2.recompute_prepare : 0.000261s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000452s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000149s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000262s : 0.00% inline.a1a2.parameter_eliminate : 0.000010s : 0.00% inline.a1a2.a_2 : 0.007715s : 0.05% inline.a1a2.parallel_inline_pass : 0.000203s : 0.00% parallel-infer-symbol : 0.000191s : 0.00% pre_auto_parallel : 0.000102s : 0.00% insert-virtual-dataset : 0.001404s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000087s : 0.00% pipeline_split : 0.000118s : 0.00% optimize.py_interpret_to_execute : 0.000146s : 0.00% optimize.rewriter_before_opt_a : 0.000284s : 0.00% optimize.opt_a.expand_dump_flag : 0.000069s : 0.00% optimize.opt_a.switch_simplify : 0.002679s : 0.02% optimize.opt_a.loop_unroll : 0.002223s : 0.01% optimize.opt_a.a_1 : 0.047982s : 0.32% optimize.opt_a.recompute_prepare : 0.000538s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000645s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000331s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000396s : 0.00% optimize.opt_a.parameter_eliminate : 0.000013s : 0.00% optimize.opt_a.a_2 : 0.010422s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000662s : 0.00% optimize.opt_a.shard : 0.000007s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000227s : 0.00% optimize.opt_a.shard_inline : 0.000481s : 0.00% optimize.opt_a.auto_parallel : 0.000367s : 0.00% optimize.opt_a.parallel : 0.052483s : 0.35% optimize.opt_a.flash_sp : 0.000233s : 0.00% optimize.opt_a.merge_comm : 0.000450s : 0.00% optimize.opt_a.allreduce_fusion : 0.000339s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000421s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000544s : 0.00% optimize.opt_a.virtual_dataset : 0.000558s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000551s : 0.00% optimize.opt_a.virtual_output : 0.000503s : 0.00% optimize.opt_a.merge_forward : 0.000336s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000010s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000893s : 0.01% optimize.opt_a.before_grad : 0.000910s : 0.01% optimize.opt_a.inplace_validation : 0.000391s : 0.00% optimize.opt_a.parallel_renormalize : 0.022715s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000653s : 0.00% optimize.opt_a.meta_fg_expand : 0.291024s : 1.92% optimize.opt_a.inplace_validation_after_expand : 0.002300s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.001484s : 0.01% optimize.opt_a.receive_attached : 0.000100s : 0.00% optimize.opt_a.after_resolve : 0.002847s : 0.02% optimize.opt_a.a_after_grad : 0.005253s : 0.03% optimize.opt_a.special_op_eliminate : 0.002585s : 0.02% optimize.opt_a.renormalize : 0.198146s : 1.31% optimize.opt_a.add_forward_monad_depend : 0.000381s : 0.00% optimize.opt_a.auto_monad_grad : 0.000268s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002826s : 0.02% optimize.opt_a.cse : 0.014895s : 0.10% optimize.opt_a.a_3 : 0.034790s : 0.23% optimize.py_interpret_to_execute_after_opt_a : 0.000251s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001109s : 0.01% optimize.convert_after_rewriter : 0.000143s : 0.00% optimize.order_py_execute_after_rewriter : 0.000103s : 0.00% optimize.opt_b.b_1 : 0.004219s : 0.03% optimize.opt_b.b_2 : 0.000186s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000129s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000118s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000119s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000549s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000190s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000090s : 0.00% optimize.loop_unroll : 0.001110s : 0.01% optimize.opt_after_cconv.c_1 : 0.001036s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000164s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000124s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000122s : 0.00% optimize.opt_after_cconv.cse : 0.000545s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000900s : 0.01% optimize.tuple_transform.d_1 : 0.001237s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000004s : 0.00% optimize.add_cache_embedding : 0.000187s : 0.00% optimize.add_recomputation : 0.000881s : 0.01% optimize.cse_after_recomputation.cse : 0.000393s : 0.00% optimize.environ_conv : 0.000127s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000175s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000678s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000189s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000513s : 0.00% optimize.ForceFp32Comm : 0.000002s : 0.00% optimize.remove_cast_before_assign_add : 0.000196s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000207s : 0.00% optimize.add_comm_op_reuse_tag : 0.000208s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000015s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000012s : 0.00% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000003s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000053s : 0.00% optimize.overlap_grad_ring_attention : 0.000180s : 0.00% optimize.overlap_grad_flash_sp : 0.000146s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000064s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000186s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000275s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000173s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000264s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000369s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001644s : 0.01% distribtued_split : 0.000001s : 0.00% validate : 0.000376s : 0.00% task_emit : 13.646648s : 90.15% execute : 0.000012s : 0.00% TotalTime = 15.1376, [21] [bootstrap]: 0.00123816 [type_inference]: 0.736438 [auto_monad]: 0.00203325 [graph_reusing]: 2.808e-05 [inline]: 0.0495749, [2] [rewriter_before_opt_a]: 0.00173262 [a1a2]: 0.0478017, [2] [Cycle 1]: 0.0334932, [11] [expand_dump_flag]: 3.497e-05 [switch_simplify]: 0.00122456 [loop_unroll]: 0.00080748 [a_1]: 0.0261089 [recompute_prepare]: 0.00019026 [updatestate_depend_eliminate]: 0.00037944 [updatestate_assign_eliminate]: 9.673e-05 [updatestate_loads_eliminate]: 0.00021584 [parameter_eliminate]: 6.80999e-06 [a_2]: 0.00410896 [parallel_inline_pass]: 0.00013882 [Cycle 2]: 0.00672303, [11] [expand_dump_flag]: 1.47001e-06 [switch_simplify]: 0.00012228 [loop_unroll]: 0.00011619 [a_1]: 0.00414538 [recompute_prepare]: 0.00011949 [updatestate_depend_eliminate]: 7.71e-05 [updatestate_assign_eliminate]: 6.349e-05 [updatestate_loads_eliminate]: 6.558e-05 [parameter_eliminate]: 3.09999e-06 [a_2]: 0.00181347 [parallel_inline_pass]: 0.00012376 [parallel-infer-symbol]: 0.00019009 [pre_auto_parallel]: 0.00010649 [insert-virtual-dataset]: 0.0011705 [parallel-infer-symbol-second]: 2.58001e-06 [dataset_repeat_opt]: 8.574e-05 [pipeline_split]: 8.997e-05 [optimize]: 0.73657, [52] [py_interpret_to_execute]: 0.00012088 [rewriter_before_opt_a]: 0.00031936 [opt_a]: 0.715978, [3] [Cycle 1]: 0.600611, [46] [expand_dump_flag]: 2.02001e-06 [switch_simplify]: 0.00013386 [loop_unroll]: 0.00012027 [a_1]: 0.00415644 [recompute_prepare]: 0.000125 [updatestate_depend_eliminate]: 9.682e-05 [updatestate_assign_eliminate]: 6.641e-05 [updatestate_loads_eliminate]: 7.056e-05 [parameter_eliminate]: 3.15001e-06 [a_2]: 0.00195655 [accelerated_algorithm]: 0.00026826 [shard]: 2.08999e-06 [meta_shard_fg_expand]: 5.513e-05 [shard_inline]: 0.00013147 [auto_parallel]: 7.88e-05 [parallel]: 0.0211923 [flash_sp]: 8.014e-05 [merge_comm]: 0.00018528 [allreduce_fusion]: 0.00017226 [matmul_add_comm_reduction]: 0.000126 [allreduce_slice_to_reducescatter]: 5.10001e-07 [virtual_shard_identity]: 0.00018388 [virtual_dataset]: 0.00022233 [get_grad_eliminate_]: 0.00017456 [virtual_output]: 0.0001727 [merge_forward]: 9.621e-05 [cell_reuse_recompute_pass]: 2.79999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00031907 [before_grad]: 0.00030355 [inplace_validation]: 0.00016514 [parallel_renormalize]: 0.0224439 [update_top_fg]: 8.10003e-07 [cast_eliminate]: 0.00024149 [meta_fg_expand]: 0.299511 [inplace_validation_after_expand]: 0.00198822 [flash_sp_send_recv_attached]: 0.00152679 [receive_attached]: 9.987e-05 [after_resolve]: 0.00304714 [a_after_grad]: 0.00579516 [special_op_eliminate]: 0.00272882 [renormalize]: 0.185934 [add_forward_monad_depend]: 0.00036154 [auto_monad_grad]: 0.00027575 [auto_monad_eliminator]: 0.00239585 [cse]: 0.00554673 [a_3]: 0.0374669 [Cycle 2]: 0.0965879, [46] [expand_dump_flag]: 6.822e-05 [switch_simplify]: 0.00273033 [loop_unroll]: 0.00240482 [a_1]: 0.0444361 [recompute_prepare]: 0.00027695 [updatestate_depend_eliminate]: 0.00033628 [updatestate_assign_eliminate]: 0.00014513 [updatestate_loads_eliminate]: 0.00020636 [parameter_eliminate]: 4.34999e-06 [a_2]: 0.0071123 [accelerated_algorithm]: 0.00026742 [shard]: 2.35e-06 [meta_shard_fg_expand]: 0.00010378 [shard_inline]: 0.00023641 [auto_parallel]: 0.00015051 [parallel]: 1.461e-05 [flash_sp]: 0.00015566 [merge_comm]: 0.00015048 [allreduce_fusion]: 0.00013284 [matmul_add_comm_reduction]: 0.00014854 [allreduce_slice_to_reducescatter]: 5.69999e-07 [virtual_shard_identity]: 0.00024154 [virtual_dataset]: 0.00022823 [get_grad_eliminate_]: 0.00022192 [virtual_output]: 0.00028667 [merge_forward]: 0.00012987 [cell_reuse_recompute_pass]: 3.30999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00041041 [before_grad]: 0.00039815 [inplace_validation]: 0.0001162 [parallel_renormalize]: 7.99919e-08 [update_top_fg]: 6.50005e-07 [cast_eliminate]: 0.00024836 [meta_fg_expand]: 0.00038896 [inplace_validation_after_expand]: 0.00025082 [flash_sp_send_recv_attached]: 2.44001e-06 [receive_attached]: 1.86e-06 [after_resolve]: 0.00026229 [a_after_grad]: 0.00040498 [special_op_eliminate]: 0.00022569 [renormalize]: 0.0226679 [add_forward_monad_depend]: 6.05e-06 [auto_monad_grad]: 2.73e-06 [auto_monad_eliminator]: 0.00038497 [cse]: 0.00854658 [a_3]: 0.00158225 [Cycle 3]: 0.0187571, [46] [expand_dump_flag]: 2.36e-06 [switch_simplify]: 0.00021959 [loop_unroll]: 0.00021265 [a_1]: 0.00708861 [recompute_prepare]: 0.00025606 [updatestate_depend_eliminate]: 0.00019084 [updatestate_assign_eliminate]: 0.00012729 [updatestate_loads_eliminate]: 0.00012451 [parameter_eliminate]: 3.28e-06 [a_2]: 0.00334361 [accelerated_algorithm]: 0.0002935 [shard]: 1.71001e-06 [meta_shard_fg_expand]: 7.276e-05 [shard_inline]: 0.00022185 [auto_parallel]: 0.00014944 [parallel]: 9.91e-06 [flash_sp]: 2.33001e-06 [merge_comm]: 0.00014742 [allreduce_fusion]: 0.00013669 [matmul_add_comm_reduction]: 0.00015619 [allreduce_slice_to_reducescatter]: 3.6e-07 [virtual_shard_identity]: 0.00022433 [virtual_dataset]: 0.00021614 [get_grad_eliminate_]: 0.00020887 [virtual_output]: 0.00021267 [merge_forward]: 0.00012697 [cell_reuse_recompute_pass]: 3.21001e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00039651 [before_grad]: 0.00038258 [inplace_validation]: 0.0001225 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 5.50004e-07 [cast_eliminate]: 0.00023477 [meta_fg_expand]: 0.00017565 [inplace_validation_after_expand]: 0.00016849 [flash_sp_send_recv_attached]: 2.08001e-06 [receive_attached]: 1.41001e-06 [after_resolve]: 0.00025363 [a_after_grad]: 0.00037782 [special_op_eliminate]: 0.00021429 [renormalize]: 6.99947e-08 [add_forward_monad_depend]: 2.61e-06 [auto_monad_grad]: 1.42e-06 [auto_monad_eliminator]: 0.00022086 [cse]: 0.00056391 [a_3]: 0.00152803 [py_interpret_to_execute_after_opt_a]: 0.00017947 [slice_cell_reuse_recomputed_activation]: 2.32999e-06 [rewriter_after_opt_a]: 0.0013016 [convert_after_rewriter]: 0.00014797 [order_py_execute_after_rewriter]: 0.00010866 [opt_b]: 0.00623217, [1] [Cycle 1]: 0.00622316, [7] [b_1]: 0.00499703 [b_2]: 0.00022194 [updatestate_depend_eliminate]: 0.00013259 [updatestate_assign_eliminate]: 0.00012055 [updatestate_loads_eliminate]: 0.00012442 [renormalize]: 6.20013e-07 [cse]: 0.00055447 [optimize_parallel_all_gather_comm]: 0.00018293 [overlap_param_gather]: 1.49e-06 [cconv]: 8.585e-05 [loop_unroll]: 0.00102504 [opt_after_cconv]: 0.00234395, [1] [Cycle 1]: 0.00233567, [7] [c_1]: 0.00129243 [parameter_eliminate]: 2.65999e-06 [updatestate_depend_eliminate]: 0.00016745 [updatestate_assign_eliminate]: 0.00012727 [updatestate_loads_eliminate]: 0.00012701 [cse]: 0.00055148 [renormalize]: 7.00005e-07 [remove_dup_value]: 0.00088332 [tuple_transform]: 0.00153211, [1] [Cycle 1]: 0.00152358, [2] [d_1]: 0.00150104 [renormalize]: 5.60001e-07 [partial_unused_args_eliminate]: 3.61e-06 [add_cache_embedding]: 0.00018181 [add_recomputation]: 0.00091128 [cse_after_recomputation]: 0.00042413, [1] [Cycle 1]: 0.00041421, [1] [cse]: 0.0003965 [environ_conv]: 0.00012266 [swap_dp_allreduce_reducescatter]: 0.00017347 [bias_add_comm_swap]: 2.88e-06 [label_micro_interleaved_index]: 2.03999e-06 [label_fine_grained_interleaved_index]: 0.00076876 [merge_cast_opt]: 1.43e-06 [slice_recompute_activation]: 0.00021853 [micro_interleaved_order_control]: 2.23001e-06 [assign_add_opt]: 0.00051213 [ForceFp32Comm]: 1.61001e-06 [remove_cast_before_assign_add]: 0.00013782 [full_micro_interleaved_order_control]: 2.78e-06 [reorder_send_recv_between_fp_bp]: 1.62999e-06 [comm_op_add_attrs]: 0.00021748 [add_comm_op_reuse_tag]: 0.00023053 [interleave_split_concat_branches]: 1.09e-06 [interleave_parallel_branches]: 9.39996e-07 [overlap_opt_shard_in_pipeline]: 1.159e-05 [overlap_opt_shard_grad_in_pipeline]: 3.47001e-06 [control_data_broadcast_order]: 1.11001e-06 [grouped_pairwise_exchange_alltoall]: 1.208e-05 [offloading_packed_experts]: 2.56e-06 [overlap_recompute_and_grad_model_parallel]: 2.17999e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.14e-06 [overlap_recompute_allgather_and_fa_grad]: 6.334e-05 [overlap_grad_ring_attention]: 0.00023155 [overlap_grad_flash_sp]: 0.00018859 [begin_end_overlap_inline]: 8.59989e-07 [split_matmul_comm_elemetwise]: 2.28999e-06 [split_layernorm_comm]: 2.2e-06 [handle_group_info]: 6.79999e-06 [symbol_engine_optimizer]: 0.00122709, [1] [Cycle 1]: 0.00121975, [6] [build]: 6.221e-05 [elim_shapecalc]: 0.00021887 [elim_not_effective]: 0.00034819 [opt_reshape]: 0.00021159 [fold_const_symbol]: 0.00033536 [renormalize]: 5.20013e-07 [pipeline_parallel_scheduler]: 4.24001e-06 [auto_monad_reorder]: 0.00039249 [get_jit_bprop_graph]: 6.10002e-07 [rewriter_after_jit_bprop_graph]: 4.29995e-07 [eliminate_special_op_node]: 0.00182524 [distribtued_split]: 1.51001e-06 [validate]: 0.00041075 [task_emit]: 13.6059 [execute]: 1.273e-05 Sums bootstrap : 0.001238s : 0.01% type_inference : 0.736438s : 4.87% auto_monad : 0.002033s : 0.01% graph_reusing : 0.000028s : 0.00% inline.rewriter_before_opt_a : 0.001733s : 0.01% inline.a1a2.expand_dump_flag : 0.000036s : 0.00% inline.a1a2.switch_simplify : 0.001347s : 0.01% inline.a1a2.loop_unroll : 0.000924s : 0.01% inline.a1a2.a_1 : 0.030254s : 0.20% inline.a1a2.recompute_prepare : 0.000310s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000457s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000160s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000281s : 0.00% inline.a1a2.parameter_eliminate : 0.000010s : 0.00% inline.a1a2.a_2 : 0.005922s : 0.04% inline.a1a2.parallel_inline_pass : 0.000263s : 0.00% parallel-infer-symbol : 0.000190s : 0.00% pre_auto_parallel : 0.000106s : 0.00% insert-virtual-dataset : 0.001170s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000086s : 0.00% pipeline_split : 0.000090s : 0.00% optimize.py_interpret_to_execute : 0.000121s : 0.00% optimize.rewriter_before_opt_a : 0.000319s : 0.00% optimize.opt_a.expand_dump_flag : 0.000073s : 0.00% optimize.opt_a.switch_simplify : 0.003084s : 0.02% optimize.opt_a.loop_unroll : 0.002738s : 0.02% optimize.opt_a.a_1 : 0.055681s : 0.37% optimize.opt_a.recompute_prepare : 0.000658s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000624s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000339s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000401s : 0.00% optimize.opt_a.parameter_eliminate : 0.000011s : 0.00% optimize.opt_a.a_2 : 0.012412s : 0.08% optimize.opt_a.accelerated_algorithm : 0.000829s : 0.01% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000232s : 0.00% optimize.opt_a.shard_inline : 0.000590s : 0.00% optimize.opt_a.auto_parallel : 0.000379s : 0.00% optimize.opt_a.parallel : 0.021217s : 0.14% optimize.opt_a.flash_sp : 0.000238s : 0.00% optimize.opt_a.merge_comm : 0.000483s : 0.00% optimize.opt_a.allreduce_fusion : 0.000442s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000431s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000650s : 0.00% optimize.opt_a.virtual_dataset : 0.000667s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000605s : 0.00% optimize.opt_a.virtual_output : 0.000672s : 0.00% optimize.opt_a.merge_forward : 0.000353s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.001126s : 0.01% optimize.opt_a.before_grad : 0.001084s : 0.01% optimize.opt_a.inplace_validation : 0.000404s : 0.00% optimize.opt_a.parallel_renormalize : 0.022444s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000725s : 0.00% optimize.opt_a.meta_fg_expand : 0.300075s : 1.98% optimize.opt_a.inplace_validation_after_expand : 0.002408s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.001531s : 0.01% optimize.opt_a.receive_attached : 0.000103s : 0.00% optimize.opt_a.after_resolve : 0.003563s : 0.02% optimize.opt_a.a_after_grad : 0.006578s : 0.04% optimize.opt_a.special_op_eliminate : 0.003169s : 0.02% optimize.opt_a.renormalize : 0.208602s : 1.38% optimize.opt_a.add_forward_monad_depend : 0.000370s : 0.00% optimize.opt_a.auto_monad_grad : 0.000280s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.003002s : 0.02% optimize.opt_a.cse : 0.014657s : 0.10% optimize.opt_a.a_3 : 0.040577s : 0.27% optimize.py_interpret_to_execute_after_opt_a : 0.000179s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.001302s : 0.01% optimize.convert_after_rewriter : 0.000148s : 0.00% optimize.order_py_execute_after_rewriter : 0.000109s : 0.00% optimize.opt_b.b_1 : 0.004997s : 0.03% optimize.opt_b.b_2 : 0.000222s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000133s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000121s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000124s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000554s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000183s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000086s : 0.00% optimize.loop_unroll : 0.001025s : 0.01% optimize.opt_after_cconv.c_1 : 0.001292s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000167s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000127s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000127s : 0.00% optimize.opt_after_cconv.cse : 0.000551s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000883s : 0.01% optimize.tuple_transform.d_1 : 0.001501s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000004s : 0.00% optimize.add_cache_embedding : 0.000182s : 0.00% optimize.add_recomputation : 0.000911s : 0.01% optimize.cse_after_recomputation.cse : 0.000397s : 0.00% optimize.environ_conv : 0.000123s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000173s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000769s : 0.01% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000219s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000512s : 0.00% optimize.ForceFp32Comm : 0.000002s : 0.00% optimize.remove_cast_before_assign_add : 0.000138s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000217s : 0.00% optimize.add_comm_op_reuse_tag : 0.000231s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000012s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000012s : 0.00% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000063s : 0.00% optimize.overlap_grad_ring_attention : 0.000232s : 0.00% optimize.overlap_grad_flash_sp : 0.000189s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000062s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000219s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000348s : 0.00% optimize.symbol_engine_optimizer Time group info: ------[substitution.] 0.054467 4902 0.03% : 0.000018s : 4: substitution.ad_related_special_op_eliminate 0.04% : 0.000023s : 9: substitution.addn_check_dump 0.10% : 0.000057s : 7: substitution.addn_zero_filter 0.03% : 0.000017s : 7: substitution.adjust_all_reduce_mul_add 0.63% : 0.000340s : 71: substitution.arithmetic_simplify 0.11% : 0.000057s : 10: substitution.cast_eliminate 0.11% : 0.000060s : 47: substitution.depend_value_elim 0.07% : 0.000036s : 127: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.07% : 0.000037s : 15: substitution.environ_get_add_eliminate 0.03% : 0.000019s : 12: substitution.environ_get_depend_swap 0.05% : 0.000028s : 27: substitution.environ_get_eliminate 0.06% : 0.000034s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000024s : 28: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.02% : 0.000012s : 10: substitution.float_tuple_getitem_switch 0.07% : 0.000036s : 127: substitution.fold_const_symbol 64.09% : 0.034908s : 290: substitution.getattr_setattr_resolve 0.20% : 0.000107s : 165: substitution.graph_param_transform 0.02% : 0.000009s : 8: substitution.incorporate_call 0.01% : 0.000006s : 8: substitution.incorporate_call_switch 23.94% : 0.013037s : 363: substitution.inline 1.45% : 0.000792s : 127: substitution.inline_without_move 0.27% : 0.000147s : 361: substitution.j_node_and_user_rematch 0.24% : 0.000133s : 40: substitution.less_batch_normalization 0.08% : 0.000046s : 90: substitution.load_eliminater 0.10% : 0.000056s : 10: substitution.merge_addn 0.23% : 0.000124s : 115: substitution.minmaximum_grad 0.01% : 0.000004s : 10: substitution.opt_reshape 0.02% : 0.000013s : 1: substitution.partial_defer_inline 0.12% : 0.000064s : 28: substitution.partial_eliminate 0.04% : 0.000022s : 35: substitution.reduce_all_const_elim 0.06% : 0.000033s : 15: substitution.reduce_eliminate 0.34% : 0.000186s : 361: substitution.remove_not_recompute_node 2.20% : 0.001201s : 612: substitution.replace_applicator 0.32% : 0.000177s : 324: substitution.replace_old_param 0.20% : 0.000111s : 31: substitution.reshape_eliminate 0.03% : 0.000014s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000013s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.18% : 0.000098s : 36: substitution.switch_simplify 0.06% : 0.000032s : 11: substitution.tile_eliminate 0.53% : 0.000288s : 115: substitution.tuple_list_convert_item_index_to_positive 0.26% : 0.000144s : 121: substitution.tuple_list_get_item_const_eliminator 0.42% : 0.000226s : 121: substitution.tupl.opt_reshape : 0.000212s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000335s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000392s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001825s : 0.01% distribtued_split : 0.000002s : 0.00% validate : 0.000411s : 0.00% task_emit : 13.605927s : 89.95% execute : 0.000013s : 0.00% e_list_get_item_depend_reorder 1.62% : 0.000882s : 356: substitution.tuple_list_get_item_eliminator 0.38% : 0.000205s : 121: substitution.tuple_list_get_set_item_eliminator 0.36% : 0.000196s : 210: substitution.updatestate_pure_node_eliminater 0.65% : 0.000353s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000012s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.721515 2 96.61% : 0.697051s : 1: type_inference.infer 3.39% : 0.024465s : 1: type_inference.specialize ------[replace.] 0.011201 851 0.33% : 0.000037s : 4: replace.ad_related_special_op_eliminate 0.07% : 0.000008s : 1: replace.arithmetic_simplify 0.56% : 0.000063s : 7: replace.depend_value_elim 0.37% : 0.000041s : 3: replace.environ_get_set_eliminate 28.40% : 0.003181s : 189: replace.getattr_setattr_resolve 30.25% : 0.003389s : 342: replace.inline 0.22% : 0.000024s : 1: replace.merge_addn 1.02% : 0.000114s : 7: replace.partial_eliminate 3.87% : 0.000433s : 28: replace.replace_applicator 3.53% : 0.000395s : 36: replace.switch_simplify 0.45% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 30.60% : 0.003428s : 225: replace.tuple_list_get_item_eliminator 0.15% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.19% : 0.000021s : 1: replace.virtual_dataset_eliminate ------[match.] 0.043575 851 0.03% : 0.000015s : 4: match.ad_related_special_op_eliminate 0.03% : 0.000012s : 1: match.arithmetic_simplify 0.01% : 0.000005s : 7: match.depend_value_elim 0.04% : 0.000016s : 3: match.environ_get_set_eliminate 68.76% : 0.029960s : 189: match.getattr_setattr_resolve 29.32% : 0.012778s : 342: match.inline 0.06% : 0.000027s : 1: match.merge_addn 0.09% : 0.000040s : 7: match.partial_eliminate 0.25% : 0.000110s : 28: match.replace_applicator 0.18% : 0.000077s : 36: match.switch_simplify 0.07% : 0.000031s : 6: match.tuple_list_get_item_depend_reorder 1.11% : 0.000484s : 225: match.tuple_list_get_item_eliminator 0.02% : 0.000008s : 1: match.updatestate_useless_node_eliminater 0.03% : 0.000011s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.024906157659 0.71% : 0.000176s : 1382: predicate.accumulaten_eliminater 0.28% : 0.000071s : 331: predicate.ad_related_special_op_eliminate 0.54% : 0.000133s : 998: predicate.addn_check_dump 0.76% : 0.000189s : 1382: predicate.addn_zero_filter 0.71% : 0.000178s : 1382: predicate.adjust_all_reduce_mul_add 1.74% : 0.000433s : 2381: predicate.arithmetic_simplify 1.07% : 0.000268s : 1907: predicate.cast_eliminate 3.37% : 0.000840s : 4404: predicate.check_bprop_eliminate 0.74% : 0.000185s : 998: predicate.compare_switch_simplify 0.06% : 0.000015s : 184: predicate.const_output_eliminate 0.19% : 0.000047s : 322: predicate.convert_tensor_all_eliminate 1.07% : 0.000267s : 1617: predicate.convert_tensor_eliminate 0.59% : 0.000146s : 1001: predicate.depend_value_elim 0.97% : 0.000242s : 1386: predicate.dict_get_item_const_eliminator 0.79% : 0.000196s : 1386: predicate.dict_get_item_eliminator 0.77% : 0.000192s : 1386: predicate.dict_set_item_eliminator 0.05% : 0.000012s : 165: predicate.elim_not_effective 0.11% : 0.000028s : 165: predicate.elim_shapecalc_of_broadcastargs 0.81% : 0.000202s : 1567: predicate.environ_add_const_eliminate 0.81% : 0.000202s : 1570: predicate.environ_get_add_eliminate 0.82% : 0.000205s : 1567: predicate.environ_get_depend_swap 1.38% : 0.000343s : 2568: predicate.environ_get_eliminate 0.82% : 0.000203s : 1570: predicate.environ_get_set_eliminate 1.07% : 0.000266s : 1967: predicate.exchange_switch_depend_value 1.36% : 0.000338s : 1967: predicate.float_d epend_g_call 0.55% : 0.000137s : 998: predicate.float_environ_get_switch 0.64% : 0.000158s : 1182: predicate.float_tuple_getitem_switch 0.05% : 0.000012s : 165: predicate.fold_const_symbol 0.31% : 0.000078s : 532: predicate.get_grad_eliminate 2.13% : 0.000532s : 2090: predicate.getattr_setattr_resolve 0.06% : 0.000015s : 165: predicate.graph_param_transform 0.54% : 0.000133s : 998: predicate.incorporate_call 0.52% : 0.000131s : 998: predicate.incorporate_call_switch 3.73% : 0.000930s : 5403: predicate.inline 2.38% : 0.000594s : 2684: predicate.inline_without_move 0.17% : 0.000041s : 532: predicate.j_node_and_user_rematch 0.34% : 0.000084s : 494: predicate.less_batch_normalization 1.10% : 0.000274s : 1966: predicate.list_to_tuple_eliminator_ 1.77% : 0.000440s : 3374: predicate.load_eliminater 0.21% : 0.000051s : 184: predicate.loop_unroll_after_grad 2.34% : 0.000582s : 3092: predicate.loop_unroll_before_grad 1.00% : 0.000249s : 1760: predicate.make_slice_get_slice_eliminator 0.55% : 0.000138s : 1000: predicate.merge_addn 3.48% : 0.000866s : 4288: predicate.micro_step_allgather_replace 3.26% : 0.000813s : 4288: predicate.mini_step_allgather_replace 0.73% : 0.000182s : 1383: predicate.minmaximum_grad 0.20% : 0.000049s : 322: predicate.mutable_eliminate 0.10% : 0.000025s : 165: predicate.opt_reshape 0.11% : 0.000028s : 184: predicate.parallel_virtual_node 2.16% : 0.000537s : 1967: predicate.partial_defer_inline 1.08% : 0.000268s : 1808: predicate.partial_eliminate 0.76% : 0.000188s : 1382: predicate.print_const_string_wrapper 0.59% : 0.000148s : 987: predicate.reduce_all_const_elim 0.90% : 0.000225s : 1383: predicate.reduce_eliminate 0.16% : 0.000039s : 532: predicate.remove_not_recompute_node 2.03% : 0.000506s : 5961: predicate.replace_applicator 0.81% : 0.000202s : 2684: predicate.replace_old_param 0.06% : 0.000015s : 184: predicate.reset_defer_inline 0.73% : 0.000181s : 1383: predicate.reshape_eliminate 3.49% : 0.000869s : 4288: predicate.row_tensor_add_zeros_like 0.12% : 0.000029s : 184: predicate.row_tensor_eliminate 3.55% : 0.000884s : 4404: predicate.same_eliminate 0.24% : 0.000059s : 739: predicate.set_cell_output_no_recompute 0.32% : 0.000080s : 532: predicate.shard_identity_eliminate 2.17% : 0.000541s : 2868: predicate.special_op_eliminate 0.62% : 0.000154s : 1000: predicate.specialize_transform 3.70% : 0.000921s : 4288: predicate.split_environ_get_set_with_tuple_value 1.57% : 0.000392s : 2684: predicate.stack_unstack_eliminate 1.75% : 0.000436s : 3374: predicate.stopgrad_eliminater 0.10% : 0.000025s : 184: predicate.switch_call_monad_eliminater 1.20% : 0.000300s : 1967: predicate.switch_defer_inline 4.52% : 0.001126s : 6371: predicate.switch_layer_defer_inline 4.20% : 0.001047s : 6131: predicate.switch_simplify 0.74% : 0.000183s : 1383: predicate.tile_eliminate 0.70% : 0.000175s : 1383: predicate.transpose_eliminate 1.03% : 0.000256s : 1735: predicate.tuple_list_convert_item_index_to_positive 1.05% : 0.000262s : 1741: predicate.tuple_list_get_item_const_eliminator 1.05% : 0.000262s : 1741: predicate.tuple_list_get_item_depend_reorder 1.88% : 0.000468s : 2964: predicate.tuple_list_get_item_eliminator 0.95% : 0.000236s : 1741: predicate.tuple_list_get_set_item_eliminator 1.79% : 0.000445s : 2739: predicate.tuple_list_set_item_eliminator 1.08% : 0.000269s : 1966: predicate.tuple_to_list_eliminator_ 1.77% : 0.000441s : 3374: predicate.updatestate_pure_node_eliminater 2.43% : 0.000604s : 4373: predicate.updatestate_useless_node_eliminater 0.11% : 0.000028s : 184: predicate.value_based_eliminate 0.31% : 0.000078s : 534: predicate.virtual_dataset_eliminate 0.30% : 0.000076s : 532: predicate.virtual_output_eliminate 0.12% : 0.000029s : 184: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.065900 841 68.54% : 0.045166s : 383: func_graph_cloner_run.FuncGraphClonerGraph 2.27% : 0.001495s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.19% : 0.019239s : 436: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 16.340956 350 0.00% : 0.000006s : 1: ForceFp32Comm 0.27% : 0.043794s : 1: a1a2 0.00% : 0.000194s : 1: add_cache_embedding 0.00% : 0.000216s : 1: add_comm_op_reuse_tag 0.01% : 0.000895s : 1: add_recomputation 0.00% : 0.000525s : 1: assign_add_opt 0.01% : 0.001925s : 1: auto_monad 0.00% : 0.000384s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.001330s : 1: bootstrap 0.00% : 0.000097s : 1: cconv 0.00% : 0.000216s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000150s : 1: convert_after_rewriter 0.00% : 0.000423s : 1: cse_after_recomputation 0.00% : 0.000095s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: distribtued_split 0.01% : 0.001660s : 1: eliminate_special_op_node 0.00% : 0.000136s : 1: environ_conv 0.00% : 0.000022s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000034s : 1: graph_reusing 0.00% : 0.000016s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000010s : 1: handle_group_info 0.28% : 0.045281s : 1: inline 0.01% : 0.001428s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000688s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.001121s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.22% : 0.035172s : 61: opt.transform.a1a2 0.00% : 0.000235s : 1: opt.transform.loop_unroll_optimizer 0.70% : 0.114797s : 148: opt.transform.opt_a 0.01% : 0.001033s : 1: opt.transform.opt_after_cconv 0.03% : 0.004369s : 27: opt.transform.opt_b 0.25% : 0.041348s : 20: opt.transform.opt_resolve 0.01% : 0.001233s : 1: opt.transform.opt_trans_graph 0.01% : 0.001110s : 6: opt.transform.special_op_eliminate 0.01% : 0.000891s : 4: opt.transform.symbol_engine_opt 4.33% : 0.707311s : 1: opt_a 0.01% : 0.002071s : 1: opt_after_cconv 0.03% : 0.005398s : 1: opt_b 4.44% : 0.726019s : 1: optimize 0.00% : 0.000199s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000109s : 1: order_py_execute_after_rewriter 0.00% : 0.000151s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000185s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000019s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000058s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000204s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000129s : 1: pipeline_split 0.00% : 0.000112s : 1: pre_auto_parallel 0.00% : 0.000155s : 1: py_interpret_to_execute 0.00% : 0.000262s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000204s : 1: remove_cast_before_assign_add 0.01% : 0.000915s : 1: remove_dup_value 0.95% : 0.155019s : 3: renormalize.infer 0.40% : 0.065799s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001121s : 1: rewriter_after_opt_a 0.01% : 0.001752s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000196s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000182s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.001012s : 1: symbol_engine_optimizer 83.51% : 13.646696s : 1: task_emit 0.01% : 0.001270s : 1: tuple_transform 4.42% : 0.722002s : 1: type_inference 0.01% : 0.001437s : 1: validate Time group info: ------[substitution.] 0.056127 4902 0.03% : 0.000019s : 4: substitution.ad_related_special_op_eliminate 0.05% : 0.000025s : 9: substitution.addn_check_dump 0.10% : 0.000058s : 7: substitution.addn_zero_filter 0.03% : 0.000017s : 7: substitution.adjust_all_reduce_mul_add 0.66% : 0.000368s : 71: substitution.arithmetic_simplify 0.10% : 0.000058s : 10: substitution.cast_eliminate 0.12% : 0.000069s : 47: substitution.depend_value_elim 0.09% : 0.000050s : 127: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.07% : 0.000037s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000022s : 12: substitution.environ_get_depend_swap 0.06% : 0.000036s : 27: substitution.environ_get_eliminate 0.07% : 0.000037s : 15: substitution.environ_get_set_eliminate 0.05% : 0.000028s : 28: substitution.float_depend_g_call 0.02% : 0.000012s : 12: substitution.float_environ_get_switch 0.02% : 0.000013s : 10: substitution.float_tuple_getitem_switch 0.09% : 0.000051s : 127: substitution.fold_const_symbol 63.30% : 0.035531s : 290: substitution.getattr_setattr_resolve 0.24% : 0.000136s : 165: substitution.graph_param_transform 0.02% : 0.000010s : 8: substitution.incorporate_call 0.01% : 0.000007s : 8: substitution.incorporate_call_switch 23.30% : 0.013078s : 363: substitution.inline 1.54% : 0.000863s : 127: substitution.inline_without_move 0.35% : 0.000198s : 361: substitution.j_node_and_user_rematch 0.32% : 0.000179s : 40: substitution.less_batch_normalization 0.10% : 0.000059s : 90: substitution.load_eliminater 0.10% : 0.000056s : 10: substitution.merge_addn 0.26% : 0.000144s : 115: substitution.minmaximum_grad 0.01% : 0.000005s : 10: substitution.opt_reshape 0.03% : 0.000015s : 1: substitution.partial_defer_inline 0.12% : 0.000067s : 28: substitution.partial_eliminate 0.05% : 0.000027s : 35: substitution.reduce_all_const_elim 0.14% : 0.000080s : 15: substitution.reduce_eliminate 0.49% : 0.000274s : 361: substitution.remove_not_recompute_node 2.62% : 0.001473s : 612: substitution.replace_applicator 0.32% : 0.000182s : 324: substitution.replace_old_param 0.21% : 0.000117s : 31: substitution.reshape_eliminate 0.03% : 0.000016s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000012s : 4: substitution.specialize_transform 0.04% : 0.000020s : 12: substitution.split_environ_get_set_with_tuple_value 0.20% : 0.000113s : 36: substitution.switch_simplify 0.06% : 0.000032s : 11: substitution.tile_eliminate 0.56% : 0.000316s : 115: substitution.tuple_list_convert_item_index_to_positive 0.30% : 0.000166s : 121: substitution.tuple_list_get_item_const_eliminator 0.44% : 0.000246s : 121: substitution.tuple_list_get_item_depend_reorder 1.68% : 0.000944s : 356: substitution.tuple_list_get_item_eliminator 0.39% : 0.000220s : 121: substitution.tuple_list_get_set_item_eliminator 0.41% : 0.000229s : 210: substitution.updatestate_pure_node_eliminater 0.71% : 0.000399s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000012s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.735983 2 96.45% : 0.709861s : 1: type_inference.infer 3.55% : 0.026122s : 1: type_inference.specialize ------[replace.] 0.010957 851 0.33% : 0.000036s : 4: replace.ad_related_special_op_eliminate 0.06% : 0.000007s : 1: replace.arithmetic_simplify 0.43% : 0.000047s : 7: replace.depend_value_elim 0.37% : 0.000041s : 3: replace.environ_get_set_eliminate 29.16% : 0.003195s : 189: replace.getattr_setattr_resolve 30.27% : 0.003317s : 342: replace.inline 0.23% : 0.000025s : 1: replace.merge_addn 1.04% : 0.000113s : 7: replace.partial_eliminate 3.99% : 0.000437s : 28: replace.replace_applicator 3.58% : 0.000393s : 36: replace.switch_simplify 0.46% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 29.74% : 0.003258s : 225: replace.tuple_list_get_item_eliminator 0.15% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.18% : 0.000020s : 1: replace.virtual_dataset_eliminate ------[match.] 0.044149 851 0.03% : 0.000015s : 4: match.ad_related_special_op_eliminate 0.03% : 0.000012s : 1: match.arithmetic_simplify 0.01% : 0.000004s : 7: match.depend_value_elim 0.04% : 0.000018s : 3: match.environ_get_set_eliminate 69.05% : 0.030484s : 189: match.getattr_setattr_resolve 28.96% : 0.012787s : 342: match.inline 0.06% : 0.000025s : 1: match.merge_addn 0.09% : 0.000040s : 7: match.partial_eliminate 0.28% : 0.000124s : 28: match.replace_applicator 0.20% : 0.000088s : 36: match.switch_simplify 0.08% : 0.000033s : 6: match.tuple_list_get_item_depend_reorder 1.13% : 0.000499s : 225: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000011s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.028970157527 0.75% : 0.000218s : 1381: predicate.accumulaten_eliminater 0.27% : 0.000077s : 331: predicate.ad_related_special_op_eliminate 0.55% : 0.000160s : 997: predicate.addn_check_dump 0.73% : 0.000212s : 1381: predicate.addn_zero_filter 0.75% : 0.000218s : 1381: predicate.adjust_all_reduce_mul_add 1.72% : 0.000498s : 2379: predicate.arithmetic_simplify 1.10% : 0.000318s : 1905: predicate.cast_eliminate 3.35% : 0.000969s : 4402: predicate.check_bprop_eliminate 0.56% : 0.000161s : 997: predicate.compare_switch_simplify 0.06% : 0.000018s : 183: predicate.const_output_eliminate 0.18% : 0.000053s : 322: predicate.convert_tensor_all_eliminate 1.14% : 0.000331s : 1616: predicate.convert_tensor_eliminate 0.57% : 0.000166s : 1000: predicate.depend_value_elim 0.78% : 0.000226s : 1385: predicate.dict_get_item_const_eliminator 0.80% : 0.000232s : 1385: predicate.dict_get_item_eliminator 0.78% : 0.000227s : 1385: predicate.dict_set_item_eliminator 0.05% : 0.000015s : 165: predicate.elim_not_effective 0.11% : 0.000032s : 165: predicate.elim_shapecalc_of_broadcastargs 0.82% : 0.000237s : 1565: predicate.environ_add_const_eliminate 0.84% : 0.000243s : 1568: predicate.environ_get_add_eliminate 0.82% : 0.000239s : 1565: predicate.environ_get_depend_swap 1.42% : 0.000412s : 2565: predicate.environ_get_eliminate 0.82% : 0.000238s : 1568: predicate.environ_get_set_eliminate 1.09% : 0.000315s : 1966: predicate.exchange_switch_depend_value 1.42% : 0.000412s : 1966: predicate.float_depend_g_call 0.70% : 0.000202s : 997: predicate.float_environ_get_switch 0.66% : 0.000190s : 1180: predicate.float_tuple_getitem_switch 0.05% : 0.000015s : 165: predicate.fold_const_symbol 0.31% : 0.000090s : 531: predicate.get_grad_eliminate 2.03% : 0.000587s : 2090: predicate.getattr_setattr_resolve 0.06% : 0.000017s : 165: predicate.graph_param_transform 0.56% : 0.000163s : 997: predicate.incorporate_call 0.55% : 0.000161s : 997: predicate.incorporate_call_switch 4.18% : 0.001210s : 5397: predicate.inline 2.44% : 0.000706s : 2683: predicate.inline_without_move 0.18% : 0.000051s : 531: predicate.j_node_and_user_rematch 0.35% : 0.000100s : 493: predicate.less_batch_normalization 1.14% : 0.000330s : 1964: predicate.list_to_tuple_eliminator_ 1.81% : 0.000524s : 3370: predicate.load_eliminater 0.18% : 0.000053s : 183: predicate.loop_unroll_after_grad 2.28% : 0.000661s : 3091: predicate.loop_unroll_before_grad 0.94% : 0.000273s : 1757: predicate.make_slice_get_slice_eliminator 0.57% : 0.000165s : 999: predicate.merge_addn 3.25% : 0.000940s : 4286: predicate.micro_step_allgather_replace 3.24% : 0.000938s : 4286: predicate.mini_step_allgather_replace 0.76% : 0.000220s : 1382: predicate.minmaximum_grad 0.19% : 0.000056s : 322: predicate.mutable_eliminate 0.10% : 0.000029s : 165: predicate.opt_reshape 0.11% : 0.000033s : 183: predicate.parallel_virtual_node 1.88% : 0.000544s : 1966: predicate.partial_defer_inline 1.15% : 0.000333s : 1806: predicate.partial_eliminate 0.72% : 0.000209s : 1381: predicate.print_const_string_wrapper 0.57% : 0.000165s : 986: predicate.reduce_all_const_elim 0.87% : 0.000253s : 1382: predicate.reduce_eliminate 0.17% : 0.000049s : 531: predicate.remove_not_recompute_node 2.11% : 0.000610s : 5958: predicate.replace_applicator 0.96% : 0.000278s : 2683: predicate.replace_old_param 0.06% : 0.000018s : 183: predicate.reset_defer_inline 0.74% : 0.000215s : 1382: predicate.reshape_eliminate 3.28% : 0.000950s : 4286: predicate.row_tensor_add_zeros_like 0.11% : 0.000033s : 183: predicate.row_tensor_eliminate 3.64% : 0.001055s : 4402: predicate.same_eliminate 0.25% : 0.000073s : 738: predicate.set_cell_output_no_recompute 0.33% : 0.000096s : 531: predicate.shard_identity_eliminate 2.13% : 0.000617s : 2866: predicate.special_op_eliminate 0.68% : 0.000197s : 999: predicate.specialize_transform 3.50% : 0.001015s : 4286: predicate.split_environ_get_set_with_tuple_value 1.69% : 0.000489s : 2683: predicate.stack_unstack_eliminate 1.81% : 0.000525s : 3370: predicate.stopgrad_eliminater 0.11% : 0.000031s : 183: predicate.switch_call_monad_eliminater 1.19% : 0.000345s : 1966: predicate.switch_defer_inline 4.50% : 0.001305s : 6368: predicate.switch_layer_defer_inline 4.15% : 0.001203s : 6128: predicate.switch_simplify 0.76% : 0.000220s : 1382: predicate.tile_eliminate 0.72% : 0.000207s : 1382: predicate.transpose_eliminate 1.04% : 0.000302s : 1733: predicate.tuple_list_convert_item_index_to_positive 1.12% : 0.000323s : 1739: predicate.tuple_list_get_item_const_eliminator 0.92% : 0.000265s : 1739: predicate.tuple_list_get_item_depend_reorder 1.91% : 0.000554s : 2961: predicate.tuple_list_get_item_eliminator 0.99% : 0.000287s : 1739: predicate.tuple_list_get_set_item_eliminator 1.63% : 0.000472s : 2736: predicate.tuple_list_set_item_eliminator 1.06% : 0.000307s : 1964: predicate.tuple_to_list_eliminator_ 1.82% : 0.000527s : 3370: predicate.updatestate_pure_node_eliminater 2.44% : 0.000708s : 4368: predicate.updatestate_useless_node_eliminater 0.12% : 0.000035s : 183: predicate.value_based_eliminate 0.32% : 0.000092s : 533: predicate.virtual_dataset_eliminate 0.30% : 0.000088s : 531: predicate.virtual_output_eliminate 0.12% : 0.000034s : 183: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.067756 841 68.32% : 0.046290s : 383: func_graph_cloner_run.FuncGraphClonerGraph 2.26% : 0.001529s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.42% : 0.019936s : 436: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 16.381664 350 0.00% : 0.000007s : 1: ForceFp32Comm 0.29% : 0.047806s : 1: a1a2 0.00% : 0.000190s : 1: add_cache_embedding 0.00% : 0.000239s : 1: add_comm_op_reuse_tag 0.01% : 0.000925s : 1: add_recomputation 0.00% : 0.000524s : 1: assign_add_opt 0.01% : 0.002059s : 1: auto_monad 0.00% : 0.000409s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.001290s : 1: bootstrap 0.00% : 0.000094s : 1: cconv 0.00% : 0.000226s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000156s : 1: convert_after_rewriter 0.00% : 0.000430s : 1: cse_after_recomputation 0.00% : 0.000094s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: distribtued_split 0.01% : 0.001842s : 1: eliminate_special_op_node 0.00% : 0.000133s : 1: environ_conv 0.00% : 0.000023s : 1: execute 0.00% : 0.000007s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000037s : 1: graph_reusing 0.00% : 0.000016s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000010s : 1: handle_group_info 0.30% : 0.049584s : 1: inline 0.01% : 0.001194s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000780s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.001037s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.24% : 0.038935s : 61: opt.transform.a1a2 0.00% : 0.000265s : 1: opt.transform.loop_unroll_optimizer 0.82% : 0.135109s : 148: opt.transform.opt_a 0.01% : 0.001290s : 1: opt.transform.opt_after_cconv 0.03% : 0.005180s : 27: opt.transform.opt_b 0.26% : 0.042520s : 20: opt.transform.opt_resolve 0.01% : 0.001497s : 1: opt.transform.opt_trans_graph 0.01% : 0.001246s : 6: opt.transform.special_op_eliminate 0.01% : 0.001107s : 4: opt.transform.symbol_engine_opt 4.37% : 0.715988s : 1: opt_a 0.01% : 0.002351s : 1: opt_after_cconv 0.04% : 0.006237s : 1: opt_b 4.50% : 0.736582s : 1: optimize 0.00% : 0.000194s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000115s : 1: order_py_execute_after_rewriter 0.00% : 0.000194s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000238s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000016s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000068s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000203s : 1: parallel-infer-symbol 0.00% : 0.000010s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000010s : 1: pipeline_parallel_scheduler 0.00% : 0.000100s : 1: pipeline_split 0.00% : 0.000116s : 1: pre_auto_parallel 0.00% : 0.000129s : 1: py_interpret_to_execute 0.00% : 0.000191s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000145s : 1: remove_cast_before_assign_add 0.01% : 0.000899s : 1: remove_dup_value 0.98% : 0.160131s : 3: renormalize.infer 0.43% : 0.070871s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001315s : 1: rewriter_after_opt_a 0.01% : 0.002077s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000229s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000182s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.001231s : 1: symbol_engine_optimizer 83.06% : 13.605969s : 1: task_emit 0.01% : 0.001537s : 1: tuple_transform 4.50% : 0.736473s : 1: type_inference 0.01% : 0.001485s : 1: validate TotalTime = 15.214, [21] [bootstrap]: 0.00116259 [type_inference]: 0.709704 [auto_monad]: 0.00184658 [graph_reusing]: 2.58e-05 [inline]: 0.0416367, [2] [rewriter_before_opt_a]: 0.00145285 [a1a2]: 0.0401485, [2] [Cycle 1]: 0.0275654, [11] [expand_dump_flag]: 3.096e-05 [switch_simplify]: 0.00103027 [loop_unroll]: 0.0007012 [a_1]: 0.0215238 [recompute_prepare]: 0.00015878 [updatestate_depend_eliminate]: 0.0003494 [updatestate_assign_eliminate]: 8.867e-05 [updatestate_loads_eliminate]: 0.00018867 [parameter_eliminate]: 4.2e-06 [a_2]: 0.00323149 [parallel_inline_pass]: 0.000101 [Cycle 2]: 0.00543361, [11] [expand_dump_flag]: 1.03e-06 [switch_simplify]: 9.354e-05 [loop_unroll]: 9.207e-05 [a_1]: 0.00318504 [recompute_prepare]: 9.761e-05 [updatestate_depend_eliminate]: 7.926e-05 [updatestate_assign_eliminate]: 6.304e-05 [updatestate_loads_eliminate]: 6.39e-05 [parameter_eliminate]: 2.42e-06 [a_2]: 0.00157197 [parallel_inline_pass]: 0.00011007 [parallel-infer-symbol]: 0.00024966 [pre_auto_parallel]: 9.494e-05 [insert-virtual-dataset]: 0.00117714 [parallel-infer-symbol-second]: 2.05e-06 [dataset_repeat_opt]: 8.113e-05 [pipeline_split]: 7.538e-05 [optimize]: 0.775019, [52] [py_interpret_to_execute]: 0.00012004 [rewriter_before_opt_a]: 0.0002687 [opt_a]: 0.756921, [3] [Cycle 1]: 0.655982, [46] [expand_dump_flag]: 1.65e-06 [switch_simplify]: 0.00011837 [loop_unroll]: 0.00010835 [a_1]: 0.00343548 [recompute_prepare]: 0.0001026 [updatestate_depend_eliminate]: 0.00024278 [updatestate_assign_eliminate]: 6.601e-05 [updatestate_loads_eliminate]: 6.484e-05 [parameter_eliminate]: 2.90001e-06 [a_2]: 0.00153675 [accelerated_algorithm]: 0.00022976 [shard]: 1.4e-06 [meta_shard_fg_expand]: 4.858e-05 [shard_inline]: 0.00010489 [auto_parallel]: 7.076e-05 [parallel]: 0.118672 [flash_sp]: 7.521e-05 [merge_comm]: 0.00016472 [allreduce_fusion]: 9.642e-05 [matmul_add_comm_reduction]: 0.00011839 [allreduce_slice_to_reducescatter]: 3.79994e-07 [virtual_shard_identity]: 0.00015458 [virtual_dataset]: 0.00018295 [get_grad_eliminate_]: 0.00014306 [virtual_output]: 0.00014084 [merge_forward]: 8.904e-05 [cell_reuse_recompute_pass]: 3.09e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00031967 [before_grad]: 0.00024485 [inplace_validation]: 0.00015218 [parallel_renormalize]: 0.0212828 [update_top_fg]: 6.40008e-07 [cast_eliminate]: 0.00020871 [meta_fg_expand]: 0.283942 [inplace_validation_after_expand]: 0.00186482 [flash_sp_send_recv_attached]: 0.00147713 [receive_attached]: 8.882e-05 [after_resolve]: 0.00246371 [a_after_grad]: 0.00462383 [special_op_eliminate]: 0.00227464 [renormalize]: 0.170477 [add_forward_monad_depend]: 0.00042014 [auto_monad_grad]: 0.00026427 [auto_monad_eliminator]: 0.00222327 [cse]: 0.00537417 [a_3]: 0.0317152 [Cycle 2]: 0.0852453, [46] [expand_dump_flag]: 6.415e-05 [switch_simplify]: 0.00242397 [loop_unroll]: 0.00195953 [a_1]: 0.0378365 [recompute_prepare]: 0.00023649 [updatestate_depend_eliminate]: 0.00033043 [updatestate_assign_eliminate]: 0.00013681 [updatestate_loads_eliminate]: 0.00018971 [parameter_eliminate]: 3.11999e-06 [a_2]: 0.00584988 [accelerated_algorithm]: 0.00021373 [shard]: 1.56001e-06 [meta_shard_fg_expand]: 9.448e-05 [shard_inline]: 0.00019274 [auto_parallel]: 0.00014674 [parallel]: 8.77e-06 [flash_sp]: 0.00014986 [merge_comm]: 0.00014298 [allreduce_fusion]: 0.00013371 [matmul_add_comm_reduction]: 0.00014285 [allreduce_slice_to_reducescatter]: 3.89991e-07 [virtual_shard_identity]: 0.00019543 [virtual_dataset]: 0.00018821 [get_grad_eliminate_]: 0.00018102 [virtual_output]: 0.00018527 [merge_forward]: 0.00011998 [cell_reuse_recompute_pass]: 2.12e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00031776 [before_grad]: 0.0003197 [inplace_validation]: 0.00011264 [parallel_renormalize]: 9.00036e-08 [update_top_fg]: 4.60001e-07 [cast_eliminate]: 0.00020645 [meta_fg_expand]: 0.00035257 [inplace_validation_after_expand]: 0.00023616 [flash_sp_send_recv_attached]: 1.74e-06 [receive_attached]: 1.04e-06 [after_resolve]: 0.00021708 [a_after_grad]: 0.00032357 [special_op_eliminate]: 0.00018558 [renormalize]: 0.0213417 [add_forward_monad_depend]: 5.41001e-06 [auto_monad_grad]: 2.2e-06 [auto_monad_eliminator]: 0.00036947 [cse]: 0.00828799 [a_3]: 0.00131964 [Cycle 3]: 0.0156734, [46] [expand_dump_flag]: 1.6e-06 [switch_simplify]: 0.00017775 [loop_unroll]: 0.00017486 [a_1]: 0.00573522 [recompute_prepare]: 0.00018507 [updatestate_depend_eliminate]: 0.00018932 [updatestate_assign_eliminate]: 0.00012047 [updatestate_loads_eliminate]: 0.00011647 [parameter_eliminate]: 2.92e-06 [a_2]: 0.00280639 [accelerated_algorithm]: 0.00020403 [shard]: 1.8e-06 [meta_shard_fg_expand]: 6.441e-05 [shard_inline]: 0.00017934 [auto_parallel]: 0.00013699 [parallel]: 8.99e-06 [flash_sp]: 2.13001e-06 [merge_comm]: 0.00013321 [allreduce_fusion]: 0.00012153 [matmul_add_comm_reduction]: 0.00015176 [allreduce_slice_to_reducescatter]: 4.29995e-07 [virtual_shard_identity]: 0.00018683 [virtual_dataset]: 0.00017825 [get_grad_eliminate_]: 0.00017103 [virtual_output]: 0.00017463 [merge_forward]: 0.00012145 [cell_reuse_recompute_pass]: 2.84e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00030915 [before_grad]: 0.00030685 [inplace_validation]: 0.00011629 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 4.50003e-07 [cast_eliminate]: 0.00019395 [meta_fg_expand]: 0.00015097 [inplace_validation_after_expand]: 0.00015419 [flash_sp_send_recv_attached]: 1.56999e-06 [receive_attached]: 9.5999e-07 [after_resolve]: 0.00019959 [a_after_grad]: 0.00035898 [special_op_eliminate]: 0.00017798 [renormalize]: 1.19995e-07 [add_forward_monad_depend]: 2.25e-06 [auto_monad_grad]: 1.73999e-06 [auto_monad_eliminator]: 0.00020506 [cse]: 0.00055154 [a_3]: 0.0012635 [py_interpret_to_execute_after_opt_a]: 0.00017606 [slice_cell_reuse_recomputed_activation]: 1.89e-06 [rewriter_after_opt_a]: 0.00109093 [convert_after_rewriter]: 0.0001746 [order_py_execute_after_rewriter]: 0.00010337 [opt_b]: 0.00529885, [1] [Cycle 1]: 0.00529131, [7] [b_1]: 0.00414281 [b_2]: 0.00018306 [updatestate_depend_eliminate]: 0.00012336 [updatestate_assign_eliminate]: 0.00011088 [updatestate_loads_eliminate]: 0.00011612 [renormalize]: 5.40007e-07 [cse]: 0.00055111 [optimize_parallel_all_gather_comm]: 0.00018253 [overlap_param_gather]: 9.10004e-07 [cconv]: 7.734e-05 [loop_unroll]: 0.00094049 [opt_after_cconv]: 0.00210025, [1] [Cycle 1]: 0.00209311, [7] [c_1]: 0.00102578 [parameter_eliminate]: 1.99e-06 [updatestate_depend_eliminate]: 0.00016179 [updatestate_assign_eliminate]: 0.00018191 [updatestate_loads_eliminate]: 0.00012147 [cse]: 0.00053545 [renormalize]: 4.89992e-07 [remove_dup_value]: 0.00087082 [tuple_transform]: 0.00119447, [1] [Cycle 1]: 0.00118764, [2] [d_1]: 0.00116978 [renormalize]: 3.90006e-07 [partial_unused_args_eliminate]: 2.22e-06 [add_cache_embedding]: 0.00017695 [add_recomputation]: 0.00084136 [cse_after_recomputation]: 0.00046187, [1] [Cycle 1]: 0.00045415, [1] [cse]: 0.00043847 [environ_conv]: 0.00011761 [swap_dp_allreduce_reducescatter]: 0.00016722 [bias_add_comm_swap]: 1.77001e-06 [label_micro_interleaved_index]: 1.12e-06 [label_fine_grained_interleaved_index]: 0.00065224 [merge_cast_opt]: 1.25001e-06 [slice_recompute_activation]: 0.00018558 [micro_interleaved_order_control]: 1.69e-06 [assign_add_opt]: 0.00047143 [ForceFp32Comm]: 1.19999e-06 [remove_cast_before_assign_add]: 0.00013441 [full_micro_interleaved_order_control]: 1.41999e-06 [reorder_send_recv_between_fp_bp]: 1.14999e-06 [comm_op_add_attrs]: 0.00019668 [add_comm_op_reuse_tag]: 0.00020022 [interleave_split_concat_branches]: 6.79996e-07 [interleave_parallel_branches]: 5.29995e-07 [overlap_opt_shard_in_pipeline]: 1.016e-05 [overlap_opt_shard_grad_in_pipeline]: 1.94e-06 [control_data_broadcast_order]: 6.60002e-07 [grouped_pairwise_exchange_alltoall]: 8.21e-06 [offloading_packed_experts]: 1.14e-06 [overlap_recompute_and_grad_model_parallel]: 1.03e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.3001e-07 [overlap_recompute_allgather_and_fa_grad]: 5.579e-05 [overlap_grad_ring_attention]: 0.0001854 [overlap_grad_flash_sp]: 0.00014298 [begin_end_overlap_inline]: 5.10001e-07 [split_matmul_comm_elemetwise]: 1.13e-06 [split_layernorm_comm]: 1.10999e-06 [handle_group_info]: 4.48e-06 [symbol_engine_optimizer]: 0.00105969, [1] [Cycle 1]: 0.00105335, [6] [build]: 6.163e-05 [elim_shapecalc]: 0.00017962 [elim_not_effective]: 0.00027315 [opt_reshape]: 0.00017037 [fold_const_symbol]: 0.0003262 [renormalize]: 3.69997e-07 [pipeline_parallel_scheduler]: 2.48e-06 [auto_monad_reorder]: 0.00034744 [get_jit_bprop_graph]: 4.00003e-07 [rewriter_after_jit_bprop_graph]: 3.09999e-07 [eliminate_special_op_node]: 0.00157344 [distribtued_split]: 1.05999e-06 [validate]: 0.00036763 [task_emit]: 13.6792 [execute]: 8.07001e-06 Sums bootstrap : 0.001163s : 0.01% type_inference : 0.709704s : 4.67% auto_monad : 0.001847s : 0.01% graph_reusing : 0.000026s : 0.00% inline.rewriter_before_opt_a : 0.001453s : 0.01% inline.a1a2.expand_dump_flag : 0.000032s : 0.00% inline.a1a2.switch_simplify : 0.001124s : 0.01% inline.a1a2.loop_unroll : 0.000793s : 0.01% inline.a1a2.a_1 : 0.024709s : 0.16% inline.a1a2.recompute_prepare : 0.000256s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000429s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000152s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000253s : 0.00% inline.a1a2.parameter_eliminate : 0.000007s : 0.00% inline.a1a2.a_2 : 0.004803s : 0.03% inline.a1a2.parallel_inline_pass : 0.000211s : 0.00% parallel-infer-symbol : 0.000250s : 0.00% pre_auto_parallel : 0.000095s : 0.00% insert-virtual-dataset : 0.001177s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000081s : 0.00% pipeline_split : 0.000075s : 0.00% optimize.py_interpret_to_execute : 0.000120s : 0.00% optimize.rewriter_before_opt_a : 0.000269s : 0.00% optimize.opt_a.expand_dump_flag : 0.000067s : 0.00% optimize.opt_a.switch_simplify : 0.002720s : 0.02% optimize.opt_a.loop_unroll : 0.002243s : 0.01% optimize.opt_a.a_1 : 0.047007s : 0.31% optimize.opt_a.recompute_prepare : 0.000524s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000763s : 0.01% optimize.opt_a.updatestate_assign_eliminate : 0.000323s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000371s : 0.00% optimize.opt_a.parameter_eliminate : 0.000009s : 0.00% optimize.opt_a.a_2 : 0.010193s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000648s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000207s : 0.00% optimize.opt_a.shard_inline : 0.000477s : 0.00% optimize.opt_a.auto_parallel : 0.000354s : 0.00% optimize.opt_a.parallel : 0.118690s : 0.78% optimize.opt_a.flash_sp : 0.000227s : 0.00% optimize.opt_a.merge_comm : 0.000441s : 0.00% optimize.opt_a.allreduce_fusion : 0.000352s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000413s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000537s : 0.00% optimize.opt_a.virtual_dataset : 0.000549s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000495s : 0.00% optimize.opt_a.virtual_output : 0.000501s : 0.00% optimize.opt_a.merge_forward : 0.000330s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000947s : 0.01% optimize.opt_a.before_grad : 0.000871s : 0.01% optimize.opt_a.inplace_validation : 0.000381s : 0.00% optimize.opt_a.parallel_renormalize : 0.021283s : 0.14% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000609s : 0.00% optimize.opt_a.meta_fg_expand : 0.284446s : 1.87% optimize.opt_a.inplace_validation_after_expand : 0.002255s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001480s : 0.01% optimize.opt_a.receive_attached : 0.000091s : 0.00% optimize.opt_a.after_resolve : 0.002880s : 0.02% optimize.opt_a.a_after_grad : 0.005306s : 0.03% optimize.opt_a.special_op_eliminate : 0.002638s : 0.02% optimize.opt_a.renormalize : 0.191819s : 1.26% optimize.opt_a.add_forward_monad_depend : 0.000428s : 0.00% optimize.opt_a.auto_monad_grad : 0.000268s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002798s : 0.02% optimize.opt_a.cse : 0.014214s : 0.09% optimize.opt_a.a_3 : 0.034298s : 0.23% optimize.py_interpret_to_execute_after_opt_a : 0.000176s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.001091s : 0.01% optimize.convert_after_rewriter : 0.000175s : 0.00% optimize.order_py_execute_after_rewriter : 0.000103s : 0.00% optimize.opt_b.b_1 : 0.004143s : 0.03% optimize.opt_b.b_2 : 0.000183s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000123s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000111s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000116s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000551s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000183s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000077s : 0.00% optimize.loop_unroll : 0.000940s : 0.01% optimize.opt_after_cconv.c_1 : 0.001026s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000162s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000182s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000121s : 0.00% optimize.opt_after_cconv.cse : 0.000535s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000871s : 0.01% optimize.tuple_transform.d_1 : 0.001170s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000177s : 0.00% optimize.add_recomputation : 0.000841s : 0.01% optimize.cse_after_recomputation.cse : 0.000438s : 0.00% optimize.environ_conv : 0.000118s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000167s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000652s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000186s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000471s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000134s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000197s : 0.00% optimize.add_comm_op_reuse_tag : 0.000200s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000010s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000008s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000056s : 0.00% optimize.overlap_grad_ring_attention : 0.000185s : 0.00% optimize.overlap_grad_flash_sp : 0.000143s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000004s : 0.00% optimize.symbol_engine_optimizer.build : 0.000062s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000180s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000273s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000170s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000326s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000347s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001573s : 0.01% distribtued_split : 0.000001s : 0.00% validate : 0.000368s : 0.00% task_emit : 13.679224s : 89.98% execute : 0.000008s : 0.00% Time group info: ------[substitution.] 0.052866 4902 0.03% : 0.000016s : 4: substitution.ad_related_special_op_eliminate 0.04% : 0.000021s : 9: substitution.addn_check_dump 0.09% : 0.000047s : 7: substitution.addn_zero_filter 0.02% : 0.000013s : 7: substitution.adjust_all_reduce_mul_add 0.54% : 0.000287s : 71: substitution.arithmetic_simplify 0.09% : 0.000048s : 10: substitution.cast_eliminate 0.10% : 0.000053s : 47: substitution.depend_value_elim 0.07% : 0.000035s : 127: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.04% : 0.000022s : 15: substitution.environ_get_add_eliminate 0.03% : 0.000016s : 12: substitution.environ_get_depend_swap 0.05% : 0.000029s : 27: substitution.environ_get_eliminate 0.06% : 0.000031s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000021s : 28: substitution.float_depend_g_call 0.02% : 0.000009s : 12: substitution.float_environ_get_switch 0.02% : 0.000010s : 10: substitution.float_tuple_getitem_switch 0.07% : 0.000035s : 127: substitution.fold_const_symbol 64.78% : 0.034249s : 290: substitution.getattr_setattr_resolve 0.20% : 0.000104s : 165: substitution.graph_param_transform 0.01% : 0.000008s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.37% : 0.012357s : 363: substitution.inline 1.47% : 0.000775s : 127: substitution.inline_without_move 0.28% : 0.000146s : 361: substitution.j_node_and_user_rematch 0.23% : 0.000124s : 40: substitution.less_batch_normalization 0.09% : 0.000045s : 90: substitution.load_eliminater 0.09% : 0.000047s : 10: substitution.merge_addn 0.22% : 0.000117s : 115: substitution.minmaximum_grad 0.01% : 0.000003s : 10: substitution.opt_reshape 0.03% : 0.000018s : 1: substitution.partial_defer_inline 0.12% : 0.000062s : 28: substitution.partial_eliminate 0.04% : 0.000021s : 35: substitution.reduce_all_const_elim 0.05% : 0.000028s : 15: substitution.reduce_eliminate 0.48% : 0.000251s : 361: substitution.remove_not_recompute_node 2.24% : 0.001185s : 612: substitution.replace_applicator 0.25% : 0.000133s : 324: substitution.replace_old_param 0.19% : 0.000099s : 31: substitution.reshape_eliminate 0.02% : 0.000012s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000009s : 4: substitution.specialize_transform 0.03% : 0.000015s : 12: substitution.split_environ_get_set_with_tuple_value 0.16% : 0.000084s : 36: substitution.switch_simplify 0.05% : 0.000026s : 11: substitution.tile_eliminate 0.52% : 0.000275s : 115: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000141s : 121: substitution.tuple_list_get_item_const_eliminator 0.41% : 0.000216s : 121: substitution.tuple_list_get_item_depend_reorder 1.57% : 0.000830s : 356: substitution.tuple_list_get_item_eliminator 0.36% : 0.000188s : 121: substitution.tuple_list_get_set_item_eliminator 0.36% : 0.000192s : 210: substitution.updatestate_pure_node_eliminater 0.75% : 0.000397s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.709263 2 96.59% : 0.685089s : 1: type_inference.infer 3.41% : 0.024174s : 1: type_inference.specialize ------[replace.] 0.010463 851 0.31% : 0.000032s : 4: replace.ad_related_special_op_eliminate 0.07% : 0.000007s : 1: replace.arithmetic_simplify 0.46% : 0.000048s : 7: replace.depend_value_elim 0.38% : 0.000040s : 3: replace.environ_get_set_eliminate 27.37% : 0.002864s : 189: replace.getattr_setattr_resolve 30.60% : 0.003201s : 342: replace.inline 0.20% : 0.000021s : 1: replace.merge_addn 1.05% : 0.000110s : 7: replace.partial_eliminate 3.99% : 0.000418s : 28: replace.replace_applicator 3.60% : 0.000376s : 36: replace.switch_simplify 0.47% : 0.000049s : 6: replace.tuple_list_get_item_depend_reorder 31.20% : 0.003264s : 225: replace.tuple_list_get_item_eliminator 0.15% : 0.000015s : 1: replace.updatestate_useless_node_eliminater 0.17% : 0.000017s : 1: replace.virtual_dataset_eliminate ------[match.] 0.042219 851 0.03% : 0.000013s : 4: match.ad_related_special_op_eliminate 0.02% : 0.000010s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.03% : 0.000015s : 3: match.environ_get_set_eliminate 69.51% : 0.029344s : 189: match.getattr_setattr_resolve 28.70% : 0.012115s : 342: match.inline 0.05% : 0.000020s : 1: match.merge_addn 0.08% : 0.000033s : 7: match.partial_eliminate 0.25% : 0.000105s : 28: match.replace_applicator 0.15% : 0.000064s : 36: match.switch_simplify 0.07% : 0.000028s : 6: match.tuple_list_get_item_depend_reorder 1.08% : 0.000454s : 225: match.tuple_list_get_item_eliminator 0.02% : 0.000007s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000009s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.024713157659 0.74% : 0.000184s : 1382: predicate.accumulaten_eliminater 0.28% : 0.000069s : 331: predicate.ad_related_special_op_eliminate 0.55% : 0.000136s : 998: predicate.addn_check_dump 0.76% : 0.000188s : 1382: predicate.addn_zero_filter 0.77% : 0.000190s : 1382: predicate.adjust_all_reduce_mul_add 1.70% : 0.000421s : 2381: predicate.arithmetic_simplify 1.11% : 0.000274s : 1907: predicate.cast_eliminate 3.46% : 0.000854s : 4404: predicate.check_bprop_eliminate 0.55% : 0.000137s : 998: predicate.compare_switch_simplify 0.06% : 0.000015s : 184: predicate.const_output_eliminate 0.20% : 0.000048s : 322: predicate.convert_tensor_all_eliminate 1.14% : 0.000281s : 1617: predicate.convert_tensor_eliminate 0.56% : 0.000140s : 1001: predicate.depend_value_elim 0.77% : 0.000189s : 1386: predicate.dict_get_item_const_eliminator 0.83% : 0.000206s : 1386: predicate.dict_get_item_eliminator 0.79% : 0.000196s : 1386: predicate.dict_set_item_eliminator 0.05% : 0.000012s : 165: predicate.elim_not_effective 0.11% : 0.000028s : 165: predicate.elim_shapecalc_of_broadcastargs 0.80% : 0.000199s : 1567: predicate.environ_add_const_eliminate 0.83% : 0.000205s : 1570: predicate.environ_get_add_eliminate 0.81% : 0.000201s : 1567: predicate.environ_get_depend_swap 1.45% : 0.000359s : 2568: predicate.environ_get_eliminate 0.80% : 0.000199s : 1570: predicate.environ_get_set_eliminate 1.07% : 0.000264s : 1967: predicate.exchange_switch_depend_value 1.33% : 0.000329s : 1967: predicate.float_depend_g_call 0.54% : 0.000134s : 998: predicate.float_environ_get_switch 0.65% : 0.000161s : 1182: predicate.float_tuple_getitem_switch 0.05% : 0.000012s : 165: predicate.fold_const_symbol 0.32% : 0.000079s : 532: predicate.get_grad_eliminate 2.06% : 0.000510s : 2090: predicate.getattr_setattr_resolve 0.06% : 0.000015s : 165: predicate.graph_param_transform 0.54% : 0.000133s : 998: predicate.incorporate_call 0.53% : 0.000132s : 998: predicate.incorporate_call_switch 3.74% : 0.000925s : 5403: predicate.inline 2.41% : 0.000596s : 2684: predicate.inline_without_move 0.17% : 0.000041s : 532: predicate.j_node_and_user_rematch 0.34% : 0.000085s : 494: predicate.less_batch_normalization 1.10% : 0.000271s : 1966: predicate.list_to_tuple_eliminator_ 1.80% : 0.000445s : 3374: predicate.load_eliminater 0.20% : 0.000049s : 184: predicate.loop_unroll_after_grad 2.37% : 0.000586s : 3092: predicate.loop_unroll_before_grad 0.93% : 0.000231s : 1760: predicate.make_slice_get_slice_eliminator 0.56% : 0.000139s : 1000: predicate.merge_addn 3.34% : 0.000827s : 4288: predicate.micro_step_allgather_replace 3.36% : 0.000830s : 4288: predicate.mini_step_allgather_replace 0.74% : 0.000182s : 1383: predicate.minmaximum_grad 0.19% : 0.000048s : 322: predicate.mutable_eliminate 0.10% : 0.000025s : 165: predicate.opt_reshape 0.12% : 0.000029s : 184: predicate.parallel_virtual_node 1.96% : 0.000484s : 1967: predicate.partial_defer_inline 1.08% : 0.000267s : 1808: predicate.partial_eliminate 0.75% : 0.000184s : 1382: predicate.print_const_string_wrapper 0.57% : 0.000141s : 987: predicate.reduce_all_const_elim 0.89% : 0.000221s : 1383: predicate.reduce_eliminate 0.16% : 0.000039s : 532: predicate.remove_not_recompute_node 2.17% : 0.000537s : 5961: predicate.replace_applicator 0.82% : 0.000202s : 2684: predicate.replace_old_param 0.06% : 0.000015s : 184: predicate.reset_defer_inline 0.74% : 0.000183s : 1383: predicate.reshape_eliminate 3.54% : 0.000875s : 4288: predicate.row_tensor_add_zeros_like 0.12% : 0.000029s : 184: predicate.row_tensor_eliminate 3.53% : 0.000873s : 4404: predicate.same_eliminate 0.23% : 0.000058s : 739: predicate.set_cell_output_no_recompute 0.32% : 0.000080s : 532: predicate.shard_identity_eliminate 2.22% : 0.000549s : 2868: predicate.special_op_eliminate 0.63% : 0.000154s : 1000: predicate.specialize_transform 3.74% : 0.000925s : 4288: predicate.split_environ_get_set_with_tuple_value 1.58% : 0.000391s : 2684: predicate.stack_unstack_eliminate 1.81% : 0.000447s : 3374: predicate.stopgrad_eliminater 0.10% : 0.000025s : 184: predicate.switch_call_monad_eliminater 1.19% : 0.000293s : 1967: predicate.switch_defer_inline 4.60% : 0.001137s : 6371: predicate.switch_layer_defer_inline 4.45% : 0.001099s : 6131: predicate.switch_simplify 0.72% : 0.000177s : 1383: predicate.tile_eliminate 0.70% : 0.000173s : 1383: predicate.transpose_eliminate 1.02% : 0.000252s : 1735: predicate.tuple_list_convert_item_index_to_positive 1.06% : 0.000261s : 1741: predicate.tuple_list_get_item_const_eliminator 0.92% : 0.000227s : 1741: predicate.tuple_list_get_item_depend_reorder 1.84% : 0.000456s : 2964: predicate.tuple_list_get_item_eliminator 0.96% : 0.000238s : 1741: predicate.tuple_list_get_set_item_eliminator 1.62% : 0.000401s : 2739: predicate.tuple_list_set_item_eliminator 1.05% : 0.000259s : 1966: predicate.tuple_to_list_eliminator_ 1.78% : 0.000440s : 3374: predicate.updatestate_pure_node_eliminater 2.44% : 0.000603s : 4373: predicate.updatestate_useless_node_eliminater 0.11% : 0.000028s : 184: predicate.value_based_eliminate 0.32% : 0.000079s : 534: predicate.virtual_dataset_eliminate 0.31% : 0.000076s : 532: predicate.virtual_output_eliminate 0.11% : 0.000028s : 184: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.062379 841 67.88% : 0.042343s : 383: func_graph_cloner_run.FuncGraphClonerGraph 2.28% : 0.001424s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.84% : 0.018612s : 436: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 16.437438 350 0.00% : 0.000005s : 1: ForceFp32Comm 0.24% : 0.040153s : 1: a1a2 0.00% : 0.000185s : 1: add_cache_embedding 0.00% : 0.000208s : 1: add_comm_op_reuse_tag 0.01% : 0.000854s : 1: add_recomputation 0.00% : 0.000482s : 1: assign_add_opt 0.01% : 0.001868s : 1: auto_monad 0.00% : 0.000360s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.01% : 0.001206s : 1: bootstrap 0.00% : 0.000084s : 1: cconv 0.00% : 0.000206s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.00% : 0.000183s : 1: convert_after_rewriter 0.00% : 0.000467s : 1: cse_after_recomputation 0.00% : 0.000089s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.01% : 0.001589s : 1: eliminate_special_op_node 0.00% : 0.000126s : 1: environ_conv 0.00% : 0.000016s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000033s : 1: graph_reusing 0.00% : 0.000011s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000007s : 1: handle_group_info 0.25% : 0.041646s : 1: inline 0.01% : 0.001198s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000663s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.01% : 0.000951s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.19% : 0.031833s : 61: opt.transform.a1a2 0.00% : 0.000230s : 1: opt.transform.loop_unroll_optimizer 0.69% : 0.113195s : 148: opt.transform.opt_a 0.01% : 0.001023s : 1: opt.transform.opt_after_cconv 0.03% : 0.004294s : 27: opt.transform.opt_b 0.24% : 0.040190s : 20: opt.transform.opt_resolve 0.01% : 0.001167s : 1: opt.transform.opt_trans_graph 0.01% : 0.001038s : 6: opt.transform.special_op_eliminate 0.01% : 0.000943s : 4: opt.transform.symbol_engine_opt 4.60% : 0.756926s : 1: opt_a 0.01% : 0.002106s : 1: opt_after_cconv 0.03% : 0.005303s : 1: opt_b 4.72% : 0.775029s : 1: optimize 0.00% : 0.000192s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000110s : 1: order_py_execute_after_rewriter 0.00% : 0.000147s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000192s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000014s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000060s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000265s : 1: parallel-infer-symbol 0.00% : 0.000008s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000085s : 1: pipeline_split 0.00% : 0.000105s : 1: pre_auto_parallel 0.00% : 0.000128s : 1: py_interpret_to_execute 0.00% : 0.000184s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000141s : 1: remove_cast_before_assign_add 0.01% : 0.000886s : 1: remove_dup_value 0.90% : 0.148632s : 3: renormalize.infer 0.39% : 0.064433s : 3: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001102s : 1: rewriter_after_opt_a 0.01% : 0.001744s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000193s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.00% : 0.000175s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.001064s : 1: symbol_engine_optimizer 83.22% : 13.679262s : 1: task_emit 0.01% : 0.001199s : 1: tuple_transform 4.32% : 0.709732s : 1: type_inference 0.01% : 0.001403s : 1: validate TotalTime = 15.375, [21] [bootstrap]: 0.0011759 [type_inference]: 0.720939 [auto_monad]: 0.00199429 [graph_reusing]: 2.704e-05 [inline]: 0.041945, [2] [rewriter_before_opt_a]: 0.00147518 [a1a2]: 0.0404324, [2] [Cycle 1]: 0.0278265, [11] [expand_dump_flag]: 3.256e-05 [switch_simplify]: 0.00107187 [loop_unroll]: 0.00067908 [a_1]: 0.0216994 [recompute_prepare]: 0.00016087 [updatestate_depend_eliminate]: 0.0003496 [updatestate_assign_eliminate]: 8.951e-05 [updatestate_loads_eliminate]: 0.00020143 [parameter_eliminate]: 6.12e-06 [a_2]: 0.00327097 [parallel_inline_pass]: 0.00010316 [Cycle 2]: 0.0053289, [11] [expand_dump_flag]: 1.34e-06 [switch_simplify]: 9.417e-05 [loop_unroll]: 9.313e-05 [a_1]: 0.00316602 [recompute_prepare]: 9.943e-05 [updatestate_depend_eliminate]: 7.226e-05 [updatestate_assign_eliminate]: 5.972e-05 [updatestate_loads_eliminate]: 6.207e-05 [parameter_eliminate]: 3.10001e-06 [a_2]: 0.00151116 [parallel_inline_pass]: 0.00010039 [parallel-infer-symbol]: 0.00017837 [pre_auto_parallel]: 9.767e-05 [insert-virtual-dataset]: 0.00117331 [parallel-infer-symbol-second]: 2.52e-06 [dataset_repeat_opt]: 7.08e-05 [pipeline_split]: 9.59e-05 [optimize]: 0.80957, [52] [py_interpret_to_execute]: 0.00011778 [rewriter_before_opt_a]: 0.00026644 [opt_a]: 0.790932, [3] [Cycle 1]: 0.67885, [46] [expand_dump_flag]: 2.02001e-06 [switch_simplify]: 0.00011307 [loop_unroll]: 9.881e-05 [a_1]: 0.00334466 [recompute_prepare]: 0.00010461 [updatestate_depend_eliminate]: 9.707e-05 [updatestate_assign_eliminate]: 6.275e-05 [updatestate_loads_eliminate]: 6.529e-05 [parameter_eliminate]: 3.22999e-06 [a_2]: 0.0015713 [accelerated_algorithm]: 0.00023027 [shard]: 2.68e-06 [meta_shard_fg_expand]: 4.801e-05 [shard_inline]: 0.00010701 [auto_parallel]: 7.527e-05 [parallel]: 0.129884 [flash_sp]: 8.164e-05 [merge_comm]: 0.00017835 [allreduce_fusion]: 0.00010205 [matmul_add_comm_reduction]: 0.00012151 [allreduce_slice_to_reducescatter]: 5.69999e-07 [virtual_shard_identity]: 0.00015878 [virtual_dataset]: 0.00019469 [get_grad_eliminate_]: 0.00014708 [virtual_output]: 0.00014437 [merge_forward]: 0.00011215 [cell_reuse_recompute_pass]: 3.51001e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025749 [before_grad]: 0.00024062 [inplace_validation]: 0.00016143 [parallel_renormalize]: 0.0223928 [update_top_fg]: 7.10002e-07 [cast_eliminate]: 0.00021186 [meta_fg_expand]: 0.28671 [inplace_validation_after_expand]: 0.00190858 [flash_sp_send_recv_attached]: 0.00151465 [receive_attached]: 9.38e-05 [after_resolve]: 0.00244125 [a_after_grad]: 0.0046459 [special_op_eliminate]: 0.00224231 [renormalize]: 0.178317 [add_forward_monad_depend]: 0.00036333 [auto_monad_grad]: 0.0002706 [auto_monad_eliminator]: 0.00223111 [cse]: 0.00537094 [a_3]: 0.0318727 [Cycle 2]: 0.0962162, [46] [expand_dump_flag]: 6.601e-05 [switch_simplify]: 0.00239144 [loop_unroll]: 0.00197269 [a_1]: 0.0443199 [recompute_prepare]: 0.00028468 [updatestate_depend_eliminate]: 0.00038733 [updatestate_assign_eliminate]: 0.00014576 [updatestate_loads_eliminate]: 0.00022388 [parameter_eliminate]: 4.57e-06 [a_2]: 0.00602047 [accelerated_algorithm]: 0.00022942 [shard]: 3.49e-06 [meta_shard_fg_expand]: 0.00016666 [shard_inline]: 0.00019953 [auto_parallel]: 0.00015329 [parallel]: 1.521e-05 [flash_sp]: 0.0001681 [merge_comm]: 0.00015213 [allreduce_fusion]: 0.00012567 [matmul_add_comm_reduction]: 0.00016413 [allreduce_slice_to_reducescatter]: 5.90007e-07 [virtual_shard_identity]: 0.00020158 [virtual_dataset]: 0.00019421 [get_grad_eliminate_]: 0.00018577 [virtual_output]: 0.00019167 [merge_forward]: 0.00013085 [cell_reuse_recompute_pass]: 2.61e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00033568 [before_grad]: 0.00031494 [inplace_validation]: 0.00011378 [parallel_renormalize]: 1.09998e-07 [update_top_fg]: 8.29998e-07 [cast_eliminate]: 0.00021681 [meta_fg_expand]: 0.00054884 [inplace_validation_after_expand]: 0.00027596 [flash_sp_send_recv_attached]: 2.56e-06 [receive_attached]: 2.1e-06 [after_resolve]: 0.00022753 [a_after_grad]: 0.00032219 [special_op_eliminate]: 0.00019085 [renormalize]: 0.0245631 [add_forward_monad_depend]: 5.71e-06 [auto_monad_grad]: 3.25e-06 [auto_monad_eliminator]: 0.00037064 [cse]: 0.00880509 [a_3]: 0.00132825 [Cycle 3]: 0.0158398, [46] [expand_dump_flag]: 2.56e-06 [switch_simplify]: 0.00018421 [loop_unroll]: 0.00017797 [a_1]: 0.00571743 [recompute_prepare]: 0.00019431 [updatestate_depend_eliminate]: 0.00019328 [updatestate_assign_eliminate]: 0.00012526 [updatestate_loads_eliminate]: 0.00012153 [parameter_eliminate]: 3.39e-06 [a_2]: 0.00281803 [accelerated_algorithm]: 0.000207 [shard]: 1.57001e-06 [meta_shard_fg_expand]: 6.725e-05 [shard_inline]: 0.000206 [auto_parallel]: 0.00014571 [parallel]: 1.051e-05 [flash_sp]: 2.3e-06 [merge_comm]: 0.00013524 [allreduce_fusion]: 0.00012696 [matmul_add_comm_reduction]: 0.00015451 [allreduce_slice_to_reducescatter]: 6.80011e-07 [virtual_shard_identity]: 0.00019223 [virtual_dataset]: 0.00018256 [get_grad_eliminate_]: 0.00017647 [virtual_output]: 0.00017812 [merge_forward]: 0.0001249 [cell_reuse_recompute_pass]: 3.64e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00032184 [before_grad]: 0.00030192 [inplace_validation]: 0.00011946 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 4.80009e-07 [cast_eliminate]: 0.00020106 [meta_fg_expand]: 0.00015383 [inplace_validation_after_expand]: 0.00016045 [flash_sp_send_recv_attached]: 2.13999e-06 [receive_attached]: 1.51001e-06 [after_resolve]: 0.00020022 [a_after_grad]: 0.00030135 [special_op_eliminate]: 0.00018085 [renormalize]: 1.00001e-07 [add_forward_monad_depend]: 2.28999e-06 [auto_monad_grad]: 1.64e-06 [auto_monad_eliminator]: 0.00020916 [cse]: 0.00058125 [a_3]: 0.00129644 [py_interpret_to_execute_after_opt_a]: 0.00019257 [slice_cell_reuse_recomputed_activation]: 2.74999e-06 [rewriter_after_opt_a]: 0.00111956 [convert_after_rewriter]: 0.00014218 [order_py_execute_after_rewriter]: 0.00010175 [opt_b]: 0.00545988, [1] [Cycle 1]: 0.00545087, [7] [b_1]: 0.00426338 [b_2]: 0.0001884 [updatestate_depend_eliminate]: 0.00012899 [updatestate_assign_eliminate]: 0.00013268 [updatestate_loads_eliminate]: 0.00011906 [renormalize]: 5.29995e-07 [cse]: 0.00054974 [optimize_parallel_all_gather_comm]: 0.00018337 [overlap_param_gather]: 1.45e-06 [cconv]: 8.95e-05 [loop_unroll]: 0.00100798 [opt_after_cconv]: 0.00210786, [1] [Cycle 1]: 0.0021004, [7] [c_1]: 0.00104351 [parameter_eliminate]: 2.92e-06 [updatestate_depend_eliminate]: 0.00016846 [updatestate_assign_eliminate]: 0.00012207 [updatestate_loads_eliminate]: 0.00012082 [cse]: 0.00058083 [renormalize]: 6.00005e-07 [remove_dup_value]: 0.00093394 [tuple_transform]: 0.0012294, [1] [Cycle 1]: 0.00122175, [2] [d_1]: 0.00119905 [renormalize]: 4.19997e-07 [partial_unused_args_eliminate]: 3.37999e-06 [add_cache_embedding]: 0.00019097 [add_recomputation]: 0.00086822 [cse_after_recomputation]: 0.00041283, [1] [Cycle 1]: 0.00040395, [1] [cse]: 0.00038993 [environ_conv]: 0.00013805 [swap_dp_allreduce_reducescatter]: 0.0001737 [bias_add_comm_swap]: 2.92e-06 [label_micro_interleaved_index]: 1.86999e-06 [label_fine_grained_interleaved_index]: 0.00070823 [merge_cast_opt]: 2.02001e-06 [slice_recompute_activation]: 0.00019243 [micro_interleaved_order_control]: 2.23001e-06 [assign_add_opt]: 0.00048473 [ForceFp32Comm]: 1.53e-06 [remove_cast_before_assign_add]: 0.00013698 [full_micro_interleaved_order_control]: 2.51e-06 [reorder_send_recv_between_fp_bp]: 2.54001e-06 [comm_op_add_attrs]: 0.00021738 [add_comm_op_reuse_tag]: 0.000201 [interleave_split_concat_branches]: 1.22e-06 [interleave_parallel_branches]: 9.10004e-07 [overlap_opt_shard_in_pipeline]: 1.123e-05 [overlap_opt_shard_grad_in_pipeline]: 3.68999e-06 [control_data_broadcast_order]: 1.16001e-06 [grouped_pairwise_exchange_alltoall]: 1.225e-05 [offloading_packed_experts]: 2.31e-06 [overlap_recompute_and_grad_model_parallel]: 2.3e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.49992e-07 [overlap_recompute_allgather_and_fa_grad]: 5.724e-05 [overlap_grad_ring_attention]: 0.00021235 [overlap_grad_flash_sp]: 0.00016997 [begin_end_overlap_inline]: 9.60004e-07 [split_matmul_comm_elemetwise]: 2.22e-06 [split_layernorm_comm]: 2.16e-06 [handle_group_info]: 7.02e-06 [symbol_engine_optimizer]: 0.00102813, [1] [Cycle 1]: 0.00102103, [6] [build]: 6.261e-05 [elim_shapecalc]: 0.00019061 [elim_not_effective]: 0.0002782 [opt_reshape]: 0.00017504 [fold_const_symbol]: 0.00027277 [renormalize]: 4.60001e-07 [pipeline_parallel_scheduler]: 3.88999e-06 [auto_monad_reorder]: 0.00039449 [get_jit_bprop_graph]: 5.50004e-07 [rewriter_after_jit_bprop_graph]: 4.40006e-07 [eliminate_special_op_node]: 0.00158562 [distribtued_split]: 1.44e-06 [validate]: 0.00039534 [task_emit]: 13.7939 [execute]: 1.238e-05 Sums bootstrap : 0.001176s : 0.01% type_inference : 0.720939s : 4.69% auto_monad : 0.001994s : 0.01% graph_reusing : 0.000027s : 0.00% inline.rewriter_before_opt_a : 0.001475s : 0.01% inline.a1a2.expand_dump_flag : 0.000034s : 0.00% inline.a1a2.switch_simplify : 0.001166s : 0.01% inline.a1a2.loop_unroll : 0.000772s : 0.01% inline.a1a2.a_1 : 0.024865s : 0.16% inline.a1a2.recompute_prepare : 0.000260s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000422s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000149s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000263s : 0.00% inline.a1a2.parameter_eliminate : 0.000009s : 0.00% inline.a1a2.a_2 : 0.004782s : 0.03% inline.a1a2.parallel_inline_pass : 0.000204s : 0.00% parallel-infer-symbol : 0.000178s : 0.00% pre_auto_parallel : 0.000098s : 0.00% insert-virtual-dataset : 0.001173s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000071s : 0.00% pipeline_split : 0.000096s : 0.00% optimize.py_interpret_to_execute : 0.000118s : 0.00% optimize.rewriter_before_opt_a : 0.000266s : 0.00% optimize.opt_a.expand_dump_flag : 0.000071s : 0.00% optimize.opt_a.switch_simplify : 0.002689s : 0.02% optimize.opt_a.loop_unroll : 0.002249s : 0.01% optimize.opt_a.a_1 : 0.053382s : 0.35% optimize.opt_a.recompute_prepare : 0.000584s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000678s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000334s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000411s : 0.00% optimize.opt_a.parameter_eliminate : 0.000011s : 0.00% optimize.opt_a.a_2 : 0.010410s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000667s : 0.00% optimize.opt_a.shard : 0.000008s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000282s : 0.00% optimize.opt_a.shard_inline : 0.000513s : 0.00% optimize.opt_a.auto_parallel : 0.000374s : 0.00% optimize.opt_a.parallel : 0.129909s : 0.85% optimize.opt_a.flash_sp : 0.000252s : 0.00% optimize.opt_a.merge_comm : 0.000466s : 0.00% optimize.opt_a.allreduce_fusion : 0.000355s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000440s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000553s : 0.00% optimize.opt_a.virtual_dataset : 0.000571s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000509s : 0.00% optimize.opt_a.virtual_output : 0.000514s : 0.00% optimize.opt_a.merge_forward : 0.000368s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000010s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000915s : 0.01% optimize.opt_a.before_grad : 0.000857s : 0.01% optimize.opt_a.inplace_validation : 0.000395s : 0.00% optimize.opt_a.parallel_renormalize : 0.022393s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000630s : 0.00% optimize.opt_a.meta_fg_expand : 0.287413s : 1.87% optimize.opt_a.inplace_validation_after_expand : 0.002345s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.001519s : 0.01% optimize.opt_a.receive_attached : 0.000097s : 0.00% optimize.opt_a.after_resolve : 0.002869s : 0.02% optimize.opt_a.a_after_grad : 0.005269s : 0.03% optimize.opt_a.special_op_eliminate : 0.002614s : 0.02% optimize.opt_a.renormalize : 0.202881s : 1.32% optimize.opt_a.add_forward_monad_depend : 0.000371s : 0.00% optimize.opt_a.auto_monad_grad : 0.000275s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002811s : 0.02% optimize.opt_a.cse : 0.014757s : 0.10% optimize.opt_a.a_3 : 0.034497s : 0.22% optimize.py_interpret_to_execute_after_opt_a : 0.000193s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001120s : 0.01% optimize.convert_after_rewriter : 0.000142s : 0.00% optimize.order_py_execute_after_rewriter : 0.000102s : 0.00% optimize.opt_b.b_1 : 0.004263s : 0.03% optimize.opt_b.b_2 : 0.000188s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000129s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000133s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000119s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000550s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000183s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000089s : 0.00% optimize.loop_unroll : 0.001008s : 0.01% optimize.opt_after_cconv.c_1 : 0.001044s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000168s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000122s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000121s : 0.00% optimize.opt_after_cconv.cse : 0.000581s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000934s : 0.01% optimize.tuple_transform.d_1 : 0.001199s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000191s : 0.00% optimize.add_recomputation : 0.000868s : 0.01% optimize.cse_after_recomputation.cse : 0.000390s : 0.00% optimize.environ_conv : 0.000138s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000174s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000708s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000192s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000485s : 0.00% optimize.ForceFp32Comm : 0.000002s : 0.00% optimize.remove_cast_before_assign_add : 0.000137s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000217s : 0.00% optimize.add_comm_op_reuse_tag : 0.000201s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000011s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000012s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000057s : 0.00% optimize.overlap_grad_ring_attention : 0.000212s : 0.00% optimize.overlap_grad_flash_sp : 0.000170s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000063s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000191s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000278s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000175s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000273s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000394s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001586s : 0.01% distribtued_split : 0.000001s : 0.00% validate : 0.000395s : 0.00% task_emit : 13.793928s : 89.78% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.055091 4901 0.03% : 0.000018s : 4: substitution.ad_related_special_op_eliminate 0.04% : 0.000025s : 9: substitution.addn_check_dump 0.10% : 0.000057s : 7: substitution.addn_zero_filter 0.03% : 0.000016s : 7: substitution.adjust_all_reduce_mul_add 0.60% : 0.000329s : 71: substitution.arithmetic_simplify 0.11% : 0.000059s : 10: substitution.cast_eliminate 0.10% : 0.000055s : 47: substitution.depend_value_elim 0.06% : 0.000035s : 127: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.06% : 0.000034s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000019s : 12: substitution.environ_get_depend_swap 0.05% : 0.000028s : 27: substitution.environ_get_eliminate 0.07% : 0.000037s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000024s : 28: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.02% : 0.000011s : 10: substitution.float_tuple_getitem_switch 0.07% : 0.000037s : 127: substitution.fold_const_symbol 61.87% : 0.034083s : 290: substitution.getattr_setattr_resolve 0.20% : 0.000108s : 164: substitution.graph_param_transform 0.02% : 0.000009s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 26.21% : 0.014438s : 363: substitution.inline 1.44% : 0.000791s : 127: substitution.inline_without_move 0.27% : 0.000146s : 361: substitution.j_node_and_user_rematch 0.23% : 0.000126s : 40: substitution.less_batch_normalization 0.08% : 0.000045s : 90: substitution.load_eliminater 0.11% : 0.000058s : 10: substitution.merge_addn 0.22% : 0.000123s : 115: substitution.minmaximum_grad 0.01% : 0.000003s : 10: substitution.opt_reshape 0.03% : 0.000015s : 1: substitution.partial_defer_inline 0.11% : 0.000062s : 28: substitution.partial_eliminate 0.04% : 0.000022s : 35: substitution.reduce_all_const_elim 0.06% : 0.000032s : 15: substitution.reduce_eliminate 0.34% : 0.000187s : 361: substitution.remove_not_recompute_node 2.25% : 0.001237s : 612: substitution.replace_applicator 0.25% : 0.000135s : 324: substitution.replace_old_param 0.22% : 0.000120s : 31: substitution.reshape_eliminate 0.02% : 0.000013s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000010s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.19% : 0.000102s : 36: substitution.switch_simplify 0.06% : 0.000032s : 11: substitution.tile_eliminate 0.53% : 0.000294s : 115: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000146s : 121: substitution.tuple_list_get_item_const_eliminator 0.43% : 0.000237s : 121: substitution.tuple_list_get_item_depend_reorder 1.71% : 0.000944s : 356: substitution.tuple_list_get_item_eliminator 0.36% : 0.000201s : 121: substitution.tuple_list_get_set_item_eliminator 0.35% : 0.000195s : 210: substitution.updatestate_pure_node_eliminater 0.63% : 0.000347s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000013s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.720483 2 96.48% : 0.695112s : 1: type_inference.infer 3.52% : 0.025371s : 1: type_inference.specialize ------[replace.] 0.012291 851 0.31% : 0.000038s : 4: replace.ad_related_special_op_eliminate 0.05% : 0.000006s : 1: replace.arithmetic_simplify 0.37% : 0.000046s : 7: replace.depend_value_elim 0.32% : 0.000039s : 3: replace.environ_get_set_eliminate 25.89% : 0.003182s : 189: replace.getattr_setattr_resolve 29.78% : 0.003660s : 342: replace.inline 0.26% : 0.000031s : 1: replace.merge_addn 0.90% : 0.000111s : 7: replace.partial_eliminate 3.52% : 0.000433s : 28: replace.replace_applicator 3.23% : 0.000397s : 36: replace.switch_simplify 0.41% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 34.65% : 0.004259s : 225: replace.tuple_list_get_item_eliminator 0.13% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.17% : 0.000020s : 1: replace.virtual_dataset_eliminate ------[match.] 0.044285 851 0.03% : 0.000015s : 4: match.ad_related_special_op_eliminate 0.02% : 0.000011s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000019s : 3: match.environ_get_set_eliminate 65.92% : 0.029194s : 189: match.getattr_setattr_resolve 32.04% : 0.014190s : 342: match.inline 0.07% : 0.000030s : 1: match.merge_addn 0.09% : 0.000038s : 7: match.partial_eliminate 0.25% : 0.000113s : 28: match.replace_applicator 0.18% : 0.000081s : 36: match.switch_simplify 0.07% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 1.22% : 0.000541s : 225: match.tuple_list_get_item_eliminator 0.02% : 0.000010s : 1: match.updatestate_useless_node_eliminater 0.03% : 0.000012s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.024751157772 0.75% : 0.000186s : 1383: predicate.accumulaten_eliminater 0.28% : 0.000070s : 329: predicate.ad_related_special_op_eliminate 0.55% : 0.000136s : 999: predicate.addn_check_dump 0.73% : 0.000180s : 1383: predicate.addn_zero_filter 0.71% : 0.000175s : 1383: predicate.adjust_all_reduce_mul_add 1.68% : 0.000415s : 2383: predicate.arithmetic_simplify 1.12% : 0.000277s : 1909: predicate.cast_eliminate 3.44% : 0.000852s : 4406: predicate.check_bprop_eliminate 0.56% : 0.000138s : 999: predicate.compare_switch_simplify 0.06% : 0.000014s : 185: predicate.const_output_eliminate 0.19% : 0.000047s : 320: predicate.convert_tensor_all_eliminate 1.12% : 0.000277s : 1618: predicate.convert_tensor_eliminate 0.56% : 0.000139s : 1002: predicate.depend_value_elim 0.77% : 0.000192s : 1387: predicate.dict_get_item_const_eliminator 0.79% : 0.000195s : 1387: predicate.dict_get_item_eliminator 0.80% : 0.000199s : 1387: predicate.dict_set_item_eliminator 0.05% : 0.000012s : 164: predicate.elim_not_effective 0.12% : 0.000030s : 164: predicate.elim_shapecalc_of_broadcastargs 0.83% : 0.000206s : 1569: predicate.environ_add_const_eliminate 0.81% : 0.000200s : 1572: predicate.environ_get_add_eliminate 0.81% : 0.000201s : 1569: predicate.environ_get_depend_swap 1.38% : 0.000343s : 2571: predicate.environ_get_eliminate 0.81% : 0.000201s : 1572: predicate.environ_get_set_eliminate 1.06% : 0.000261s : 1968: predicate.exchange_switch_depend_value 1.38% : 0.000340s : 1968: predicate.float_depend_g_call 0.55% : 0.000136s : 999: predicate.float_environ_get_switch 0.65% : 0.000161s : 1184: predicate.float_tuple_getitem_switch 0.05% : 0.000013s : 164: predicate.fold_const_symbol 0.32% : 0.000079s : 533: predicate.get_grad_eliminate 2.08% : 0.000515s : 2090: predicate.getattr_setattr_resolve 0.06% : 0.000015s : 164: predicate.graph_param_transform 0.54% : 0.000133s : 999: predicate.incorporate_call 0.53% : 0.000131s : 999: predicate.incorporate_call_switch 3.88% : 0.000961s : 5409: predicate.inline 2.40% : 0.000595s : 2685: predicate.inline_without_move 0.16% : 0.000041s : 533: predicate.j_node_and_user_rematch 0.35% : 0.000086s : 495: predicate.less_batch_normalization 1.07% : 0.000265s : 1967: predicate.list_to_tuple_eliminator_ 1.80% : 0.000445s : 3378: predicate.load_eliminater 0.20% : 0.000049s : 185: predicate.loop_unroll_after_grad 2.36% : 0.000585s : 3093: predicate.loop_unroll_before_grad 0.96% : 0.000238s : 1763: predicate.make_slice_get_slice_eliminator 0.57% : 0.000141s : 1001: predicate.merge_addn 3.37% : 0.000834s : 4290: predicate.micro_step_allgather_replace 3.33% : 0.000824s : 4290: predicate.mini_step_allgather_replace 0.74% : 0.000182s : 1384: predicate.minmaximum_grad 0.19% : 0.000048s : 320: predicate.mutable_eliminate 0.10% : 0.000025s : 164: predicate.opt_reshape 0.12% : 0.000029s : 185: predicate.parallel_virtual_node 2.34% : 0.000579s : 1968: predicate.partial_defer_inline 1.06% : 0.000262s : 1810: predicate.partial_eliminate 0.73% : 0.000182s : 1383: predicate.print_const_string_wrapper 0.57% : 0.000141s : 988: predicate.reduce_all_const_elim 0.92% : 0.000228s : 1384: predicate.reduce_eliminate 0.16% : 0.000039s : 533: predicate.remove_not_recompute_node 2.05% : 0.000508s : 5964: predicate.replace_applicator 0.89% : 0.000220s : 2685: predicate.replace_old_param 0.06% : 0.000015s : 185: predicate.reset_defer_inline 0.76% : 0.000188s : 1384: predicate.reshape_eliminate 3.34% : 0.000828s : 4290: predicate.row_tensor_add_zeros_like 0.12% : 0.000030s : 185: predicate.row_tensor_eliminate 3.52% : 0.000872s : 4406: predicate.same_eliminate 0.24% : 0.000059s : 740: predicate.set_cell_output_no_recompute 0.33% : 0.000081s : 533: predicate.shard_identity_eliminate 2.20% : 0.000544s : 2870: predicate.special_op_eliminate 0.64% : 0.000157s : 1001: predicate.specialize_transform 3.81% : 0.000942s : 4290: predicate.split_environ_get_set_with_tuple_value 1.59% : 0.000394s : 2685: predicate.stack_unstack_eliminate 1.76% : 0.000437s : 3378: predicate.stopgrad_eliminater 0.10% : 0.000025s : 185: predicate.switch_call_monad_eliminater 1.16% : 0.000286s : 1968: predicate.switch_defer_inline 4.55% : 0.001126s : 6374: predicate.switch_layer_defer_inline 4.23% : 0.001046s : 6134: predicate.switch_simplify 0.74% : 0.000184s : 1384: predicate.tile_eliminate 0.74% : 0.000182s : 1384: predicate.transpose_eliminate 1.04% : 0.000258s : 1736: predicate.tuple_list_convert_item_index_to_positive 1.02% : 0.000254s : 1742: predicate.tuple_list_get_item_const_eliminator 0.91% : 0.000225s : 1742: predicate.tuple_list_get_item_depend_reorder 1.86% : 0.000460s : 2966: predicate.tuple_list_get_item_eliminator 0.98% : 0.000243s : 1742: predicate.tuple_list_get_set_item_eliminator 1.63% : 0.000404s : 2741: predicate.tuple_list_set_item_eliminator 1.12% : 0.000276s : 1967: predicate.tuple_to_list_eliminator_ 1.81% : 0.000448s : 3378: predicate.updatestate_pure_node_eliminater 2.41% : 0.000597s : 4378: predicate.updatestate_useless_node_eliminater 0.11% : 0.000027s : 185: predicate.value_based_eliminate 0.32% : 0.000080s : 535: predicate.virtual_dataset_eliminate 0.31% : 0.000078s : 533: predicate.virtual_output_eliminate 0.12% : 0.000030s : 185: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.065863 841 68.11% : 0.044859s : 383: func_graph_cloner_run.FuncGraphClonerGraph 2.35% : 0.001547s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.54% : 0.019457s : 436: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 16.652960 350 0.00% : 0.000005s : 1: ForceFp32Comm 0.24% : 0.040437s : 1: a1a2 0.00% : 0.000200s : 1: add_cache_embedding 0.00% : 0.000209s : 1: add_comm_op_reuse_tag 0.01% : 0.000882s : 1: add_recomputation 0.00% : 0.000496s : 1: assign_add_opt 0.01% : 0.002018s : 1: auto_monad 0.00% : 0.000409s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.001223s : 1: bootstrap 0.00% : 0.000097s : 1: cconv 0.00% : 0.000227s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000150s : 1: convert_after_rewriter 0.00% : 0.000419s : 1: cse_after_recomputation 0.00% : 0.000079s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: distribtued_split 0.01% : 0.001602s : 1: eliminate_special_op_node 0.00% : 0.000150s : 1: environ_conv 0.00% : 0.000021s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000036s : 1: graph_reusing 0.00% : 0.000016s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000010s : 1: handle_group_info 0.25% : 0.041953s : 1: inline 0.01% : 0.001196s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000720s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.001020s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.19% : 0.031983s : 61: opt.transform.a1a2 0.00% : 0.000234s : 1: opt.transform.loop_unroll_optimizer 0.72% : 0.119999s : 148: opt.transform.opt_a 0.01% : 0.001041s : 1: opt.transform.opt_after_cconv 0.03% : 0.004411s : 27: opt.transform.opt_b 0.24% : 0.040546s : 20: opt.transform.opt_resolve 0.01% : 0.001196s : 1: opt.transform.opt_trans_graph 0.01% : 0.001058s : 6: opt.transform.special_op_eliminate 0.01% : 0.000910s : 4: opt.transform.symbol_engine_opt 4.75% : 0.790940s : 1: opt_a 0.01% : 0.002114s : 1: opt_after_cconv 0.03% : 0.005465s : 1: opt_b 4.86% : 0.809587s : 1: optimize 0.00% : 0.000193s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000108s : 1: order_py_execute_after_rewriter 0.00% : 0.000175s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000219s : 1: overlap_grad_ring_attention 0.00% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000015s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000062s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000190s : 1: parallel-infer-symbol 0.00% : 0.000008s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000105s : 1: pipeline_split 0.00% : 0.000107s : 1: pre_auto_parallel 0.00% : 0.000125s : 1: py_interpret_to_execute 0.00% : 0.000203s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000143s : 1: remove_cast_before_assign_add 0.01% : 0.000950s : 1: remove_dup_value 0.94% : 0.156544s : 3: renormalize.infer 0.41% : 0.068682s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001132s : 1: rewriter_after_opt_a 0.01% : 0.001765s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000199s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000182s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.001033s : 1: symbol_engine_optimizer 82.83% : 13.793968s : 1: task_emit 0.01% : 0.001234s : 1: tuple_transform 4.33% : 0.720975s : 1: type_inference 0.01% : 0.001454s : 1: validate TotalTime = 15.4699, [21] [bootstrap]: 0.00117205 [type_inference]: 0.71469 [auto_monad]: 0.00184828 [graph_reusing]: 2.497e-05 [inline]: 0.0419489, [2] [rewriter_before_opt_a]: 0.00146641 [a1a2]: 0.0404475, [2] [Cycle 1]: 0.0277847, [11] [expand_dump_flag]: 3.018e-05 [switch_simplify]: 0.0010758 [loop_unroll]: 0.00067036 [a_1]: 0.0215751 [recompute_prepare]: 0.00015845 [updatestate_depend_eliminate]: 0.00034981 [updatestate_assign_eliminate]: 8.818e-05 [updatestate_loads_eliminate]: 0.00021692 [parameter_eliminate]: 4.46e-06 [a_2]: 0.00336128 [parallel_inline_pass]: 0.0001005 [Cycle 2]: 0.00548667, [11] [expand_dump_flag]: 1.08e-06 [switch_simplify]: 9.332e-05 [loop_unroll]: 9.2e-05 [a_1]: 0.00315916 [recompute_prepare]: 9.595e-05 [updatestate_depend_eliminate]: 0.00022621 [updatestate_assign_eliminate]: 6.303e-05 [updatestate_loads_eliminate]: 6.175e-05 [parameter_eliminate]: 2.67e-06 [a_2]: 0.00152148 [parallel_inline_pass]: 9.951e-05 [parallel-infer-symbol]: 0.00015528 [pre_auto_parallel]: 8.84e-05 [insert-virtual-dataset]: 0.00114077 [parallel-infer-symbol-second]: 2.07999e-06 [dataset_repeat_opt]: 7.798e-05 [pipeline_split]: 8.233e-05 [optimize]: 0.728542, [52] [py_interpret_to_execute]: 0.00011582 [rewriter_before_opt_a]: 0.00026836 [opt_a]: 0.710478, [3] [Cycle 1]: 0.609316, [46] [expand_dump_flag]: 1.54e-06 [switch_simplify]: 0.00010823 [loop_unroll]: 9.633e-05 [a_1]: 0.00328812 [recompute_prepare]: 0.00010136 [updatestate_depend_eliminate]: 0.00010315 [updatestate_assign_eliminate]: 6.194e-05 [updatestate_loads_eliminate]: 6.65e-05 [parameter_eliminate]: 2.53999e-06 [a_2]: 0.00162876 [accelerated_algorithm]: 0.00030925 [shard]: 1.98001e-06 [meta_shard_fg_expand]: 4.905e-05 [shard_inline]: 0.00010646 [auto_parallel]: 8.119e-05 [parallel]: 0.0729293 [flash_sp]: 7.374e-05 [merge_comm]: 0.00016393 [allreduce_fusion]: 9.409e-05 [matmul_add_comm_reduction]: 0.00011686 [allreduce_slice_to_reducescatter]: 3.6e-07 [virtual_shard_identity]: 0.00015359 [virtual_dataset]: 0.00018247 [get_grad_eliminate_]: 0.0001428 [virtual_output]: 0.00014073 [merge_forward]: 8.694e-05 [cell_reuse_recompute_pass]: 2.94999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024624 [before_grad]: 0.00029022 [inplace_validation]: 0.00015299 [parallel_renormalize]: 0.0212474 [update_top_fg]: 6.19999e-07 [cast_eliminate]: 0.0002094 [meta_fg_expand]: 0.284443 [inplace_validation_after_expand]: 0.00190086 [flash_sp_send_recv_attached]: 0.00148077 [receive_attached]: 8.861e-05 [after_resolve]: 0.00249923 [a_after_grad]: 0.0046523 [special_op_eliminate]: 0.00222805 [renormalize]: 0.170149 [add_forward_monad_depend]: 0.0004221 [auto_monad_grad]: 0.00025984 [auto_monad_eliminator]: 0.00219472 [cse]: 0.00521733 [a_3]: 0.0310033 [Cycle 2]: 0.0855821, [46] [expand_dump_flag]: 6.231e-05 [switch_simplify]: 0.00225153 [loop_unroll]: 0.00195874 [a_1]: 0.0380267 [recompute_prepare]: 0.00022864 [updatestate_depend_eliminate]: 0.00032394 [updatestate_assign_eliminate]: 0.00013526 [updatestate_loads_eliminate]: 0.00019173 [parameter_eliminate]: 3.25e-06 [a_2]: 0.00596618 [accelerated_algorithm]: 0.00021399 [shard]: 1.56001e-06 [meta_shard_fg_expand]: 9.188e-05 [shard_inline]: 0.00019041 [auto_parallel]: 0.00014148 [parallel]: 1.072e-05 [flash_sp]: 0.00014907 [merge_comm]: 0.00014468 [allreduce_fusion]: 0.00012074 [matmul_add_comm_reduction]: 0.00014277 [allreduce_slice_to_reducescatter]: 3.79994e-07 [virtual_shard_identity]: 0.00019469 [virtual_dataset]: 0.00018665 [get_grad_eliminate_]: 0.00018089 [virtual_output]: 0.00018436 [merge_forward]: 0.00012889 [cell_reuse_recompute_pass]: 2.34001e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00031746 [before_grad]: 0.00031209 [inplace_validation]: 0.00010965 [parallel_renormalize]: 1.09998e-07 [update_top_fg]: 4.90007e-07 [cast_eliminate]: 0.00020071 [meta_fg_expand]: 0.00035032 [inplace_validation_after_expand]: 0.00023139 [flash_sp_send_recv_attached]: 2.18001e-06 [receive_attached]: 1.30999e-06 [after_resolve]: 0.00021332 [a_after_grad]: 0.00031282 [special_op_eliminate]: 0.00018556 [renormalize]: 0.0216048 [add_forward_monad_depend]: 5.64e-06 [auto_monad_grad]: 2.07999e-06 [auto_monad_eliminator]: 0.00037218 [cse]: 0.00836613 [a_3]: 0.00129792 [Cycle 3]: 0.0155592, [46] [expand_dump_flag]: 1.72999e-06 [switch_simplify]: 0.00017721 [loop_unroll]: 0.00017407 [a_1]: 0.00568731 [recompute_prepare]: 0.00018714 [updatestate_depend_eliminate]: 0.00019109 [updatestate_assign_eliminate]: 0.00012086 [updatestate_loads_eliminate]: 0.00011971 [parameter_eliminate]: 3.24001e-06 [a_2]: 0.00274756 [accelerated_algorithm]: 0.00020024 [shard]: 1.55e-06 [meta_shard_fg_expand]: 6.51e-05 [shard_inline]: 0.00021947 [auto_parallel]: 0.00014168 [parallel]: 1.026e-05 [flash_sp]: 2.47e-06 [merge_comm]: 0.0001333 [allreduce_fusion]: 0.0001223 [matmul_add_comm_reduction]: 0.00015177 [allreduce_slice_to_reducescatter]: 7.10002e-07 [virtual_shard_identity]: 0.00018499 [virtual_dataset]: 0.00017938 [get_grad_eliminate_]: 0.00017145 [virtual_output]: 0.00017663 [merge_forward]: 0.00011867 [cell_reuse_recompute_pass]: 3.13e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00031268 [before_grad]: 0.00029993 [inplace_validation]: 0.0001138 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 4.29995e-07 [cast_eliminate]: 0.0001923 [meta_fg_expand]: 0.00015021 [inplace_validation_after_expand]: 0.00015055 [flash_sp_send_recv_attached]: 1.93001e-06 [receive_attached]: 1.33e-06 [after_resolve]: 0.00019563 [a_after_grad]: 0.00029273 [special_op_eliminate]: 0.00017595 [renormalize]: 1.00001e-07 [add_forward_monad_depend]: 2.19001e-06 [auto_monad_grad]: 1.54e-06 [auto_monad_eliminator]: 0.000201 [cse]: 0.00058883 [a_3]: 0.00126374 [py_interpret_to_execute_after_opt_a]: 0.00017718 [slice_cell_reuse_recomputed_activation]: 2.12e-06 [rewriter_after_opt_a]: 0.00109248 [convert_after_rewriter]: 0.00013855 [order_py_execute_after_rewriter]: 0.00010118 [opt_b]: 0.00532514, [1] [Cycle 1]: 0.00531736, [7] [b_1]: 0.00414225 [b_2]: 0.00018204 [updatestate_depend_eliminate]: 0.00012445 [updatestate_assign_eliminate]: 0.00011451 [updatestate_loads_eliminate]: 0.00014248 [renormalize]: 6.30011e-07 [cse]: 0.00054535 [optimize_parallel_all_gather_comm]: 0.00017903 [overlap_param_gather]: 9.70002e-07 [cconv]: 7.886e-05 [loop_unroll]: 0.00092539 [opt_after_cconv]: 0.00202659, [1] [Cycle 1]: 0.00201945, [7] [c_1]: 0.00102632 [parameter_eliminate]: 2.1e-06 [updatestate_depend_eliminate]: 0.00016121 [updatestate_assign_eliminate]: 0.00011964 [updatestate_loads_eliminate]: 0.00011876 [cse]: 0.00053019 [renormalize]: 5.19998e-07 [remove_dup_value]: 0.0009063 [tuple_transform]: 0.00122644, [1] [Cycle 1]: 0.00121833, [2] [d_1]: 0.00120068 [renormalize]: 2.2001e-07 [partial_unused_args_eliminate]: 2.37999e-06 [add_cache_embedding]: 0.00017822 [add_recomputation]: 0.00084153 [cse_after_recomputation]: 0.00040122, [1] [Cycle 1]: 0.00039362, [1] [cse]: 0.0003795 [environ_conv]: 0.00012005 [swap_dp_allreduce_reducescatter]: 0.00016666 [bias_add_comm_swap]: 2.22999e-06 [label_micro_interleaved_index]: 1.45999e-06 [label_fine_grained_interleaved_index]: 0.00073388 [merge_cast_opt]: 1.59e-06 [slice_recompute_activation]: 0.00018839 [micro_interleaved_order_control]: 2.04e-06 [assign_add_opt]: 0.00047424 [ForceFp32Comm]: 1.30999e-06 [remove_cast_before_assign_add]: 0.00013069 [full_micro_interleaved_order_control]: 1.82999e-06 [reorder_send_recv_between_fp_bp]: 1.67001e-06 [comm_op_add_attrs]: 0.00019867 [add_comm_op_reuse_tag]: 0.00020316 [interleave_split_concat_branches]: 7.60003e-07 [interleave_parallel_branches]: 8.60004e-07 [overlap_opt_shard_in_pipeline]: 1.047e-05 [overlap_opt_shard_grad_in_pipeline]: 2.22e-06 [control_data_broadcast_order]: 7.30011e-07 [grouped_pairwise_exchange_alltoall]: 9.43999e-06 [offloading_packed_experts]: 1.41999e-06 [overlap_recompute_and_grad_model_parallel]: 1.66999e-06 [overlap_grad_matmul_and_grad_allreduce]: 6.10002e-07 [overlap_recompute_allgather_and_fa_grad]: 6.527e-05 [overlap_grad_ring_attention]: 0.00018941 [overlap_grad_flash_sp]: 0.0001465 [begin_end_overlap_inline]: 5.69999e-07 [split_matmul_comm_elemetwise]: 1.33e-06 [split_layernorm_comm]: 1.51999e-06 [handle_group_info]: 5.72001e-06 [symbol_engine_optimizer]: 0.00100468, [1] [Cycle 1]: 0.00099815, [6] [build]: 6.039e-05 [elim_shapecalc]: 0.00018304 [elim_not_effective]: 0.00027772 [opt_reshape]: 0.00017225 [fold_const_symbol]: 0.0002646 [renormalize]: 4.50003e-07 [pipeline_parallel_scheduler]: 2.5e-06 [auto_monad_reorder]: 0.00035064 [get_jit_bprop_graph]: 4.69998e-07 [rewriter_after_jit_bprop_graph]: 3.79994e-07 [eliminate_special_op_node]: 0.00164499 [distribtued_split]: 1.02e-06 [validate]: 0.00037382 [task_emit]: 13.9764 [execute]: 8.68e-06 Sums bootstrap : 0.001172s : 0.01% type_inference : 0.714690s : 4.62% auto_monad : 0.001848s : 0.01% graph_reusing : 0.000025s : 0.00% inline.rewriter_before_opt_a : 0.001466s : 0.01% inline.a1a2.expand_dump_flag : 0.000031s : 0.00% inline.a1a2.switch_simplify : 0.001169s : 0.01% inline.a1a2.loop_unroll : 0.000762s : 0.00% inline.a1a2.a_1 : 0.024734s : 0.16% inline.a1a2.recompute_prepare : 0.000254s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000576s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000151s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000279s : 0.00% inline.a1a2.parameter_eliminate : 0.000007s : 0.00% inline.a1a2.a_2 : 0.004883s : 0.03% inline.a1a2.parallel_inline_pass : 0.000200s : 0.00% parallel-infer-symbol : 0.000155s : 0.00% pre_auto_parallel : 0.000088s : 0.00% insert-virtual-dataset : 0.001141s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000078s : 0.00% pipeline_split : 0.000082s : 0.00% optimize.py_interpret_to_execute : 0.000116s : 0.00% optimize.rewriter_before_opt_a : 0.000268s : 0.00% optimize.opt_a.expand_dump_flag : 0.000066s : 0.00% optimize.opt_a.switch_simplify : 0.002537s : 0.02% optimize.opt_a.loop_unroll : 0.002229s : 0.01% optimize.opt_a.a_1 : 0.047002s : 0.30% optimize.opt_a.recompute_prepare : 0.000517s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000618s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000318s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000378s : 0.00% optimize.opt_a.parameter_eliminate : 0.000009s : 0.00% optimize.opt_a.a_2 : 0.010342s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000723s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000206s : 0.00% optimize.opt_a.shard_inline : 0.000516s : 0.00% optimize.opt_a.auto_parallel : 0.000364s : 0.00% optimize.opt_a.parallel : 0.072950s : 0.47% optimize.opt_a.flash_sp : 0.000225s : 0.00% optimize.opt_a.merge_comm : 0.000442s : 0.00% optimize.opt_a.allreduce_fusion : 0.000337s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000411s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000533s : 0.00% optimize.opt_a.virtual_dataset : 0.000549s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000495s : 0.00% optimize.opt_a.virtual_output : 0.000502s : 0.00% optimize.opt_a.merge_forward : 0.000335s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000876s : 0.01% optimize.opt_a.before_grad : 0.000902s : 0.01% optimize.opt_a.inplace_validation : 0.000376s : 0.00% optimize.opt_a.parallel_renormalize : 0.021248s : 0.14% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000602s : 0.00% optimize.opt_a.meta_fg_expand : 0.284944s : 1.84% optimize.opt_a.inplace_validation_after_expand : 0.002283s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001485s : 0.01% optimize.opt_a.receive_attached : 0.000091s : 0.00% optimize.opt_a.after_resolve : 0.002908s : 0.02% optimize.opt_a.a_after_grad : 0.005258s : 0.03% optimize.opt_a.special_op_eliminate : 0.002590s : 0.02% optimize.opt_a.renormalize : 0.191754s : 1.24% optimize.opt_a.add_forward_monad_depend : 0.000430s : 0.00% optimize.opt_a.auto_monad_grad : 0.000263s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002768s : 0.02% optimize.opt_a.cse : 0.014172s : 0.09% optimize.opt_a.a_3 : 0.033565s : 0.22% optimize.py_interpret_to_execute_after_opt_a : 0.000177s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.001092s : 0.01% optimize.convert_after_rewriter : 0.000139s : 0.00% optimize.order_py_execute_after_rewriter : 0.000101s : 0.00% optimize.opt_b.b_1 : 0.004142s : 0.03% optimize.opt_b.b_2 : 0.000182s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000124s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000115s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000142s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000545s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000179s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000079s : 0.00% optimize.loop_unroll : 0.000925s : 0.01% optimize.opt_after_cconv.c_1 : 0.001026s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000161s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000120s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000119s : 0.00% optimize.opt_after_cconv.cse : 0.000530s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000906s : 0.01% optimize.tuple_transform.d_1 : 0.001201s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000178s : 0.00% optimize.add_recomputation : 0.000842s : 0.01% optimize.cse_after_recomputation.cse : 0.000380s : 0.00% optimize.environ_conv : 0.000120s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000167s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000734s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000188s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000474s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000131s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000199s : 0.00% optimize.add_comm_op_reuse_tag : 0.000203s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000010s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000065s : 0.00% optimize.overlap_grad_ring_attention : 0.000189s : 0.00% optimize.overlap_grad_flash_sp : 0.000146s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000006s : 0.00% optimize.symbol_engine_optimizer.build : 0.000060s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000183s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000278s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000172s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000265s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000351s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001645s : 0.01% distribtued_split : 0.000001s : 0.00% validate : 0.000374s : 0.00% task_emit : 13.976396s : 90.41% execute : 0.000009s : 0.00% Time group info: ------[substitution.] 0.052901 4903 0.03% : 0.000017s : 4: substitution.ad_related_special_op_eliminate 0.04% : 0.000022s : 9: substitution.addn_check_dump 0.09% : 0.000049s : 7: substitution.addn_zero_filter 0.03% : 0.000015s : 7: substitution.adjust_all_reduce_mul_add 0.55% : 0.000291s : 71: substitution.arithmetic_simplify 0.09% : 0.000048s : 10: substitution.cast_eliminate 0.10% : 0.000052s : 47: substitution.depend_value_elim 0.07% : 0.000036s : 127: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.04% : 0.000023s : 15: substitution.environ_get_add_eliminate 0.03% : 0.000016s : 12: substitution.environ_get_depend_swap 0.06% : 0.000030s : 27: substitution.environ_get_eliminate 0.06% : 0.000032s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000023s : 28: substitution.float_depend_g_call 0.02% : 0.000009s : 12: substitution.float_environ_get_switch 0.02% : 0.000011s : 10: substitution.float_tuple_getitem_switch 0.07% : 0.000036s : 127: substitution.fold_const_symbol 64.63% : 0.034188s : 290: substitution.getattr_setattr_resolve 0.20% : 0.000105s : 166: substitution.graph_param_transform 0.01% : 0.000007s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.58% : 0.012473s : 363: substitution.inline 1.49% : 0.000786s : 127: substitution.inline_without_move 0.27% : 0.000144s : 361: substitution.j_node_and_user_rematch 0.35% : 0.000186s : 40: substitution.less_batch_normalization 0.08% : 0.000044s : 90: substitution.load_eliminater 0.09% : 0.000048s : 10: substitution.merge_addn 0.23% : 0.000122s : 115: substitution.minmaximum_grad 0.01% : 0.000003s : 10: substitution.opt_reshape 0.02% : 0.000013s : 1: substitution.partial_defer_inline 0.12% : 0.000062s : 28: substitution.partial_eliminate 0.04% : 0.000022s : 35: substitution.reduce_all_const_elim 0.06% : 0.000030s : 15: substitution.reduce_eliminate 0.34% : 0.000182s : 361: substitution.remove_not_recompute_node 2.18% : 0.001155s : 612: substitution.replace_applicator 0.25% : 0.000134s : 324: substitution.replace_old_param 0.20% : 0.000104s : 31: substitution.reshape_eliminate 0.02% : 0.000012s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000009s : 4: substitution.specialize_transform 0.03% : 0.000016s : 12: substitution.split_environ_get_set_with_tuple_value 0.16% : 0.000085s : 36: substitution.switch_simplify 0.05% : 0.000025s : 11: substitution.tile_eliminate 0.52% : 0.000277s : 115: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000145s : 121: substitution.tuple_list_get_item_const_eliminator 0.41% : 0.000219s : 121: substitution.tuple_list_get_item_depend_reorder 1.62% : 0.000858s : 356: substitution.tuple_list_get_item_eliminator 0.36% : 0.000192s : 121: substitution.tuple_list_get_set_item_eliminator 0.36% : 0.000192s : 210: substitution.updatestate_pure_node_eliminater 0.63% : 0.000334s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.714242 2 96.62% : 0.690084s : 1: type_inference.infer 3.38% : 0.024158s : 1: type_inference.specialize ------[replace.] 0.010531 851 0.32% : 0.000034s : 4: replace.ad_related_special_op_eliminate 0.06% : 0.000006s : 1: replace.arithmetic_simplify 0.48% : 0.000050s : 7: replace.depend_value_elim 0.37% : 0.000039s : 3: replace.environ_get_set_eliminate 27.57% : 0.002903s : 189: replace.getattr_setattr_resolve 30.20% : 0.003180s : 342: replace.inline 0.21% : 0.000023s : 1: replace.merge_addn 1.03% : 0.000109s : 7: replace.partial_eliminate 4.10% : 0.000431s : 28: replace.replace_applicator 3.66% : 0.000385s : 36: replace.switch_simplify 0.47% : 0.000049s : 6: replace.tuple_list_get_item_depend_reorder 31.22% : 0.003288s : 225: replace.tuple_list_get_item_eliminator 0.15% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.16% : 0.000017s : 1: replace.virtual_dataset_eliminate ------[match.] 0.042414 851 0.03% : 0.000013s : 4: match.ad_related_special_op_eliminate 0.02% : 0.000009s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000016s : 3: match.environ_get_set_eliminate 69.30% : 0.029393s : 189: match.getattr_setattr_resolve 28.87% : 0.012245s : 342: match.inline 0.05% : 0.000021s : 1: match.merge_addn 0.08% : 0.000033s : 7: match.partial_eliminate 0.25% : 0.000105s : 28: match.replace_applicator 0.15% : 0.000065s : 36: match.switch_simplify 0.07% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 1.09% : 0.000462s : 225: match.tuple_list_get_item_eliminator 0.02% : 0.000008s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000009s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.024473157546 0.72% : 0.000175s : 1381: predicate.accumulaten_eliminater 0.29% : 0.000071s : 333: predicate.ad_related_special_op_eliminate 0.55% : 0.000135s : 997: predicate.addn_check_dump 0.72% : 0.000176s : 1381: predicate.addn_zero_filter 0.73% : 0.000179s : 1381: predicate.adjust_all_reduce_mul_add 1.70% : 0.000416s : 2379: predicate.arithmetic_simplify 1.12% : 0.000275s : 1905: predicate.cast_eliminate 3.63% : 0.000890s : 4402: predicate.check_bprop_eliminate 0.56% : 0.000137s : 997: predicate.compare_switch_simplify 0.06% : 0.000015s : 183: predicate.const_output_eliminate 0.19% : 0.000047s : 324: predicate.convert_tensor_all_eliminate 1.10% : 0.000269s : 1616: predicate.convert_tensor_eliminate 0.57% : 0.000139s : 1000: predicate.depend_value_elim 0.79% : 0.000194s : 1385: predicate.dict_get_item_const_eliminator 0.82% : 0.000202s : 1385: predicate.dict_get_item_eliminator 0.80% : 0.000195s : 1385: predicate.dict_set_item_eliminator 0.05% : 0.000012s : 166: predicate.elim_not_effective 0.11% : 0.000028s : 166: predicate.elim_shapecalc_of_broadcastargs 0.81% : 0.000199s : 1565: predicate.environ_add_const_eliminate 0.83% : 0.000202s : 1568: predicate.environ_get_add_eliminate 0.81% : 0.000198s : 1565: predicate.environ_get_depend_swap 1.44% : 0.000352s : 2565: predicate.environ_get_eliminate 0.85% : 0.000209s : 1568: predicate.environ_get_set_eliminate 1.06% : 0.000260s : 1966: predicate.exchange_switch_depend_value 1.36% : 0.000332s : 1966: predicate.float_depend_g_call 0.56% : 0.000137s : 997: predicate.float_environ_get_switch 0.66% : 0.000162s : 1180: predicate.float_tuple_getitem_switch 0.05% : 0.000012s : 166: predicate.fold_const_symbol 0.32% : 0.000077s : 531: predicate.get_grad_eliminate 2.11% : 0.000517s : 2090: predicate.getattr_setattr_resolve 0.06% : 0.000015s : 166: predicate.graph_param_transform 0.55% : 0.000134s : 997: predicate.incorporate_call 0.54% : 0.000132s : 997: predicate.incorporate_call_switch 3.99% : 0.000976s : 5397: predicate.inline 2.41% : 0.000590s : 2683: predicate.inline_without_move 0.38% : 0.000094s : 531: predicate.j_node_and_user_rematch 0.35% : 0.000085s : 493: predicate.less_batch_normalization 1.09% : 0.000266s : 1965: predicate.list_to_tuple_eliminator_ 1.82% : 0.000445s : 3370: predicate.load_eliminater 0.21% : 0.000050s : 183: predicate.loop_unroll_after_grad 2.31% : 0.000566s : 3091: predicate.loop_unroll_before_grad 0.94% : 0.000229s : 1757: predicate.make_slice_get_slice_eliminator 0.56% : 0.000138s : 999: predicate.merge_addn 3.23% : 0.000791s : 4286: predicate.micro_step_allgather_replace 3.25% : 0.000795s : 4286: predicate.mini_step_allgather_replace 0.73% : 0.000180s : 1382: predicate.minmaximum_grad 0.20% : 0.000048s : 324: predicate.mutable_eliminate 0.10% : 0.000025s : 166: predicate.opt_reshape 0.11% : 0.000028s : 183: predicate.parallel_virtual_node 1.95% : 0.000477s : 1966: predicate.partial_defer_inline 1.06% : 0.000259s : 1806: predicate.partial_eliminate 0.74% : 0.000180s : 1381: predicate.print_const_string_wrapper 0.58% : 0.000142s : 986: predicate.reduce_all_const_elim 0.91% : 0.000222s : 1382: predicate.reduce_eliminate 0.16% : 0.000040s : 531: predicate.remove_not_recompute_node 2.07% : 0.000506s : 5958: predicate.replace_applicator 0.81% : 0.000199s : 2683: predicate.replace_old_param 0.06% : 0.000015s : 183: predicate.reset_defer_inline 0.75% : 0.000184s : 1382: predicate.reshape_eliminate 3.27% : 0.000800s : 4286: predicate.row_tensor_add_zeros_like 0.12% : 0.000029s : 183: predicate.row_tensor_eliminate 3.42% : 0.000838s : 4402: predicate.same_eliminate 0.24% : 0.000058s : 738: predicate.set_cell_output_no_recompute 0.32% : 0.000079s : 531: predicate.shard_identity_eliminate 2.22% : 0.000543s : 2866: predicate.special_op_eliminate 0.63% : 0.000154s : 999: predicate.specialize_transform 3.56% : 0.000870s : 4286: predicate.split_environ_get_set_with_tuple_value 1.62% : 0.000397s : 2683: predicate.stack_unstack_eliminate 1.80% : 0.000441s : 3370: predicate.stopgrad_eliminater 0.10% : 0.000025s : 183: predicate.switch_call_monad_eliminater 1.21% : 0.000296s : 1966: predicate.switch_defer_inline 4.54% : 0.001111s : 6368: predicate.switch_layer_defer_inline 4.45% : 0.001089s : 6128: predicate.switch_simplify 0.79% : 0.000193s : 1382: predicate.tile_eliminate 0.70% : 0.000172s : 1382: predicate.transpose_eliminate 1.04% : 0.000254s : 1734: predicate.tuple_list_convert_item_index_to_positive 1.05% : 0.000257s : 1740: predicate.tuple_list_get_item_const_eliminator 0.94% : 0.000229s : 1740: predicate.tuple_list_get_item_depend_reorder 1.93% : 0.000473s : 2962: predicate.tuple_list_get_item_eliminator 0.96% : 0.000234s : 1740: predicate.tuple_list_get_set_item_eliminator 1.63% : 0.000398s : 2737: predicate.tuple_list_set_item_eliminator 1.06% : 0.000258s : 1965: predicate.tuple_to_list_eliminator_ 1.80% : 0.000441s : 3370: predicate.updatestate_pure_node_eliminater 2.45% : 0.000600s : 4368: predicate.updatestate_useless_node_eliminater 0.11% : 0.000028s : 183: predicate.value_based_eliminate 0.32% : 0.000078s : 533: predicate.virtual_dataset_eliminate 0.31% : 0.000075s : 531: predicate.virtual_output_eliminate 0.12% : 0.000029s : 183: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.062854 841 68.06% : 0.042778s : 383: func_graph_cloner_run.FuncGraphClonerGraph 2.25% : 0.001416s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.69% : 0.018660s : 436: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 16.646428 350 0.00% : 0.000005s : 1: ForceFp32Comm 0.24% : 0.040452s : 1: a1a2 0.00% : 0.000186s : 1: add_cache_embedding 0.00% : 0.000211s : 1: add_comm_op_reuse_tag 0.01% : 0.000855s : 1: add_recomputation 0.00% : 0.000485s : 1: assign_add_opt 0.01% : 0.001871s : 1: auto_monad 0.00% : 0.000365s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001217s : 1: bootstrap 0.00% : 0.000086s : 1: cconv 0.00% : 0.000207s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000146s : 1: convert_after_rewriter 0.00% : 0.000406s : 1: cse_after_recomputation 0.00% : 0.000086s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.01% : 0.001661s : 1: eliminate_special_op_node 0.00% : 0.000129s : 1: environ_conv 0.00% : 0.000017s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000033s : 1: graph_reusing 0.00% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000009s : 1: handle_group_info 0.25% : 0.041958s : 1: inline 0.01% : 0.001161s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000746s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000937s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.19% : 0.031936s : 61: opt.transform.a1a2 0.00% : 0.000232s : 1: opt.transform.loop_unroll_optimizer 0.68% : 0.112391s : 148: opt.transform.opt_a 0.01% : 0.001024s : 1: opt.transform.opt_after_cconv 0.03% : 0.004291s : 27: opt.transform.opt_b 0.24% : 0.040212s : 20: opt.transform.opt_resolve 0.01% : 0.001197s : 1: opt.transform.opt_trans_graph 0.01% : 0.001050s : 6: opt.transform.special_op_eliminate 0.01% : 0.000892s : 4: opt.transform.symbol_engine_opt 4.27% : 0.710483s : 1: opt_a 0.01% : 0.002032s : 1: opt_after_cconv 0.03% : 0.005330s : 1: opt_b 4.38% : 0.728553s : 1: optimize 0.00% : 0.000188s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000107s : 1: order_py_execute_after_rewriter 0.00% : 0.000151s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000195s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000014s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000069s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000165s : 1: parallel-infer-symbol 0.00% : 0.000008s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000092s : 1: pipeline_split 0.00% : 0.000097s : 1: pre_auto_parallel 0.00% : 0.000123s : 1: py_interpret_to_execute 0.00% : 0.000187s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000137s : 1: remove_cast_before_assign_add 0.01% : 0.000921s : 1: remove_dup_value 0.87% : 0.145063s : 3: renormalize.infer 0.41% : 0.067898s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001104s : 1: rewriter_after_opt_a 0.01% : 0.001756s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000196s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.00% : 0.000175s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.001008s : 1: symbol_engine_optimizer 83.96% : 13.976428s : 1: task_emit 0.01% : 0.001231s : 1: tuple_transform 4.29% : 0.714720s : 1: type_inference 0.01% : 0.001419s : 1: validate TotalTime = 15.5262, [21] [bootstrap]: 0.00131662 [type_inference]: 0.736022 [auto_monad]: 0.00189054 [graph_reusing]: 2.594e-05 [inline]: 0.0426565, [2] [rewriter_before_opt_a]: 0.0015007 [a1a2]: 0.0411171, [2] [Cycle 1]: 0.0283412, [11] [expand_dump_flag]: 6.323e-05 [switch_simplify]: 0.00105578 [loop_unroll]: 0.0006722 [a_1]: 0.0222055 [recompute_prepare]: 0.00016264 [updatestate_depend_eliminate]: 0.00036371 [updatestate_assign_eliminate]: 9.195e-05 [updatestate_loads_eliminate]: 0.00020818 [parameter_eliminate]: 7.23e-06 [a_2]: 0.00322913 [parallel_inline_pass]: 0.00010208 [Cycle 2]: 0.00527969, [11] [expand_dump_flag]: 1.24e-06 [switch_simplify]: 9.263e-05 [loop_unroll]: 9.202e-05 [a_1]: 0.00311496 [recompute_prepare]: 9.767e-05 [updatestate_depend_eliminate]: 7.3e-05 [updatestate_assign_eliminate]: 6.193e-05 [updatestate_loads_eliminate]: 6.256e-05 [parameter_eliminate]: 3.27e-06 [a_2]: 0.00150859 [parallel_inline_pass]: 9.945e-05 [parallel-infer-symbol]: 0.00017291 [pre_auto_parallel]: 0.00010578 [insert-virtual-dataset]: 0.00114376 [parallel-infer-symbol-second]: 2.52e-06 [dataset_repeat_opt]: 8.694e-05 [pipeline_split]: 0.00010216 [optimize]: 0.735653, [52] [py_interpret_to_execute]: 0.0001154 [rewriter_before_opt_a]: 0.00027299 [opt_a]: 0.717017, [3] [Cycle 1]: 0.613055, [46] [expand_dump_flag]: 2.11e-06 [switch_simplify]: 0.000111 [loop_unroll]: 9.794e-05 [a_1]: 0.00329977 [recompute_prepare]: 0.00010393 [updatestate_depend_eliminate]: 9.827e-05 [updatestate_assign_eliminate]: 6.38e-05 [updatestate_loads_eliminate]: 6.972e-05 [parameter_eliminate]: 3.50999e-06 [a_2]: 0.00164325 [accelerated_algorithm]: 0.00023994 [shard]: 2.11e-06 [meta_shard_fg_expand]: 4.865e-05 [shard_inline]: 0.00010803 [auto_parallel]: 7.409e-05 [parallel]: 0.0527706 [flash_sp]: 7.801e-05 [merge_comm]: 0.00017175 [allreduce_fusion]: 9.86e-05 [matmul_add_comm_reduction]: 0.00012024 [allreduce_slice_to_reducescatter]: 5.19998e-07 [virtual_shard_identity]: 0.00015307 [virtual_dataset]: 0.00018819 [get_grad_eliminate_]: 0.00014336 [virtual_output]: 0.00014058 [merge_forward]: 9.106e-05 [cell_reuse_recompute_pass]: 3.11001e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00031313 [before_grad]: 0.00023952 [inplace_validation]: 0.00016311 [parallel_renormalize]: 0.0217903 [update_top_fg]: 8.29998e-07 [cast_eliminate]: 0.00021106 [meta_fg_expand]: 0.292561 [inplace_validation_after_expand]: 0.0019128 [flash_sp_send_recv_attached]: 0.00147832 [receive_attached]: 8.456e-05 [after_resolve]: 0.00245144 [a_after_grad]: 0.00466229 [special_op_eliminate]: 0.00229724 [renormalize]: 0.183562 [add_forward_monad_depend]: 0.00037843 [auto_monad_grad]: 0.00027369 [auto_monad_eliminator]: 0.00222465 [cse]: 0.00552199 [a_3]: 0.032407 [Cycle 2]: 0.0880825, [46] [expand_dump_flag]: 6.742e-05 [switch_simplify]: 0.0023232 [loop_unroll]: 0.00198544 [a_1]: 0.0394583 [recompute_prepare]: 0.00023662 [updatestate_depend_eliminate]: 0.00033519 [updatestate_assign_eliminate]: 0.00013554 [updatestate_loads_eliminate]: 0.00019612 [parameter_eliminate]: 4.65999e-06 [a_2]: 0.00603151 [accelerated_algorithm]: 0.00022117 [shard]: 2.66e-06 [meta_shard_fg_expand]: 9.78e-05 [shard_inline]: 0.00019661 [auto_parallel]: 0.00014117 [parallel]: 1.477e-05 [flash_sp]: 0.00014923 [merge_comm]: 0.00014448 [allreduce_fusion]: 0.00011933 [matmul_add_comm_reduction]: 0.00014108 [allreduce_slice_to_reducescatter]: 5.10001e-07 [virtual_shard_identity]: 0.00019446 [virtual_dataset]: 0.00019186 [get_grad_eliminate_]: 0.00017969 [virtual_output]: 0.00018698 [merge_forward]: 0.00011677 [cell_reuse_recompute_pass]: 2.89999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00032134 [before_grad]: 0.00030936 [inplace_validation]: 0.00011152 [parallel_renormalize]: 1.09998e-07 [update_top_fg]: 6.19999e-07 [cast_eliminate]: 0.00020688 [meta_fg_expand]: 0.00036669 [inplace_validation_after_expand]: 0.00023656 [flash_sp_send_recv_attached]: 2.48e-06 [receive_attached]: 1.92001e-06 [after_resolve]: 0.00021588 [a_after_grad]: 0.00034288 [special_op_eliminate]: 0.00018796 [renormalize]: 0.0221878 [add_forward_monad_depend]: 4.89999e-06 [auto_monad_grad]: 2.99001e-06 [auto_monad_eliminator]: 0.00038058 [cse]: 0.00843694 [a_3]: 0.00137721 [Cycle 3]: 0.0158547, [46] [expand_dump_flag]: 2.37999e-06 [switch_simplify]: 0.00017887 [loop_unroll]: 0.00017507 [a_1]: 0.00569381 [recompute_prepare]: 0.0001867 [updatestate_depend_eliminate]: 0.00019036 [updatestate_assign_eliminate]: 0.0001227 [updatestate_loads_eliminate]: 0.00012105 [parameter_eliminate]: 2.98e-06 [a_2]: 0.00290908 [accelerated_algorithm]: 0.00020707 [shard]: 1.54e-06 [meta_shard_fg_expand]: 6.407e-05 [shard_inline]: 0.00018114 [auto_parallel]: 0.00014117 [parallel]: 1.061e-05 [flash_sp]: 2.16e-06 [merge_comm]: 0.00013641 [allreduce_fusion]: 0.00012304 [matmul_add_comm_reduction]: 0.00015242 [allreduce_slice_to_reducescatter]: 5.69999e-07 [virtual_shard_identity]: 0.0001878 [virtual_dataset]: 0.00021301 [get_grad_eliminate_]: 0.00017406 [virtual_output]: 0.00017629 [merge_forward]: 0.00012372 [cell_reuse_recompute_pass]: 2.91e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00031485 [before_grad]: 0.00030039 [inplace_validation]: 0.00011825 [parallel_renormalize]: 1.19995e-07 [update_top_fg]: 6.19999e-07 [cast_eliminate]: 0.00019497 [meta_fg_expand]: 0.0001491 [inplace_validation_after_expand]: 0.00015111 [flash_sp_send_recv_attached]: 1.81e-06 [receive_attached]: 2.02e-06 [after_resolve]: 0.00019658 [a_after_grad]: 0.0002948 [special_op_eliminate]: 0.00017764 [renormalize]: 8.00064e-08 [add_forward_monad_depend]: 2.28001e-06 [auto_monad_grad]: 1.37e-06 [auto_monad_eliminator]: 0.00020712 [cse]: 0.00056125 [a_3]: 0.00135346 [py_interpret_to_execute_after_opt_a]: 0.00018395 [slice_cell_reuse_recomputed_activation]: 2.37e-06 [rewriter_after_opt_a]: 0.00116348 [convert_after_rewriter]: 0.00014152 [order_py_execute_after_rewriter]: 0.00010887 [opt_b]: 0.00544381, [1] [Cycle 1]: 0.00543443, [7] [b_1]: 0.00427026 [b_2]: 0.00018378 [updatestate_depend_eliminate]: 0.00012783 [updatestate_assign_eliminate]: 0.00011541 [updatestate_loads_eliminate]: 0.00011883 [renormalize]: 5.60001e-07 [cse]: 0.00055067 [optimize_parallel_all_gather_comm]: 0.00024757 [overlap_param_gather]: 1.21999e-06 [cconv]: 9.011e-05 [loop_unroll]: 0.00099623 [opt_after_cconv]: 0.00205666, [1] [Cycle 1]: 0.00204808, [7] [c_1]: 0.00103213 [parameter_eliminate]: 4.61e-06 [updatestate_depend_eliminate]: 0.00016535 [updatestate_assign_eliminate]: 0.00012082 [updatestate_loads_eliminate]: 0.00012097 [cse]: 0.00053815 [renormalize]: 6.10002e-07 [remove_dup_value]: 0.00096593 [tuple_transform]: 0.00121783, [1] [Cycle 1]: 0.00120832, [2] [d_1]: 0.00118547 [renormalize]: 5.60001e-07 [partial_unused_args_eliminate]: 3.25e-06 [add_cache_embedding]: 0.00019299 [add_recomputation]: 0.00086799 [cse_after_recomputation]: 0.00041274, [1] [Cycle 1]: 0.00040359, [1] [cse]: 0.00038789 [environ_conv]: 0.00011928 [swap_dp_allreduce_reducescatter]: 0.00017235 [bias_add_comm_swap]: 2.95999e-06 [label_micro_interleaved_index]: 1.96999e-06 [label_fine_grained_interleaved_index]: 0.00069783 [merge_cast_opt]: 2.07e-06 [slice_recompute_activation]: 0.00019606 [micro_interleaved_order_control]: 2.47001e-06 [assign_add_opt]: 0.00048707 [ForceFp32Comm]: 1.5e-06 [remove_cast_before_assign_add]: 0.00013712 [full_micro_interleaved_order_control]: 2.24001e-06 [reorder_send_recv_between_fp_bp]: 1.75e-06 [comm_op_add_attrs]: 0.00020962 [add_comm_op_reuse_tag]: 0.00020275 [interleave_split_concat_branches]: 1.08e-06 [interleave_parallel_branches]: 8.99992e-07 [overlap_opt_shard_in_pipeline]: 9.98001e-06 [overlap_opt_shard_grad_in_pipeline]: 3.73001e-06 [control_data_broadcast_order]: 1.41999e-06 [grouped_pairwise_exchange_alltoall]: 1.303e-05 [offloading_packed_experts]: 2.59001e-06 [overlap_recompute_and_grad_model_parallel]: 2.35e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.7e-07 [overlap_recompute_allgather_and_fa_grad]: 7.457e-05 [overlap_grad_ring_attention]: 0.00017892 [overlap_grad_flash_sp]: 0.00014773 [begin_end_overlap_inline]: 9.49993e-07 [split_matmul_comm_elemetwise]: 2.37999e-06 [split_layernorm_comm]: 2.4e-06 [handle_group_info]: 7.66e-06 [symbol_engine_optimizer]: 0.00101881, [1] [Cycle 1]: 0.00101101, [6] [build]: 6.182e-05 [elim_shapecalc]: 0.00018348 [elim_not_effective]: 0.00028242 [opt_reshape]: 0.00017169 [fold_const_symbol]: 0.00026596 [renormalize]: 4.40006e-07 [pipeline_parallel_scheduler]: 4.24001e-06 [auto_monad_reorder]: 0.0003733 [get_jit_bprop_graph]: 6.10002e-07 [rewriter_after_jit_bprop_graph]: 4.69998e-07 [eliminate_special_op_node]: 0.00163016 [distribtued_split]: 4.23001e-06 [validate]: 0.00037398 [task_emit]: 14.0031 [execute]: 1.261e-05 Sums bootstrap : 0.001317s : 0.01% type_inference : 0.736022s : 4.74% auto_monad : 0.001891s : 0.01% graph_reusing : 0.000026s : 0.00% inline.rewriter_before_opt_a : 0.001501s : 0.01% inline.a1a2.expand_dump_flag : 0.000064s : 0.00% inline.a1a2.switch_simplify : 0.001148s : 0.01% inline.a1a2.loop_unroll : 0.000764s : 0.00% inline.a1a2.a_1 : 0.025320s : 0.16% inline.a1a2.recompute_prepare : 0.000260s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000437s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000154s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000271s : 0.00% inline.a1a2.parameter_eliminate : 0.000011s : 0.00% inline.a1a2.a_2 : 0.004738s : 0.03% inline.a1a2.parallel_inline_pass : 0.000202s : 0.00% parallel-infer-symbol : 0.000173s : 0.00% pre_auto_parallel : 0.000106s : 0.00% insert-virtual-dataset : 0.001144s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000087s : 0.00% pipeline_split : 0.000102s : 0.00% optimize.py_interpret_to_execute : 0.000115s : 0.00% optimize.rewriter_before_opt_a : 0.000273s : 0.00% optimize.opt_a.expand_dump_flag : 0.000072s : 0.00% optimize.opt_a.switch_simplify : 0.002613s : 0.02% optimize.opt_a.loop_unroll : 0.002258s : 0.01% optimize.opt_a.a_1 : 0.048452s : 0.31% optimize.opt_a.recompute_prepare : 0.000527s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000624s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000322s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000387s : 0.00% optimize.opt_a.parameter_eliminate : 0.000011s : 0.00% optimize.opt_a.a_2 : 0.010584s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000668s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000211s : 0.00% optimize.opt_a.shard_inline : 0.000486s : 0.00% optimize.opt_a.auto_parallel : 0.000356s : 0.00% optimize.opt_a.parallel : 0.052796s : 0.34% optimize.opt_a.flash_sp : 0.000229s : 0.00% optimize.opt_a.merge_comm : 0.000453s : 0.00% optimize.opt_a.allreduce_fusion : 0.000341s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000414s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000535s : 0.00% optimize.opt_a.virtual_dataset : 0.000593s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000497s : 0.00% optimize.opt_a.virtual_output : 0.000504s : 0.00% optimize.opt_a.merge_forward : 0.000332s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000949s : 0.01% optimize.opt_a.before_grad : 0.000849s : 0.01% optimize.opt_a.inplace_validation : 0.000393s : 0.00% optimize.opt_a.parallel_renormalize : 0.021790s : 0.14% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000613s : 0.00% optimize.opt_a.meta_fg_expand : 0.293077s : 1.89% optimize.opt_a.inplace_validation_after_expand : 0.002300s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001483s : 0.01% optimize.opt_a.receive_attached : 0.000089s : 0.00% optimize.opt_a.after_resolve : 0.002864s : 0.02% optimize.opt_a.a_after_grad : 0.005300s : 0.03% optimize.opt_a.special_op_eliminate : 0.002663s : 0.02% optimize.opt_a.renormalize : 0.205750s : 1.33% optimize.opt_a.add_forward_monad_depend : 0.000386s : 0.00% optimize.opt_a.auto_monad_grad : 0.000278s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002812s : 0.02% optimize.opt_a.cse : 0.014520s : 0.09% optimize.opt_a.a_3 : 0.035138s : 0.23% optimize.py_interpret_to_execute_after_opt_a : 0.000184s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.001163s : 0.01% optimize.convert_after_rewriter : 0.000142s : 0.00% optimize.order_py_execute_after_rewriter : 0.000109s : 0.00% optimize.opt_b.b_1 : 0.004270s : 0.03% optimize.opt_b.b_2 : 0.000184s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000128s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000115s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000119s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000551s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000248s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000090s : 0.00% optimize.loop_unroll : 0.000996s : 0.01% optimize.opt_after_cconv.c_1 : 0.001032s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000165s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000121s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000121s : 0.00% optimize.opt_after_cconv.cse : 0.000538s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000966s : 0.01% optimize.tuple_transform.d_1 : 0.001185s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000193s : 0.00% optimize.add_recomputation : 0.000868s : 0.01% optimize.cse_after_recomputation.cse : 0.000388s : 0.00% optimize.environ_conv : 0.000119s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000172s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000698s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000196s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000487s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000137s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000210s : 0.00% optimize.add_comm_op_reuse_tag : 0.000203s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000010s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000013s : 0.00% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000075s : 0.00% optimize.overlap_grad_ring_attention : 0.000179s : 0.00% optimize.overlap_grad_flash_sp : 0.000148s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000008s : 0.00% optimize.symbol_engine_optimizer.build : 0.000062s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000183s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000282s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000172s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000266s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000373s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001630s : 0.01% distribtued_split : 0.000004s : 0.00% validate : 0.000374s : 0.00% task_emit : 14.003127s : 90.26% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.054565 4901 0.03% : 0.000019s : 4: substitution.ad_related_special_op_eliminate 0.04% : 0.000023s : 9: substitution.addn_check_dump 0.10% : 0.000054s : 7: substitution.addn_zero_filter 0.03% : 0.000015s : 7: substitution.adjust_all_reduce_mul_add 0.59% : 0.000324s : 71: substitution.arithmetic_simplify 0.10% : 0.000055s : 10: substitution.cast_eliminate 0.10% : 0.000055s : 47: substitution.depend_value_elim 0.07% : 0.000037s : 127: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000025s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000019s : 12: substitution.environ_get_depend_swap 0.05% : 0.000029s : 27: substitution.environ_get_eliminate 0.06% : 0.000034s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000024s : 28: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.02% : 0.000012s : 10: substitution.float_tuple_getitem_switch 0.07% : 0.000036s : 127: substitution.fold_const_symbol 63.76% : 0.034789s : 290: substitution.getattr_setattr_resolve 0.20% : 0.000108s : 164: substitution.graph_param_transform 0.02% : 0.000009s : 8: substitution.incorporate_call 0.01% : 0.000006s : 8: substitution.incorporate_call_switch 24.30% : 0.013258s : 363: substitution.inline 1.47% : 0.000801s : 127: substitution.inline_without_move 0.27% : 0.000146s : 361: substitution.j_node_and_user_rematch 0.24% : 0.000133s : 40: substitution.less_batch_normalization 0.09% : 0.000047s : 90: substitution.load_eliminater 0.10% : 0.000055s : 10: substitution.merge_addn 0.23% : 0.000125s : 115: substitution.minmaximum_grad 0.01% : 0.000003s : 10: substitution.opt_reshape 0.04% : 0.000020s : 1: substitution.partial_defer_inline 0.14% : 0.000079s : 28: substitution.partial_eliminate 0.04% : 0.000022s : 35: substitution.reduce_all_const_elim 0.07% : 0.000036s : 15: substitution.reduce_eliminate 0.34% : 0.000183s : 361: substitution.remove_not_recompute_node 2.29% : 0.001249s : 612: substitution.replace_applicator 0.25% : 0.000138s : 324: substitution.replace_old_param 0.20% : 0.000109s : 31: substitution.reshape_eliminate 0.02% : 0.000014s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000011s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.16% : 0.000090s : 36: substitution.switch_simplify 0.05% : 0.000029s : 11: substitution.tile_eliminate 0.53% : 0.000288s : 115: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000146s : 121: substitution.tuple_list_get_item_const_eliminator 0.42% : 0.000229s : 121: substitution.tuple_list_get_item_depend_reorder 1.65% : 0.000899s : 356: substitution.tuple_list_get_item_eliminator 0.37% : 0.000203s : 121: substitution.tuple_list_get_set_item_eliminator 0.36% : 0.000196s : 210: substitution.updatestate_pure_node_eliminater 0.63% : 0.000344s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000012s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.735563 2 96.60% : 0.710590s : 1: type_inference.infer 3.40% : 0.024973s : 1: type_inference.specialize ------[replace.] 0.011382 851 0.34% : 0.000038s : 4: replace.ad_related_special_op_eliminate 0.06% : 0.000007s : 1: replace.arithmetic_simplify 0.42% : 0.000047s : 7: replace.depend_value_elim 0.37% : 0.000042s : 3: replace.environ_get_set_eliminate 28.89% : 0.003288s : 189: replace.getattr_setattr_resolve 30.82% : 0.003508s : 342: replace.inline 0.21% : 0.000024s : 1: replace.merge_addn 1.06% : 0.000121s : 7: replace.partial_eliminate 3.88% : 0.000441s : 28: replace.replace_applicator 3.51% : 0.000400s : 36: replace.switch_simplify 0.45% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 29.67% : 0.003377s : 225: replace.tuple_list_get_item_eliminator 0.15% : 0.000017s : 1: replace.updatestate_useless_node_eliminater 0.18% : 0.000020s : 1: replace.virtual_dataset_eliminate ------[match.] 0.043761 851 0.03% : 0.000015s : 4: match.ad_related_special_op_eliminate 0.03% : 0.000012s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000017s : 3: match.environ_get_set_eliminate 68.35% : 0.029913s : 189: match.getattr_setattr_resolve 29.70% : 0.012999s : 342: match.inline 0.06% : 0.000025s : 1: match.merge_addn 0.10% : 0.000042s : 7: match.partial_eliminate 0.27% : 0.000117s : 28: match.replace_applicator 0.16% : 0.000070s : 36: match.switch_simplify 0.07% : 0.000031s : 6: match.tuple_list_get_item_depend_reorder 1.14% : 0.000497s : 225: match.tuple_list_get_item_eliminator 0.02% : 0.000010s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000011s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.024947157640 0.75% : 0.000188s : 1382: predicate.accumulaten_eliminater 0.28% : 0.000069s : 329: predicate.ad_related_special_op_eliminate 0.55% : 0.000137s : 998: predicate.addn_check_dump 0.73% : 0.000183s : 1382: predicate.addn_zero_filter 0.72% : 0.000179s : 1382: predicate.adjust_all_reduce_mul_add 1.78% : 0.000444s : 2381: predicate.arithmetic_simplify 1.09% : 0.000272s : 1907: predicate.cast_eliminate 3.43% : 0.000855s : 4404: predicate.check_bprop_eliminate 0.56% : 0.000139s : 998: predicate.compare_switch_simplify 0.06% : 0.000015s : 184: predicate.const_output_eliminate 0.19% : 0.000047s : 320: predicate.convert_tensor_all_eliminate 1.14% : 0.000285s : 1617: predicate.convert_tensor_eliminate 0.56% : 0.000141s : 1001: predicate.depend_value_elim 0.79% : 0.000198s : 1386: predicate.dict_get_item_const_eliminator 0.82% : 0.000205s : 1386: predicate.dict_get_item_eliminator 0.78% : 0.000194s : 1386: predicate.dict_set_item_eliminator 0.05% : 0.000013s : 164: predicate.elim_not_effective 0.11% : 0.000028s : 164: predicate.elim_shapecalc_of_broadcastargs 0.82% : 0.000204s : 1567: predicate.environ_add_const_eliminate 0.81% : 0.000201s : 1570: predicate.environ_get_add_eliminate 0.81% : 0.000203s : 1567: predicate.environ_get_depend_swap 1.40% : 0.000350s : 2568: predicate.environ_get_eliminate 0.81% : 0.000202s : 1570: predicate.environ_get_set_eliminate 1.08% : 0.000269s : 1967: predicate.exchange_switch_depend_value 1.34% : 0.000335s : 1967: predicate.float_depend_g_call 0.54% : 0.000135s : 998: predicate.float_environ_get_switch 0.65% : 0.000163s : 1182: predicate.float_tuple_getitem_switch 0.05% : 0.000012s : 164: predicate.fold_const_symbol 0.31% : 0.000078s : 532: predicate.get_grad_eliminate 2.11% : 0.000526s : 2090: predicate.getattr_setattr_resolve 0.06% : 0.000014s : 164: predicate.graph_param_transform 0.54% : 0.000135s : 998: predicate.incorporate_call 0.53% : 0.000133s : 998: predicate.incorporate_call_switch 3.81% : 0.000951s : 5403: predicate.inline 2.38% : 0.000593s : 2684: predicate.inline_without_move 0.16% : 0.000041s : 532: predicate.j_node_and_user_rematch 0.35% : 0.000089s : 494: predicate.less_batch_normalization 1.12% : 0.000279s : 1965: predicate.list_to_tuple_eliminator_ 1.77% : 0.000442s : 3374: predicate.load_eliminater 0.19% : 0.000048s : 184: predicate.loop_unroll_after_grad 2.41% : 0.000602s : 3092: predicate.loop_unroll_before_grad 1.15% : 0.000287s : 1760: predicate.make_slice_get_slice_eliminator 0.57% : 0.000141s : 1000: predicate.merge_addn 3.30% : 0.000823s : 4288: predicate.micro_step_allgather_replace 3.30% : 0.000824s : 4288: predicate.mini_step_allgather_replace 0.72% : 0.000180s : 1383: predicate.minmaximum_grad 0.19% : 0.000047s : 320: predicate.mutable_eliminate 0.10% : 0.000025s : 164: predicate.opt_reshape 0.11% : 0.000028s : 184: predicate.parallel_virtual_node 1.98% : 0.000493s : 1967: predicate.partial_defer_inline 1.06% : 0.000263s : 1808: predicate.partial_eliminate 0.74% : 0.000184s : 1382: predicate.print_const_string_wrapper 0.56% : 0.000139s : 987: predicate.reduce_all_const_elim 0.91% : 0.000227s : 1383: predicate.reduce_eliminate 0.17% : 0.000042s : 532: predicate.remove_not_recompute_node 2.12% : 0.000529s : 5961: predicate.replace_applicator 0.81% : 0.000202s : 2684: predicate.replace_old_param 0.06% : 0.000015s : 184: predicate.reset_defer_inline 0.74% : 0.000186s : 1383: predicate.reshape_eliminate 3.35% : 0.000835s : 4288: predicate.row_tensor_add_zeros_like 0.12% : 0.000030s : 184: predicate.row_tensor_eliminate 3.52% : 0.000877s : 4404: predicate.same_eliminate 0.24% : 0.000059s : 739: predicate.set_cell_output_no_recompute 0.32% : 0.000080s : 532: predicate.shard_identity_eliminate 2.41% : 0.000602s : 2868: predicate.special_op_eliminate 0.63% : 0.000156s : 1000: predicate.specialize_transform 3.67% : 0.000915s : 4288: predicate.split_environ_get_set_with_tuple_value 1.58% : 0.000394s : 2684: predicate.stack_unstack_eliminate 1.76% : 0.000439s : 3374: predicate.stopgrad_eliminater 0.10% : 0.000025s : 184: predicate.switch_call_monad_eliminater 1.20% : 0.000298s : 1967: predicate.switch_defer_inline 4.60% : 0.001147s : 6371: predicate.switch_layer_defer_inline 4.18% : 0.001042s : 6131: predicate.switch_simplify 0.72% : 0.000180s : 1383: predicate.tile_eliminate 0.72% : 0.000179s : 1383: predicate.transpose_eliminate 1.06% : 0.000264s : 1734: predicate.tuple_list_convert_item_index_to_positive 1.08% : 0.000269s : 1740: predicate.tuple_list_get_item_const_eliminator 0.97% : 0.000242s : 1740: predicate.tuple_list_get_item_depend_reorder 1.87% : 0.000467s : 2963: predicate.tuple_list_get_item_eliminator 0.98% : 0.000246s : 1740: predicate.tuple_list_get_set_item_eliminator 1.62% : 0.000404s : 2738: predicate.tuple_list_set_item_eliminator 1.08% : 0.000268s : 1965: predicate.tuple_to_list_eliminator_ 1.80% : 0.000449s : 3374: predicate.updatestate_pure_node_eliminater 2.43% : 0.000607s : 4373: predicate.updatestate_useless_node_eliminater 0.11% : 0.000028s : 184: predicate.value_based_eliminate 0.44% : 0.000110s : 534: predicate.virtual_dataset_eliminate 0.30% : 0.000075s : 532: predicate.virtual_output_eliminate 0.12% : 0.000029s : 184: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.069317 841 66.29% : 0.045950s : 383: func_graph_cloner_run.FuncGraphClonerGraph 2.17% : 0.001503s : 22: func_graph_cloner_run.FuncGraphClonerNode 31.54% : 0.021864s : 436: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 16.730209 350 0.00% : 0.000006s : 1: ForceFp32Comm 0.25% : 0.041122s : 1: a1a2 0.00% : 0.000201s : 1: add_cache_embedding 0.00% : 0.000211s : 1: add_comm_op_reuse_tag 0.01% : 0.000882s : 1: add_recomputation 0.00% : 0.000499s : 1: assign_add_opt 0.01% : 0.001915s : 1: auto_monad 0.00% : 0.000389s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.001364s : 1: bootstrap 0.00% : 0.000098s : 1: cconv 0.00% : 0.000219s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000151s : 1: convert_after_rewriter 0.00% : 0.000418s : 1: cse_after_recomputation 0.00% : 0.000095s : 1: dataset_repeat_opt 0.00% : 0.000012s : 1: distribtued_split 0.01% : 0.001646s : 1: eliminate_special_op_node 0.00% : 0.000128s : 1: environ_conv 0.00% : 0.000023s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000035s : 1: graph_reusing 0.00% : 0.000017s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000011s : 1: handle_group_info 0.26% : 0.042666s : 1: inline 0.01% : 0.001167s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000710s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.001008s : 1: loop_unroll 0.00% : 0.000007s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.19% : 0.032362s : 61: opt.transform.a1a2 0.00% : 0.000229s : 1: opt.transform.loop_unroll_optimizer 0.69% : 0.115807s : 148: opt.transform.opt_a 0.01% : 0.001029s : 1: opt.transform.opt_after_cconv 0.03% : 0.004417s : 27: opt.transform.opt_b 0.25% : 0.041367s : 20: opt.transform.opt_resolve 0.01% : 0.001181s : 1: opt.transform.opt_trans_graph 0.01% : 0.001078s : 6: opt.transform.special_op_eliminate 0.01% : 0.000897s : 4: opt.transform.symbol_engine_opt 4.29% : 0.717025s : 1: opt_a 0.01% : 0.002063s : 1: opt_after_cconv 0.03% : 0.005449s : 1: opt_b 4.40% : 0.735668s : 1: optimize 0.00% : 0.000259s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000115s : 1: order_py_execute_after_rewriter 0.00% : 0.000152s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000185s : 1: overlap_grad_ring_attention 0.00% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000014s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000079s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000186s : 1: parallel-infer-symbol 0.00% : 0.000010s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000011s : 1: pipeline_parallel_scheduler 0.00% : 0.000113s : 1: pipeline_split 0.00% : 0.000116s : 1: pre_auto_parallel 0.00% : 0.000123s : 1: py_interpret_to_execute 0.00% : 0.000195s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000143s : 1: remove_cast_before_assign_add 0.01% : 0.000983s : 1: remove_dup_value 0.92% : 0.154299s : 3: renormalize.infer 0.44% : 0.073197s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001176s : 1: rewriter_after_opt_a 0.01% : 0.001799s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000205s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000180s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.001023s : 1: symbol_engine_optimizer 83.70% : 14.003173s : 1: task_emit 0.01% : 0.001223s : 1: tuple_transform 4.40% : 0.736060s : 1: type_inference 0.01% : 0.001503s : 1: validate TotalTime = 15.6368, [21] [bootstrap]: 0.00126828 [type_inference]: 0.718615 [auto_monad]: 0.00189298 [graph_reusing]: 2.549e-05 [inline]: 0.0426906, [2] [rewriter_before_opt_a]: 0.0014561 [a1a2]: 0.0411965, [2] [Cycle 1]: 0.0281049, [11] [expand_dump_flag]: 3.227e-05 [switch_simplify]: 0.00109401 [loop_unroll]: 0.00067809 [a_1]: 0.0218293 [recompute_prepare]: 0.00016126 [updatestate_depend_eliminate]: 0.00035037 [updatestate_assign_eliminate]: 8.799e-05 [updatestate_loads_eliminate]: 0.00020188 [parameter_eliminate]: 5.89e-06 [a_2]: 0.00338811 [parallel_inline_pass]: 0.0001022 [Cycle 2]: 0.00566431, [11] [expand_dump_flag]: 1.24e-06 [switch_simplify]: 9.362e-05 [loop_unroll]: 9.258e-05 [a_1]: 0.00323092 [recompute_prepare]: 0.00010386 [updatestate_depend_eliminate]: 0.00022666 [updatestate_assign_eliminate]: 6.496e-05 [updatestate_loads_eliminate]: 6.633e-05 [parameter_eliminate]: 3.91999e-06 [a_2]: 0.00158908 [parallel_inline_pass]: 0.00011211 [parallel-infer-symbol]: 0.00017624 [pre_auto_parallel]: 9.945e-05 [insert-virtual-dataset]: 0.00115798 [parallel-infer-symbol-second]: 2.59001e-06 [dataset_repeat_opt]: 8.959e-05 [pipeline_split]: 0.00010444 [optimize]: 0.710572, [52] [py_interpret_to_execute]: 0.00011778 [rewriter_before_opt_a]: 0.00028136 [opt_a]: 0.692235, [3] [Cycle 1]: 0.590379, [46] [expand_dump_flag]: 2.04e-06 [switch_simplify]: 0.00012555 [loop_unroll]: 9.759e-05 [a_1]: 0.00348617 [recompute_prepare]: 0.00010835 [updatestate_depend_eliminate]: 9.952e-05 [updatestate_assign_eliminate]: 6.431e-05 [updatestate_loads_eliminate]: 6.617e-05 [parameter_eliminate]: 3.38e-06 [a_2]: 0.0016353 [accelerated_algorithm]: 0.0002632 [shard]: 2.12001e-06 [meta_shard_fg_expand]: 5.246e-05 [shard_inline]: 0.00012138 [auto_parallel]: 8.389e-05 [parallel]: 0.037333 [flash_sp]: 7.79e-05 [merge_comm]: 0.00017231 [allreduce_fusion]: 9.67e-05 [matmul_add_comm_reduction]: 0.0001206 [allreduce_slice_to_reducescatter]: 6.10002e-07 [virtual_shard_identity]: 0.00015233 [virtual_dataset]: 0.00018904 [get_grad_eliminate_]: 0.00014199 [virtual_output]: 0.000141 [merge_forward]: 9.12e-05 [cell_reuse_recompute_pass]: 2.75999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00032241 [before_grad]: 0.00023849 [inplace_validation]: 0.00015762 [parallel_renormalize]: 0.0215679 [update_top_fg]: 8.2e-07 [cast_eliminate]: 0.00020915 [meta_fg_expand]: 0.294195 [inplace_validation_after_expand]: 0.00185935 [flash_sp_send_recv_attached]: 0.00152792 [receive_attached]: 8.161e-05 [after_resolve]: 0.00240782 [a_after_grad]: 0.00464963 [special_op_eliminate]: 0.00227409 [renormalize]: 0.175422 [add_forward_monad_depend]: 0.0004436 [auto_monad_grad]: 0.00026522 [auto_monad_eliminator]: 0.00227037 [cse]: 0.00536184 [a_3]: 0.0318297 [Cycle 2]: 0.0861693, [46] [expand_dump_flag]: 6.332e-05 [switch_simplify]: 0.00229637 [loop_unroll]: 0.00198514 [a_1]: 0.0383995 [recompute_prepare]: 0.00023722 [updatestate_depend_eliminate]: 0.00033502 [updatestate_assign_eliminate]: 0.00013914 [updatestate_loads_eliminate]: 0.00019749 [parameter_eliminate]: 3.9e-06 [a_2]: 0.00589433 [accelerated_algorithm]: 0.00021399 [shard]: 2.42001e-06 [meta_shard_fg_expand]: 9.331e-05 [shard_inline]: 0.00019348 [auto_parallel]: 0.00014891 [parallel]: 1.515e-05 [flash_sp]: 0.00015049 [merge_comm]: 0.00015492 [allreduce_fusion]: 0.00012146 [matmul_add_comm_reduction]: 0.00014324 [allreduce_slice_to_reducescatter]: 6.19999e-07 [virtual_shard_identity]: 0.00019673 [virtual_dataset]: 0.00018819 [get_grad_eliminate_]: 0.00018125 [virtual_output]: 0.00018392 [merge_forward]: 0.0001174 [cell_reuse_recompute_pass]: 2.81e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00032634 [before_grad]: 0.00030914 [inplace_validation]: 0.00010936 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 8.29998e-07 [cast_eliminate]: 0.00020412 [meta_fg_expand]: 0.00034897 [inplace_validation_after_expand]: 0.0002348 [flash_sp_send_recv_attached]: 3.04999e-06 [receive_attached]: 1.43e-06 [after_resolve]: 0.00021604 [a_after_grad]: 0.00031446 [special_op_eliminate]: 0.00018717 [renormalize]: 0.0217833 [add_forward_monad_depend]: 5.30999e-06 [auto_monad_grad]: 2.04e-06 [auto_monad_eliminator]: 0.00036957 [cse]: 0.00832375 [a_3]: 0.00130212 [Cycle 3]: 0.0156624, [46] [expand_dump_flag]: 2.32999e-06 [switch_simplify]: 0.00017772 [loop_unroll]: 0.0001743 [a_1]: 0.00566147 [recompute_prepare]: 0.00018703 [updatestate_depend_eliminate]: 0.00018587 [updatestate_assign_eliminate]: 0.00012417 [updatestate_loads_eliminate]: 0.00011984 [parameter_eliminate]: 3.42999e-06 [a_2]: 0.00282658 [accelerated_algorithm]: 0.00020383 [shard]: 1.55e-06 [meta_shard_fg_expand]: 6.385e-05 [shard_inline]: 0.00017884 [auto_parallel]: 0.00014063 [parallel]: 9.97001e-06 [flash_sp]: 1.91e-06 [merge_comm]: 0.00013526 [allreduce_fusion]: 0.0001227 [matmul_add_comm_reduction]: 0.00015228 [allreduce_slice_to_reducescatter]: 5.69999e-07 [virtual_shard_identity]: 0.00018524 [virtual_dataset]: 0.00017914 [get_grad_eliminate_]: 0.00017093 [virtual_output]: 0.00017481 [merge_forward]: 0.00011917 [cell_reuse_recompute_pass]: 2.84999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00031668 [before_grad]: 0.00029812 [inplace_validation]: 0.00011694 [parallel_renormalize]: 1.59998e-07 [update_top_fg]: 5.9999e-07 [cast_eliminate]: 0.00026479 [meta_fg_expand]: 0.00015059 [inplace_validation_after_expand]: 0.00015614 [flash_sp_send_recv_attached]: 1.72999e-06 [receive_attached]: 1.6e-06 [after_resolve]: 0.0001967 [a_after_grad]: 0.00029457 [special_op_eliminate]: 0.00017724 [renormalize]: 1.2001e-07 [add_forward_monad_depend]: 2.55e-06 [auto_monad_grad]: 1.68e-06 [auto_monad_eliminator]: 0.00020434 [cse]: 0.00056397 [a_3]: 0.0012776 [py_interpret_to_execute_after_opt_a]: 0.00018087 [slice_cell_reuse_recomputed_activation]: 2.46e-06 [rewriter_after_opt_a]: 0.00121549 [convert_after_rewriter]: 0.00014133 [order_py_execute_after_rewriter]: 0.00010161 [opt_b]: 0.00531885, [1] [Cycle 1]: 0.00531024, [7] [b_1]: 0.00415421 [b_2]: 0.00018266 [updatestate_depend_eliminate]: 0.00012463 [updatestate_assign_eliminate]: 0.00011328 [updatestate_loads_eliminate]: 0.0001156 [renormalize]: 3.40005e-07 [cse]: 0.00055336 [optimize_parallel_all_gather_comm]: 0.00020209 [overlap_param_gather]: 1.36001e-06 [cconv]: 8.247e-05 [loop_unroll]: 0.00096894 [opt_after_cconv]: 0.00208582, [1] [Cycle 1]: 0.00207813, [7] [c_1]: 0.00105836 [parameter_eliminate]: 2.93e-06 [updatestate_depend_eliminate]: 0.00016204 [updatestate_assign_eliminate]: 0.00012209 [updatestate_loads_eliminate]: 0.00012136 [cse]: 0.00054623 [renormalize]: 5.79996e-07 [remove_dup_value]: 0.0008738 [tuple_transform]: 0.00119916, [1] [Cycle 1]: 0.00119101, [2] [d_1]: 0.00117151 [renormalize]: 5.10001e-07 [partial_unused_args_eliminate]: 3.04e-06 [add_cache_embedding]: 0.00017496 [add_recomputation]: 0.00088541 [cse_after_recomputation]: 0.00041137, [1] [Cycle 1]: 0.00040251, [1] [cse]: 0.00038576 [environ_conv]: 0.00011777 [swap_dp_allreduce_reducescatter]: 0.00017376 [bias_add_comm_swap]: 2.53e-06 [label_micro_interleaved_index]: 1.53e-06 [label_fine_grained_interleaved_index]: 0.00066148 [merge_cast_opt]: 1.84e-06 [slice_recompute_activation]: 0.00019186 [micro_interleaved_order_control]: 1.96001e-06 [assign_add_opt]: 0.00048193 [ForceFp32Comm]: 1.76999e-06 [remove_cast_before_assign_add]: 0.00013383 [full_micro_interleaved_order_control]: 2.47001e-06 [reorder_send_recv_between_fp_bp]: 1.45e-06 [comm_op_add_attrs]: 0.00019991 [add_comm_op_reuse_tag]: 0.0002037 [interleave_split_concat_branches]: 9.10004e-07 [interleave_parallel_branches]: 8.49992e-07 [overlap_opt_shard_in_pipeline]: 1.448e-05 [overlap_opt_shard_grad_in_pipeline]: 2.73e-06 [control_data_broadcast_order]: 1.01e-06 [grouped_pairwise_exchange_alltoall]: 9.79999e-06 [offloading_packed_experts]: 2.22e-06 [overlap_recompute_and_grad_model_parallel]: 1.91999e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.10002e-07 [overlap_recompute_allgather_and_fa_grad]: 5.828e-05 [overlap_grad_ring_attention]: 0.0001764 [overlap_grad_flash_sp]: 0.00014597 [begin_end_overlap_inline]: 1.04001e-06 [split_matmul_comm_elemetwise]: 2e-06 [split_layernorm_comm]: 1.93999e-06 [handle_group_info]: 7.38e-06 [symbol_engine_optimizer]: 0.00101093, [1] [Cycle 1]: 0.00100416, [6] [build]: 6.262e-05 [elim_shapecalc]: 0.00018498 [elim_not_effective]: 0.0002761 [opt_reshape]: 0.00017257 [fold_const_symbol]: 0.00026733 [renormalize]: 4.90007e-07 [pipeline_parallel_scheduler]: 3.3e-06 [auto_monad_reorder]: 0.00035638 [get_jit_bprop_graph]: 4.79995e-07 [rewriter_after_jit_bprop_graph]: 3.89991e-07 [eliminate_special_op_node]: 0.00158077 [distribtued_split]: 1.4e-06 [validate]: 0.00036465 [task_emit]: 14.1561 [execute]: 1.159e-05 Sums bootstrap : 0.001268s : 0.01% type_inference : 0.718615s : 4.60% auto_monad : 0.001893s : 0.01% graph_reusing : 0.000025s : 0.00% inline.rewriter_before_opt_a : 0.001456s : 0.01% inline.a1a2.expand_dump_flag : 0.000034s : 0.00% inline.a1a2.switch_simplify : 0.001188s : 0.01% inline.a1a2.loop_unroll : 0.000771s : 0.00% inline.a1a2.a_1 : 0.025060s : 0.16% inline.a1a2.recompute_prepare : 0.000265s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000577s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000153s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000268s : 0.00% inline.a1a2.parameter_eliminate : 0.000010s : 0.00% inline.a1a2.a_2 : 0.004977s : 0.03% inline.a1a2.parallel_inline_pass : 0.000214s : 0.00% parallel-infer-symbol : 0.000176s : 0.00% pre_auto_parallel : 0.000099s : 0.00% insert-virtual-dataset : 0.001158s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000090s : 0.00% pipeline_split : 0.000104s : 0.00% optimize.py_interpret_to_execute : 0.000118s : 0.00% optimize.rewriter_before_opt_a : 0.000281s : 0.00% optimize.opt_a.expand_dump_flag : 0.000068s : 0.00% optimize.opt_a.switch_simplify : 0.002600s : 0.02% optimize.opt_a.loop_unroll : 0.002257s : 0.01% optimize.opt_a.a_1 : 0.047547s : 0.30% optimize.opt_a.recompute_prepare : 0.000533s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000620s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000328s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000384s : 0.00% optimize.opt_a.parameter_eliminate : 0.000011s : 0.00% optimize.opt_a.a_2 : 0.010356s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000681s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000210s : 0.00% optimize.opt_a.shard_inline : 0.000494s : 0.00% optimize.opt_a.auto_parallel : 0.000373s : 0.00% optimize.opt_a.parallel : 0.037358s : 0.24% optimize.opt_a.flash_sp : 0.000230s : 0.00% optimize.opt_a.merge_comm : 0.000462s : 0.00% optimize.opt_a.allreduce_fusion : 0.000341s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000416s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000534s : 0.00% optimize.opt_a.virtual_dataset : 0.000556s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000494s : 0.00% optimize.opt_a.virtual_output : 0.000500s : 0.00% optimize.opt_a.merge_forward : 0.000328s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000965s : 0.01% optimize.opt_a.before_grad : 0.000846s : 0.01% optimize.opt_a.inplace_validation : 0.000384s : 0.00% optimize.opt_a.parallel_renormalize : 0.021568s : 0.14% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000678s : 0.00% optimize.opt_a.meta_fg_expand : 0.294694s : 1.89% optimize.opt_a.inplace_validation_after_expand : 0.002250s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001533s : 0.01% optimize.opt_a.receive_attached : 0.000085s : 0.00% optimize.opt_a.after_resolve : 0.002821s : 0.02% optimize.opt_a.a_after_grad : 0.005259s : 0.03% optimize.opt_a.special_op_eliminate : 0.002638s : 0.02% optimize.opt_a.renormalize : 0.197205s : 1.26% optimize.opt_a.add_forward_monad_depend : 0.000451s : 0.00% optimize.opt_a.auto_monad_grad : 0.000269s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002844s : 0.02% optimize.opt_a.cse : 0.014250s : 0.09% optimize.opt_a.a_3 : 0.034409s : 0.22% optimize.py_interpret_to_execute_after_opt_a : 0.000181s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.001215s : 0.01% optimize.convert_after_rewriter : 0.000141s : 0.00% optimize.order_py_execute_after_rewriter : 0.000102s : 0.00% optimize.opt_b.b_1 : 0.004154s : 0.03% optimize.opt_b.b_2 : 0.000183s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000125s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000113s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000116s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000553s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000202s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000082s : 0.00% optimize.loop_unroll : 0.000969s : 0.01% optimize.opt_after_cconv.c_1 : 0.001058s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000162s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000122s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000121s : 0.00% optimize.opt_after_cconv.cse : 0.000546s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000874s : 0.01% optimize.tuple_transform.d_1 : 0.001172s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000175s : 0.00% optimize.add_recomputation : 0.000885s : 0.01% optimize.cse_after_recomputation.cse : 0.000386s : 0.00% optimize.environ_conv : 0.000118s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000174s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000661s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000192s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000482s : 0.00% optimize.ForceFp32Comm : 0.000002s : 0.00% optimize.remove_cast_before_assign_add : 0.000134s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000200s : 0.00% optimize.add_comm_op_reuse_tag : 0.000204s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000014s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000058s : 0.00% optimize.overlap_grad_ring_attention : 0.000176s : 0.00% optimize.overlap_grad_flash_sp : 0.000146s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000063s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000185s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000276s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000173s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000267s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000356s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001581s : 0.01% distribtued_split : 0.000001s : 0.00% validate : 0.000365s : 0.00% task_emit : 14.156141s : 90.60% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.054190 4900 0.03% : 0.000018s : 4: substitution.ad_related_special_op_eliminate 0.04% : 0.000023s : 9: substitution.addn_check_dump 0.09% : 0.000051s : 7: substitution.addn_zero_filter 0.03% : 0.000016s : 7: substitution.adjust_all_reduce_mul_add 0.61% : 0.000332s : 71: substitution.arithmetic_simplify 0.23% : 0.000122s : 10: substitution.cast_eliminate 0.10% : 0.000056s : 47: substitution.depend_value_elim 0.07% : 0.000036s : 127: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000025s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000020s : 12: substitution.environ_get_depend_swap 0.05% : 0.000029s : 27: substitution.environ_get_eliminate 0.07% : 0.000035s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000023s : 28: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.03% : 0.000014s : 10: substitution.float_tuple_getitem_switch 0.07% : 0.000038s : 127: substitution.fold_const_symbol 64.63% : 0.035021s : 290: substitution.getattr_setattr_resolve 0.19% : 0.000104s : 163: substitution.graph_param_transform 0.02% : 0.000008s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.30% : 0.012628s : 363: substitution.inline 1.45% : 0.000785s : 127: substitution.inline_without_move 0.27% : 0.000145s : 361: substitution.j_node_and_user_rematch 0.25% : 0.000136s : 40: substitution.less_batch_normalization 0.08% : 0.000046s : 90: substitution.load_eliminater 0.10% : 0.000054s : 10: substitution.merge_addn 0.24% : 0.000128s : 115: substitution.minmaximum_grad 0.01% : 0.000003s : 10: substitution.opt_reshape 0.04% : 0.000020s : 1: substitution.partial_defer_inline 0.14% : 0.000074s : 28: substitution.partial_eliminate 0.10% : 0.000054s : 35: substitution.reduce_all_const_elim 0.06% : 0.000032s : 15: substitution.reduce_eliminate 0.34% : 0.000184s : 361: substitution.remove_not_recompute_node 2.20% : 0.001190s : 612: substitution.replace_applicator 0.26% : 0.000138s : 324: substitution.replace_old_param 0.20% : 0.000108s : 31: substitution.reshape_eliminate 0.03% : 0.000014s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000013s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.16% : 0.000088s : 36: substitution.switch_simplify 0.05% : 0.000030s : 11: substitution.tile_eliminate 0.53% : 0.000289s : 115: substitution.tuple_list_convert_item_index_to_positive 0.30% : 0.000160s : 121: substitution.tuple_list_get_item_const_eliminator 0.42% : 0.000228s : 121: substitution.tuple_list_get_item_depend_reorder 1.62% : 0.000875s : 356: substitution.tuple_list_get_item_eliminator 0.38% : 0.000205s : 121: substitution.tuple_list_get_set_item_eliminator 0.36% : 0.000194s : 210: substitution.updatestate_pure_node_eliminater 0.64% : 0.000349s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000012s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.718113 2 96.59% : 0.693639s : 1: type_inference.infer 3.41% : 0.024474s : 1: type_inference.specialize ------[replace.] 0.011115 851 0.32% : 0.000035s : 4: replace.ad_related_special_op_eliminate 0.06% : 0.000006s : 1: replace.arithmetic_simplify 0.46% : 0.000051s : 7: replace.depend_value_elim 0.36% : 0.000040s : 3: replace.environ_get_set_eliminate 29.55% : 0.003284s : 189: replace.getattr_setattr_resolve 29.77% : 0.003309s : 342: replace.inline 0.21% : 0.000024s : 1: replace.merge_addn 1.00% : 0.000111s : 7: replace.partial_eliminate 3.91% : 0.000435s : 28: replace.replace_applicator 3.70% : 0.000411s : 36: replace.switch_simplify 0.46% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 29.87% : 0.003320s : 225: replace.tuple_list_get_item_eliminator 0.15% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.18% : 0.000020s : 1: replace.virtual_dataset_eliminate ------[match.] 0.043259 851 0.03% : 0.000015s : 4: match.ad_related_special_op_eliminate 0.03% : 0.000013s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000017s : 3: match.environ_get_set_eliminate 69.49% : 0.030059s : 189: match.getattr_setattr_resolve 28.63% : 0.012387s : 342: match.inline 0.06% : 0.000025s : 1: match.merge_addn 0.09% : 0.000038s : 7: match.partial_eliminate 0.25% : 0.000110s : 28: match.replace_applicator 0.16% : 0.000068s : 36: match.switch_simplify 0.07% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 1.10% : 0.000474s : 225: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.024776157489 0.71% : 0.000176s : 1381: predicate.accumulaten_eliminater 0.27% : 0.000068s : 327: predicate.ad_related_special_op_eliminate 0.55% : 0.000136s : 997: predicate.addn_check_dump 0.71% : 0.000177s : 1381: predicate.addn_zero_filter 0.70% : 0.000174s : 1381: predicate.adjust_all_reduce_mul_add 1.71% : 0.000425s : 2379: predicate.arithmetic_simplify 1.09% : 0.000269s : 1905: predicate.cast_eliminate 3.44% : 0.000851s : 4402: predicate.check_bprop_eliminate 0.56% : 0.000138s : 997: predicate.compare_switch_simplify 0.06% : 0.000015s : 183: predicate.const_output_eliminate 0.18% : 0.000045s : 318: predicate.convert_tensor_all_eliminate 1.10% : 0.000273s : 1616: predicate.convert_tensor_eliminate 0.57% : 0.000142s : 1000: predicate.depend_value_elim 0.78% : 0.000193s : 1385: predicate.dict_get_item_const_eliminator 0.81% : 0.000200s : 1385: predicate.dict_get_item_eliminator 0.78% : 0.000194s : 1385: predicate.dict_set_item_eliminator 0.05% : 0.000012s : 163: predicate.elim_not_effective 0.11% : 0.000028s : 163: predicate.elim_shapecalc_of_broadcastargs 0.85% : 0.000210s : 1565: predicate.environ_add_const_eliminate 0.82% : 0.000204s : 1568: predicate.environ_get_add_eliminate 0.82% : 0.000204s : 1565: predicate.environ_get_depend_swap 1.41% : 0.000350s : 2565: predicate.environ_get_eliminate 0.87% : 0.000215s : 1568: predicate.environ_get_set_eliminate 1.06% : 0.000262s : 1966: predicate.exchange_switch_depend_value 1.34% : 0.000333s : 1966: predicate.float_depend_g_call 0.55% : 0.000136s : 997: predicate.float_environ_get_switch 0.66% : 0.000164s : 1180: predicate.float_tuple_getitem_switch 0.05% : 0.000012s : 163: predicate.fold_const_symbol 0.31% : 0.000076s : 531: predicate.get_grad_eliminate 2.11% : 0.000522s : 2090: predicate.getattr_setattr_resolve 0.06% : 0.000014s : 163: predicate.graph_param_transform 0.55% : 0.000137s : 997: predicate.incorporate_call 0.54% : 0.000133s : 997: predicate.incorporate_call_switch 3.79% : 0.000938s : 5397: predicate.inline 2.40% : 0.000594s : 2683: predicate.inline_without_move 0.17% : 0.000041s : 531: predicate.j_node_and_user_rematch 0.34% : 0.000085s : 493: predicate.less_batch_normalization 1.12% : 0.000278s : 1962: predicate.list_to_tuple_eliminator_ 1.78% : 0.000440s : 3370: predicate.load_eliminater 0.19% : 0.000048s : 183: predicate.loop_unroll_after_grad 2.34% : 0.000579s : 3091: predicate.loop_unroll_before_grad 0.94% : 0.000233s : 1757: predicate.make_slice_get_slice_eliminator 0.57% : 0.000142s : 999: predicate.merge_addn 3.34% : 0.000826s : 4286: predicate.micro_step_allgather_replace 3.54% : 0.000878s : 4286: predicate.mini_step_allgather_replace 0.73% : 0.000181s : 1382: predicate.minmaximum_grad 0.19% : 0.000047s : 318: predicate.mutable_eliminate 0.10% : 0.000025s : 163: predicate.opt_reshape 0.11% : 0.000028s : 183: predicate.parallel_virtual_node 1.95% : 0.000482s : 1966: predicate.partial_defer_inline 1.07% : 0.000265s : 1806: predicate.partial_eliminate 0.72% : 0.000178s : 1381: predicate.print_const_string_wrapper 0.57% : 0.000141s : 986: predicate.reduce_all_const_elim 0.92% : 0.000227s : 1382: predicate.reduce_eliminate 0.17% : 0.000042s : 531: predicate.remove_not_recompute_node 2.23% : 0.000552s : 5958: predicate.replace_applicator 0.81% : 0.000200s : 2683: predicate.replace_old_param 0.06% : 0.000015s : 183: predicate.reset_defer_inline 0.74% : 0.000184s : 1382: predicate.reshape_eliminate 3.36% : 0.000832s : 4286: predicate.row_tensor_add_zeros_like 0.12% : 0.000029s : 183: predicate.row_tensor_eliminate 3.52% : 0.000872s : 4402: predicate.same_eliminate 0.24% : 0.000059s : 738: predicate.set_cell_output_no_recompute 0.32% : 0.000079s : 531: predicate.shard_identity_eliminate 2.20% : 0.000545s : 2866: predicate.special_op_eliminate 0.64% : 0.000158s : 999: predicate.specialize_transform 3.70% : 0.000916s : 4286: predicate.split_environ_get_set_with_tuple_value 1.59% : 0.000395s : 2683: predicate.stack_unstack_eliminate 1.79% : 0.000444s : 3370: predicate.stopgrad_eliminater 0.10% : 0.000025s : 183: predicate.switch_call_monad_eliminater 1.20% : 0.000298s : 1966: predicate.switch_defer_inline 4.55% : 0.001126s : 6368: predicate.switch_layer_defer_inline 4.23% : 0.001049s : 6128: predicate.switch_simplify 0.75% : 0.000186s : 1382: predicate.tile_eliminate 0.72% : 0.000177s : 1382: predicate.transpose_eliminate 1.04% : 0.000258s : 1731: predicate.tuple_list_convert_item_index_to_positive 1.06% : 0.000264s : 1737: predicate.tuple_list_get_item_const_eliminator 0.94% : 0.000232s : 1737: predicate.tuple_list_get_item_depend_reorder 1.86% : 0.000460s : 2959: predicate.tuple_list_get_item_eliminator 1.01% : 0.000249s : 1737: predicate.tuple_list_get_set_item_eliminator 1.76% : 0.000437s : 2734: predicate.tuple_list_set_item_eliminator 1.09% : 0.000271s : 1962: predicate.tuple_to_list_eliminator_ 1.88% : 0.000466s : 3370: predicate.updatestate_pure_node_eliminater 2.42% : 0.000599s : 4368: predicate.updatestate_useless_node_eliminater 0.11% : 0.000028s : 183: predicate.value_based_eliminate 0.31% : 0.000078s : 533: predicate.virtual_dataset_eliminate 0.30% : 0.000075s : 531: predicate.virtual_output_eliminate 0.12% : 0.000029s : 183: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.065827 841 68.58% : 0.045142s : 383: func_graph_cloner_run.FuncGraphClonerGraph 2.23% : 0.001466s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.20% : 0.019218s : 436: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 16.805224 350 0.00% : 0.000006s : 1: ForceFp32Comm 0.25% : 0.041201s : 1: a1a2 0.00% : 0.000183s : 1: add_cache_embedding 0.00% : 0.000212s : 1: add_comm_op_reuse_tag 0.01% : 0.000900s : 1: add_recomputation 0.00% : 0.000493s : 1: assign_add_opt 0.01% : 0.001918s : 1: auto_monad 0.00% : 0.000372s : 1: auto_monad_reorder 0.00% : 0.000006s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001326s : 1: bootstrap 0.00% : 0.000090s : 1: cconv 0.00% : 0.000209s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000149s : 1: convert_after_rewriter 0.00% : 0.000417s : 1: cse_after_recomputation 0.00% : 0.000099s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: distribtued_split 0.01% : 0.001598s : 1: eliminate_special_op_node 0.00% : 0.000127s : 1: environ_conv 0.00% : 0.000022s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000033s : 1: graph_reusing 0.00% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000010s : 1: handle_group_info 0.25% : 0.042700s : 1: inline 0.01% : 0.001180s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000672s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.01% : 0.000981s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.19% : 0.032400s : 61: opt.transform.a1a2 0.00% : 0.000229s : 1: opt.transform.loop_unroll_optimizer 0.68% : 0.113900s : 148: opt.transform.opt_a 0.01% : 0.001055s : 1: opt.transform.opt_after_cconv 0.03% : 0.004303s : 27: opt.transform.opt_b 0.25% : 0.041578s : 20: opt.transform.opt_resolve 0.01% : 0.001168s : 1: opt.transform.opt_trans_graph 0.01% : 0.001039s : 6: opt.transform.special_op_eliminate 0.01% : 0.000895s : 4: opt.transform.symbol_engine_opt 4.12% : 0.692240s : 1: opt_a 0.01% : 0.002092s : 1: opt_after_cconv 0.03% : 0.005323s : 1: opt_b 4.23% : 0.710584s : 1: optimize 0.00% : 0.000211s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000107s : 1: order_py_execute_after_rewriter 0.00% : 0.000208s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000182s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000018s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000063s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000188s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000115s : 1: pipeline_split 0.00% : 0.000110s : 1: pre_auto_parallel 0.00% : 0.000126s : 1: py_interpret_to_execute 0.00% : 0.000190s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000140s : 1: remove_cast_before_assign_add 0.01% : 0.000890s : 1: remove_dup_value 0.91% : 0.153557s : 3: renormalize.infer 0.39% : 0.065176s : 3: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001228s : 1: rewriter_after_opt_a 0.01% : 0.001762s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000199s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000182s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.001015s : 1: symbol_engine_optimizer 84.24% : 14.156186s : 1: task_emit 0.01% : 0.001204s : 1: tuple_transform 4.28% : 0.718651s : 1: type_inference 0.01% : 0.001668s : 1: validate distribute network loadcheckpoint. distribute network loadcheckpoint. distribute network loadcheckpoint. distribute network loadcheckpoint. distribute network loadcheckpoint. distribute network loadcheckpoint. distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. distribute network loadcheckpoint. distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. [WARNING] DISTRIBUTED(163842,ffff8ac54c10,python3.7):2025-02-07-13:54:19.852.981 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (3, 7) [const vector]{3, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163844,ffffae145c10,python3.7):2025-02-07-13:54:19.853.208 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (1, 5) [const vector]{1, 5}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163845,ffff83f78c10,python3.7):2025-02-07-13:54:19.853.499 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (2, 6) [const vector]{2, 6}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163843,ffffaa156c10,python3.7):2025-02-07-13:54:19.853.575 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (0, 4) [const vector]{0, 4}, async: 0, submit_now: 1 distribute network loadcheckpoint. [WARNING] DISTRIBUTED(163846,ffff97644c10,python3.7):2025-02-07-13:54:19.853.896 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (3, 7) [const vector]{3, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163841,ffff99d3ac10,python3.7):2025-02-07-13:54:19.853.895 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (2, 6) [const vector]{2, 6}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:54:19.854.932 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (3, 7) [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:54:19.854.930 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (2, 6) [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:54:19.854.934 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (3, 7) [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:54:19.854.933 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (2, 6) [WARNING] DEVICE(163842,fffdcdffb0f0,python3.7):2025-02-07-13:54:19.855.070 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (3, 7), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163841,fffde4ff90f0,python3.7):2025-02-07-13:54:19.855.083 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (2, 6), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163846,fffdd27fc0f0,python3.7):2025-02-07-13:54:19.855.136 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (3, 7), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163845,fffdbaffd0f0,python3.7):2025-02-07-13:54:19.855.127 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (2, 6), hcclBufferSize is 200 MB. hcclDeterministic is 0 distribute network parameter broadcast. [WARNING] DISTRIBUTED(163840,ffff92fd9c10,python3.7):2025-02-07-13:54:19.856.515 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (1, 5) [const vector]{1, 5}, async: 0, submit_now: 1 distribute network parameter broadcast. [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:54:19.857.357 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (1, 5) [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:54:19.857.364 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (1, 5) [WARNING] DEVICE(163840,fffdd27fc0f0,python3.7):2025-02-07-13:54:19.857.516 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (1, 5), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163844,fffdf4ff90f0,python3.7):2025-02-07-13:54:19.857.597 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (1, 5), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(163839,ffff88d43c10,python3.7):2025-02-07-13:54:19.857.969 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (0, 4) [const vector]{0, 4}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:54:19.858.890 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (0, 4) [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:54:19.858.894 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (0, 4) [WARNING] DEVICE(163839,fffdbe7fc0f0,python3.7):2025-02-07-13:54:19.859.036 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (0, 4), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163843,fffdf0ff90f0,python3.7):2025-02-07-13:54:19.859.111 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (0, 4), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163845,fffdbaffd0f0,python3.7):2025-02-07-13:54:19.922.029 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (2, 6) [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:54:19.922.081 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (2, 6) [WARNING] DEVICE(163840,fffdd27fc0f0,python3.7):2025-02-07-13:54:19.922.113 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (1, 5) [WARNING] DEVICE(163841,fffde4ff90f0,python3.7):2025-02-07-13:54:19.922.207 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (2, 6) [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:54:19.922.210 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (1, 5) [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:54:19.922.268 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (2, 6) [WARNING] DEVICE(163839,fffdbe7fc0f0,python3.7):2025-02-07-13:54:19.922.330 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (0, 4) [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:54:19.922.389 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (0, 4) [WARNING] DEVICE(163842,fffdcdffb0f0,python3.7):2025-02-07-13:54:19.922.485 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (3, 7) [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:54:19.922.546 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (3, 7) [WARNING] DEVICE(163843,fffdf0ff90f0,python3.7):2025-02-07-13:54:19.926.263 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (0, 4) [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:54:19.926.325 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (0, 4) [WARNING] DEVICE(163844,fffdf4ff90f0,python3.7):2025-02-07-13:54:19.926.345 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (1, 5) [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:54:19.926.397 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (1, 5) [WARNING] PARALLEL(163841,ffff99d3ac10,python3.7):2025-02-07-13:54:19.986.495 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163845,ffff83f78c10,python3.7):2025-02-07-13:54:19.986.819 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163842,ffff8ac54c10,python3.7):2025-02-07-13:54:19.987.050 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163840,ffff92fd9c10,python3.7):2025-02-07-13:54:19.987.498 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163839,ffff88d43c10,python3.7):2025-02-07-13:54:19.987.552 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163844,ffffae145c10,python3.7):2025-02-07-13:54:19.989.051 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163843,ffffaa156c10,python3.7):2025-02-07-13:54:19.989.496 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. ....... TotalTime = 0.17642, [21] [bootstrap]: 0.00034867 [type_inference]: 0.0422293 [auto_monad]: 0.00055482 [graph_reusing]: 6.54001e-06 [inline]: 1.5e-06 [parallel-infer-symbol]: 2.83e-06 [pre_auto_parallel]: 9.398e-05 [insert-virtual-dataset]: 3.43e-06 [parallel-infer-symbol-second]: 7.99992e-07 [dataset_repeat_opt]: 1.34e-06 [pipeline_split]: 1.57001e-06 [optimize]: 0.0175, [52] [py_interpret_to_execute]: 6.556e-05 [rewriter_before_opt_a]: 0.0002335 [opt_a]: 0.0136784, [2] [Cycle 1]: 0.00785909, [43] [expand_dump_flag]: 7.46e-06 [switch_simplify]: 0.00024269 [loop_unroll]: 9.63e-05 [a_1]: 0.00296651 [recompute_prepare]: 3.376e-05 [updatestate_depend_eliminate]: 0.00010212 [updatestate_assign_eliminate]: 2.216e-05 [updatestate_loads_eliminate]: 1.83e-05 [parameter_eliminate]: 4.27e-06 [a_2]: 0.00039174 [accelerated_algorithm]: 5.095e-05 [shard]: 2.35e-06 [meta_shard_fg_expand]: 1.004e-05 [shard_inline]: 2.722e-05 [auto_parallel]: 2.065e-05 [parallel]: 9.24e-06 [flash_sp]: 1.49e-05 [merge_comm]: 1.879e-05 [allreduce_fusion]: 1.586e-05 [matmul_add_comm_reduction]: 2.339e-05 [allreduce_slice_to_reducescatter]: 5.19998e-07 [virtual_shard_identity]: 2.681e-05 [virtual_dataset]: 2.544e-05 [get_grad_eliminate_]: 2.488e-05 [virtual_output]: 2.409e-05 [merge_forward]: 1.482e-05 [cell_reuse_recompute_pass]: 2.60001e-06 [cell_reuse_handle_not_recompute_node_pass]: 5.555e-05 [before_grad]: 4.785e-05 [inplace_validation]: 1.396e-05 [meta_fg_expand]: 2.058e-05 [inplace_validation_after_expand]: 1.743e-05 [flash_sp_send_recv_attached]: 3.11001e-06 [receive_attached]: 4.84999e-06 [after_resolve]: 3.266e-05 [a_after_grad]: 4.439e-05 [special_op_eliminate]: 2.553e-05 [renormalize]: 0.0025513 [add_forward_monad_depend]: 4.81999e-06 [auto_monad_grad]: 1.99e-06 [auto_monad_eliminator]: 6.41e-05 [cse]: 0.00030297 [a_3]: 0.00018384 [Cycle 2]: 0.00238052, [43] [expand_dump_flag]: 1.50001e-06 [switch_simplify]: 2.807e-05 [loop_unroll]: 2.511e-05 [a_1]: 0.00089699 [recompute_prepare]: 2.58e-05 [updatestate_depend_eliminate]: 1.72e-05 [updatestate_assign_eliminate]: 1.769e-05 [updatestate_loads_eliminate]: 1.661e-05 [parameter_eliminate]: 2.43e-06 [a_2]: 0.00037332 [accelerated_algorithm]: 2.893e-05 [shard]: 1.12e-06 [meta_shard_fg_expand]: 7.35999e-06 [shard_inline]: 2.687e-05 [auto_parallel]: 2.16e-05 [parallel]: 4.22e-06 [flash_sp]: 3.69e-06 [merge_comm]: 1.751e-05 [allreduce_fusion]: 1.528e-05 [matmul_add_comm_reduction]: 2.044e-05 [allreduce_slice_to_reducescatter]: 2.90005e-07 [virtual_shard_identity]: 2.671e-05 [virtual_dataset]: 2.512e-05 [get_grad_eliminate_]: 2.411e-05 [virtual_output]: 2.378e-05 [merge_forward]: 1.306e-05 [cell_reuse_recompute_pass]: 2.22e-06 [cell_reuse_handle_not_recompute_node_pass]: 5.185e-05 [before_grad]: 4.735e-05 [inplace_validation]: 1.225e-05 [meta_fg_expand]: 1.595e-05 [inplace_validation_after_expand]: 1.679e-05 [flash_sp_send_recv_attached]: 1.11999e-06 [receive_attached]: 8.29998e-07 [after_resolve]: 2.976e-05 [a_after_grad]: 4.307e-05 [special_op_eliminate]: 2.416e-05 [renormalize]: 1.00001e-07 [add_forward_monad_depend]: 1.42e-06 [auto_monad_grad]: 1.48e-06 [auto_monad_eliminator]: 4.491e-05 [cse]: 5.693e-05 [a_3]: 0.00016928 [py_interpret_to_execute_after_opt_a]: 2.177e-05 [slice_cell_reuse_recomputed_activation]: 2.72e-06 [rewriter_after_opt_a]: 0.00038762 [convert_after_rewriter]: 1.992e-05 [order_py_execute_after_rewriter]: 1.392e-05 [opt_b]: 0.0007471, [1] [Cycle 1]: 0.00074061, [7] [b_1]: 0.00056743 [b_2]: 2.968e-05 [updatestate_depend_eliminate]: 1.518e-05 [updatestate_assign_eliminate]: 1.779e-05 [updatestate_loads_eliminate]: 1.62e-05 [renormalize]: 3.89991e-07 [cse]: 5.642e-05 [optimize_parallel_all_gather_comm]: 1.973e-05 [overlap_param_gather]: 1.12e-06 [cconv]: 2.749e-05 [loop_unroll]: 0.00054008 [opt_after_cconv]: 0.00034899, [1] [Cycle 1]: 0.00034184, [7] [c_1]: 0.00018923 [parameter_eliminate]: 2.61e-06 [updatestate_depend_eliminate]: 1.778e-05 [updatestate_assign_eliminate]: 1.81e-05 [updatestate_loads_eliminate]: 1.67e-05 [cse]: 5.926e-05 [renormalize]: 3.89991e-07 [remove_dup_value]: 6.524e-05 [tuple_transform]: 0.00029945, [1] [Cycle 1]: 0.0002162, [2] [d_1]: 0.00020433 [renormalize]: 3.50003e-07 [partial_unused_args_eliminate]: 2.84999e-06 [add_cache_embedding]: 2.586e-05 [add_recomputation]: 0.00015083 [cse_after_recomputation]: 6.029e-05, [1] [Cycle 1]: 5.417e-05, [1] [cse]: 4.82e-05 [environ_conv]: 1.915e-05 [swap_dp_allreduce_reducescatter]: 1.838e-05 [bias_add_comm_swap]: 2.45e-06 [label_micro_interleaved_index]: 2.06e-06 [label_fine_grained_interleaved_index]: 2.49001e-06 [merge_cast_opt]: 1.52001e-06 [slice_recompute_activation]: 1.86e-06 [micro_interleaved_order_control]: 1.85e-06 [assign_add_opt]: 6.483e-05 [ForceFp32Comm]: 9.29998e-07 [remove_cast_before_assign_add]: 1.674e-05 [full_micro_interleaved_order_control]: 2.18001e-06 [reorder_send_recv_between_fp_bp]: 2.16e-06 [comm_op_add_attrs]: 5.883e-05 [add_comm_op_reuse_tag]: 2.2e-06 [interleave_split_concat_branches]: 8.2e-07 [interleave_parallel_branches]: 9.79999e-07 [overlap_opt_shard_in_pipeline]: 1.27999e-06 [overlap_opt_shard_grad_in_pipeline]: 2.16e-06 [control_data_broadcast_order]: 1.21999e-06 [grouped_pairwise_exchange_alltoall]: 9.51001e-06 [offloading_packed_experts]: 2.35e-06 [overlap_recompute_and_grad_model_parallel]: 1.96e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.29998e-07 [overlap_recompute_allgather_and_fa_grad]: 4.182e-05 [overlap_grad_ring_attention]: 2.18999e-06 [overlap_grad_flash_sp]: 3.39e-05 [begin_end_overlap_inline]: 7.59988e-07 [split_matmul_comm_elemetwise]: 2.06001e-06 [split_layernorm_comm]: 2.14e-06 [handle_group_info]: 7.38999e-06 [symbol_engine_optimizer]: 0.00021156, [1] [Cycle 1]: 0.00020597, [6] [build]: 1.799e-05 [elim_shapecalc]: 3.239e-05 [elim_not_effective]: 5.214e-05 [opt_reshape]: 2.64e-05 [fold_const_symbol]: 4.698e-05 [renormalize]: 3.6e-07 [pipeline_parallel_scheduler]: 1.69e-06 [auto_monad_reorder]: 0.00010668 [get_jit_bprop_graph]: 5.10001e-07 [rewriter_after_jit_bprop_graph]: 4.60001e-07 [eliminate_special_op_node]: 0.00060158 [distribtued_split]: 1.42e-06 [validate]: 7.986e-05 [task_emit]: 0.114547 [execute]: 1.149e-05 Sums bootstrap : 0.000349s : 0.20% type_inference : 0.042229s : 24.60% auto_monad : 0.000555s : 0.32% graph_reusing : 0.000007s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000094s : 0.05% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000066s : 0.04% optimize.rewriter_before_opt_a : 0.000234s : 0.14% optimize.opt_a.expand_dump_flag : 0.000009s : 0.01% optimize.opt_a.switch_simplify : 0.000271s : 0.16% optimize.opt_a.loop_unroll : 0.000121s : 0.07% optimize.opt_a.a_1 : 0.003863s : 2.25% optimize.opt_a.recompute_prepare : 0.000060s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000119s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000040s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000035s : 0.02% optimize.opt_a.parameter_eliminate : 0.000007s : 0.00% optimize.opt_a.a_2 : 0.000765s : 0.45% optimize.opt_a.accelerated_algorithm : 0.000080s : 0.05% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000017s : 0.01% optimize.opt_a.shard_inline : 0.000054s : 0.03% optimize.opt_a.auto_parallel : 0.000042s : 0.02% optimize.opt_a.parallel : 0.000013s : 0.01% optimize.opt_a.flash_sp : 0.000019s : 0.01% optimize.opt_a.merge_comm : 0.000036s : 0.02% optimize.opt_a.allreduce_fusion : 0.000031s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000044s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000054s : 0.03% optimize.opt_a.virtual_dataset : 0.000051s : 0.03% optimize.opt_a.get_grad_eliminate_ : 0.000049s : 0.03% optimize.opt_a.virtual_output : 0.000048s : 0.03% optimize.opt_a.merge_forward : 0.000028s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000107s : 0.06% optimize.opt_a.before_grad : 0.000095s : 0.06% optimize.opt_a.inplace_validation : 0.000026s : 0.02% optimize.opt_a.meta_fg_expand : 0.000037s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000034s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.000062s : 0.04% optimize.opt_a.a_after_grad : 0.000087s : 0.05% optimize.opt_a.special_op_eliminate : 0.000050s : 0.03% optimize.opt_a.renormalize : 0.002551s : 1.49% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000109s : 0.06% optimize.opt_a.cse : 0.000360s : 0.21% optimize.opt_a.a_3 : 0.000353s : 0.21% optimize.py_interpret_to_execute_after_opt_a : 0.000022s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000388s : 0.23% optimize.convert_after_rewriter : 0.000020s : 0.01% optimize.order_py_execute_after_rewriter : 0.000014s : 0.01% optimize.opt_b.b_1 : 0.000567s : 0.33% optimize.opt_b.b_2 : 0.000030s : 0.02% optimize.opt_b.updatestate_depend_eliminate : 0.000015s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000018s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000016s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000056s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000020s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000027s : 0.02% optimize.loop_unroll : 0.000540s : 0.31% optimize.opt_after_cconv.c_1 : 0.000189s : 0.11% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000018s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000018s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.cse : 0.000059s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000065s : 0.04% optimize.tuple_transform.d_1 : 0.000204s : 0.12% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000026s : 0.02% optimize.add_recomputation : 0.000151s : 0.09% optimize.cse_after_recomputation.cse : 0.000048s : 0.03% optimize.environ_conv : 0.000019s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000018s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000065s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000017s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000059s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000042s : 0.02% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000034s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000018s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000032s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000052s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000026s : 0.02% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000047s : 0.03% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000107s : 0.06% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000602s : 0.35% distribtued_split : 0.000001s : 0.00% validate : 0.000080s : 0.05% task_emit : 0.114547s : 66.72% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000669 300 1.13% : 0.000008s : 2: substitution.depend_value_elim 1.29% : 0.000009s : 18: substitution.elim_not_effective 1.23% : 0.000008s : 18: substitution.fold_const_symbol 3.01% : 0.000020s : 21: substitution.graph_param_transform 55.15% : 0.000369s : 15: substitution.inline 3.09% : 0.000021s : 36: substitution.j_node_and_user_rematch 3.41% : 0.000023s : 2: substitution.less_batch_normalization 3.12% : 0.000021s : 30: substitution.load_eliminater 1.02% : 0.000007s : 6: substitution.reduce_all_const_elim 4.54% : 0.000030s : 36: substitution.remove_not_recompute_node 0.97% : 0.000006s : 6: substitution.replace_old_param 2.50% : 0.000017s : 4: substitution.switch_simplify 3.50% : 0.000023s : 6: substitution.tuple_list_get_item_eliminator 8.35% : 0.000056s : 44: substitution.updatestate_pure_node_eliminater 7.70% : 0.000052s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.042162 2 93.42% : 0.039386s : 1: type_inference.infer 6.58% : 0.002776s : 1: type_inference.specialize ------[replace.] 0.000217 25 52.03% : 0.000113s : 15: replace.inline 31.84% : 0.000069s : 4: replace.switch_simplify 16.13% : 0.000035s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000392 25 91.45% : 0.000359s : 15: match.inline 3.56% : 0.000014s : 4: match.switch_simplify 5.00% : 0.000020s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.001126 6511 1.00% : 0.000011s : 72: predicate.accumulaten_eliminater 0.66% : 0.000007s : 21: predicate.ad_related_special_op_eliminate 0.58% : 0.000007s : 42: predicate.addn_check_dump 0.97% : 0.000011s : 72: predicate.addn_zero_filter 0.96% : 0.000011s : 72: predicate.adjust_all_reduce_mul_add 2.10% : 0.000024s : 114: predicate.arithmetic_simplify 1.06% : 0.000012s : 72: predicate.cast_eliminate 0.62% : 0.000007s : 42: predicate.check_bprop_eliminate 0.59% : 0.000007s : 42: predicate.compare_switch_simplify 0.16% : 0.000002s : 21: predicate.const_output_eliminate 0.32% : 0.000004s : 21: predicate.convert_tensor_all_eliminate 1.45% : 0.000016s : 78: predicate.convert_tensor_eliminate 0.61% : 0.000007s : 42: predicate.depend_value_elim 1.04% : 0.000012s : 72: predicate.dict_get_item_const_eliminator 1.11% : 0.000013s : 72: predicate.dict_get_item_eliminator 1.05% : 0.000012s : 72: predicate.dict_set_item_eliminator 0.18% : 0.000002s : 21: predicate.elim_not_effective 0.36% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.27% : 0.000014s : 93: predicate.environ_add_const_eliminate 1.26% : 0.000014s : 93: predicate.environ_get_add_eliminate 1.33% : 0.000015s : 93: predicate.environ_get_depend_swap 1.90% : 0.000021s : 135: predicate.environ_get_eliminate 1.27% : 0.000014s : 93: predicate.environ_get_set_eliminate 1.34% : 0.000015s : 93: predicate.exchange_switch_depend_value 1.80% : 0.000020s : 93: predicate.float_depend_g_call 0.59% : 0.000007s : 42: predicate.float_environ_get_switch 0.87% : 0.000010s : 63: predicate.float_tuple_getitem_switch 0.17% : 0.000002s : 21: predicate.fold_const_symbol 0.61% : 0.000007s : 42: predicate.get_grad_eliminate 0.22% : 0.000002s : 21: predicate.graph_param_transform 0.64% : 0.000007s : 42: predicate.incorporate_call 0.60% : 0.000007s : 42: predicate.incorporate_call_switch 6.11% : 0.000069s : 291: predicate.inline 0.89% : 0.000010s : 42: predicate.inline_without_move 0.34% : 0.000004s : 42: predicate.j_node_and_user_rematch 0.71% : 0.000008s : 42: predicate.less_batch_normalization 1.82% : 0.000021s : 120: predicate.list_to_tuple_eliminator_ 2.88% : 0.000032s : 192: predicate.load_eliminater 0.64% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.71% : 0.000019s : 110: predicate.loop_unroll_before_grad 1.75% : 0.000020s : 114: predicate.make_slice_get_slice_eliminator 0.60% : 0.000007s : 42: predicate.merge_addn 0.59% : 0.000007s : 42: predicate.micro_step_allgather_replace 0.60% : 0.000007s : 42: predicate.mini_step_allgather_replace 0.98% : 0.000011s : 72: predicate.minmaximum_grad 0.39% : 0.000004s : 21: predicate.mutable_eliminate 0.33% : 0.000004s : 21: predicate.opt_reshape 0.34% : 0.000004s : 21: predicate.parallel_virtual_node 1.83% : 0.000021s : 93: predicate.partial_defer_inline 1.66% : 0.000019s : 99: predicate.partial_eliminate 1.03% : 0.000012s : 72: predicate.print_const_string_wrapper 0.66% : 0.000007s : 42: predicate.reduce_all_const_elim 1.20% : 0.000013s : 72: predicate.reduce_eliminate 0.36% : 0.000004s : 42: predicate.remove_not_recompute_node 1.17% : 0.000013s : 120: predicate.replace_applicator 0.35% : 0.000004s : 42: predicate.replace_old_param 0.17% : 0.000002s : 21: predicate.reset_defer_inline 0.99% : 0.000011s : 72: predicate.reshape_eliminate 0.62% : 0.000007s : 42: predicate.row_tensor_add_zeros_like 0.36% : 0.000004s : 21: predicate.row_tensor_eliminate 0.77% : 0.000009s : 42: predicate.same_eliminate 0.39% : 0.000004s : 46: predicate.set_cell_output_no_recompute 0.64% : 0.000007s : 42: predicate.shard_identity_eliminate 0.99% : 0.000011s : 63: predicate.special_op_eliminate 0.79% : 0.000009s : 42: predicate.specialize_transform 0.69% : 0.000008s : 42: predicate.split_environ_get_set_with_tuple_value 0.69% : 0.000008s : 42: predicate.stack_unstack_eliminate 2.72% : 0.000031s : 192: predicate.stopgrad_eliminater 0.35% : 0.000004s : 21: predicate.switch_call_monad_eliminater 1.48% : 0.000017s : 93: predicate.switch_defer_inline 2.01% : 0.000023s : 135: predicate.switch_layer_defer_inline 4.62% : 0.000052s : 253: predicate.switch_simplify 1.04% : 0.000012s : 72: predicate.tile_eliminate 1.07% : 0.000012s : 72: predicate.transpose_eliminate 1.75% : 0.000020s : 114: predicate.tuple_list_convert_item_index_to_positive 1.83% : 0.000021s : 114: predicate.tuple_list_get_item_const_eliminator 1.59% : 0.000018s : 114: predicate.tuple_list_get_item_depend_reorder 2.62% : 0.000029s : 162: predicate.tuple_list_get_item_eliminator 1.66% : 0.000019s : 114: predicate.tuple_list_get_set_item_eliminator 2.40% : 0.000027s : 156: predicate.tuple_list_set_item_eliminator 1.77% : 0.000020s : 120: predicate.tuple_to_list_eliminator_ 2.86% : 0.000032s : 192: predicate.updatestate_pure_node_eliminater 3.62% : 0.000041s : 234: predicate.updatestate_useless_node_eliminater 0.35% : 0.000004s : 21: predicate.value_based_eliminate 0.62% : 0.000007s : 42: predicate.virtual_dataset_eliminate 0.61% : 0.000007s : 42: predicate.virtual_output_eliminate 0.34% : 0.000004s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002539 41 66.22% : 0.001681s : 24: func_graph_cloner_run.FuncGraphClonerGraph 33.78% : 0.000857s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.203694 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000030s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000156s : 1: add_recomputation 0.03% : 0.000069s : 1: assign_add_opt 0.28% : 0.000578s : 1: auto_monad 0.06% : 0.000116s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.19% : 0.000380s : 1: bootstrap 0.02% : 0.000032s : 1: cconv 0.03% : 0.000064s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000024s : 1: convert_after_rewriter 0.03% : 0.000064s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: distribtued_split 0.30% : 0.000617s : 1: eliminate_special_op_node 0.01% : 0.000024s : 1: environ_conv 0.01% : 0.000020s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000015s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000011s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.27% : 0.000551s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000036s : 1: opt.transform.loop_unroll_optimizer 2.98% : 0.006080s : 80: opt.transform.opt_a 0.09% : 0.000187s : 1: opt.transform.opt_after_cconv 0.28% : 0.000572s : 27: opt.transform.opt_b 0.10% : 0.000202s : 1: opt.transform.opt_trans_graph 0.04% : 0.000089s : 3: opt.transform.special_op_eliminate 0.07% : 0.000152s : 4: opt.transform.symbol_engine_opt 6.72% : 0.013683s : 1: opt_a 0.17% : 0.000354s : 1: opt_after_cconv 0.37% : 0.000751s : 1: opt_b 8.60% : 0.017509s : 1: optimize 0.01% : 0.000023s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000018s : 1: order_py_execute_after_rewriter 0.02% : 0.000038s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.02% : 0.000046s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.05% : 0.000107s : 1: pre_auto_parallel 0.04% : 0.000073s : 1: py_interpret_to_execute 0.01% : 0.000026s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000020s : 1: remove_cast_before_assign_add 0.04% : 0.000072s : 1: remove_dup_value 0.80% : 0.001635s : 1: renormalize.infer 0.45% : 0.000907s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.19% : 0.000395s : 1: rewriter_after_opt_a 0.12% : 0.000241s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000022s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000215s : 1: symbol_engine_optimizer 56.25% : 0.114580s : 1: task_emit 0.15% : 0.000303s : 1: tuple_transform 20.74% : 0.042252s : 1: type_inference 0.07% : 0.000150s : 1: validate [WARNING] DEVICE(163846,fffdd27fc0f0,python3.7):2025-02-07-13:54:20.106.400 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (3, 7) [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:54:20.106.475 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (3, 7) TotalTime = 0.180194, [21] [bootstrap]: 0.00037764 [type_inference]: 0.0437368 [auto_monad]: 0.00057942 [graph_reusing]: 5.74e-06 [inline]: 1.55e-06 [parallel-infer-symbol]: 2.67e-06 [pre_auto_parallel]: 9.05e-05 [insert-virtual-dataset]: 3.38e-06 [parallel-infer-symbol-second]: 5.79996e-07 [dataset_repeat_opt]: 1.54e-06 [pipeline_split]: 1.62999e-06 [optimize]: 0.016128, [52] [py_interpret_to_execute]: 6.716e-05 [rewriter_before_opt_a]: 0.00020473 [opt_a]: 0.0126146, [2] [Cycle 1]: 0.00702059, [43] [expand_dump_flag]: 6.95999e-06 [switch_simplify]: 0.0002183 [loop_unroll]: 7.957e-05 [a_1]: 0.00242259 [recompute_prepare]: 2.645e-05 [updatestate_depend_eliminate]: 9.952e-05 [updatestate_assign_eliminate]: 2.347e-05 [updatestate_loads_eliminate]: 1.636e-05 [parameter_eliminate]: 4.08001e-06 [a_2]: 0.00032592 [accelerated_algorithm]: 4.361e-05 [shard]: 2.42e-06 [meta_shard_fg_expand]: 8.94e-06 [shard_inline]: 2.135e-05 [auto_parallel]: 1.882e-05 [parallel]: 8.23e-06 [flash_sp]: 1.504e-05 [merge_comm]: 1.737e-05 [allreduce_fusion]: 1.374e-05 [matmul_add_comm_reduction]: 2.278e-05 [allreduce_slice_to_reducescatter]: 5.00004e-07 [virtual_shard_identity]: 2.254e-05 [virtual_dataset]: 2.106e-05 [get_grad_eliminate_]: 2.055e-05 [virtual_output]: 6.882e-05 [merge_forward]: 1.446e-05 [cell_reuse_recompute_pass]: 1.89e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.239e-05 [before_grad]: 3.831e-05 [inplace_validation]: 1.328e-05 [meta_fg_expand]: 1.709e-05 [inplace_validation_after_expand]: 1.61e-05 [flash_sp_send_recv_attached]: 3.14999e-06 [receive_attached]: 4.79999e-06 [after_resolve]: 2.879e-05 [a_after_grad]: 3.667e-05 [special_op_eliminate]: 2.143e-05 [renormalize]: 0.002449 [add_forward_monad_depend]: 4.51e-06 [auto_monad_grad]: 2.08001e-06 [auto_monad_eliminator]: 6.084e-05 [cse]: 0.000316 [a_3]: 0.00015414 [Cycle 2]: 0.0020126, [43] [expand_dump_flag]: 1.51001e-06 [switch_simplify]: 2.333e-05 [loop_unroll]: 2.074e-05 [a_1]: 0.00073324 [recompute_prepare]: 2.068e-05 [updatestate_depend_eliminate]: 1.729e-05 [updatestate_assign_eliminate]: 1.668e-05 [updatestate_loads_eliminate]: 1.527e-05 [parameter_eliminate]: 2.41e-06 [a_2]: 0.00031005 [accelerated_algorithm]: 2.408e-05 [shard]: 1.24e-06 [meta_shard_fg_expand]: 6.79e-06 [shard_inline]: 2.15e-05 [auto_parallel]: 1.97e-05 [parallel]: 5.03e-06 [flash_sp]: 3.50999e-06 [merge_comm]: 1.621e-05 [allreduce_fusion]: 1.347e-05 [matmul_add_comm_reduction]: 1.908e-05 [allreduce_slice_to_reducescatter]: 2.90005e-07 [virtual_shard_identity]: 2.233e-05 [virtual_dataset]: 2.053e-05 [get_grad_eliminate_]: 2.003e-05 [virtual_output]: 2e-05 [merge_forward]: 1.191e-05 [cell_reuse_recompute_pass]: 2.21e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.055e-05 [before_grad]: 3.611e-05 [inplace_validation]: 1.189e-05 [meta_fg_expand]: 1.343e-05 [inplace_validation_after_expand]: 1.583e-05 [flash_sp_send_recv_attached]: 9.20001e-07 [receive_attached]: 7.7e-07 [after_resolve]: 2.48e-05 [a_after_grad]: 3.515e-05 [special_op_eliminate]: 2.009e-05 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 1.57999e-06 [auto_monad_grad]: 1.41001e-06 [auto_monad_eliminator]: 4.109e-05 [cse]: 5.619e-05 [a_3]: 0.00014017 [py_interpret_to_execute_after_opt_a]: 1.967e-05 [slice_cell_reuse_recomputed_activation]: 2.84e-06 [rewriter_after_opt_a]: 0.00037607 [convert_after_rewriter]: 1.914e-05 [order_py_execute_after_rewriter]: 1.301e-05 [opt_b]: 0.00064202, [1] [Cycle 1]: 0.00063558, [7] [b_1]: 0.00047496 [b_2]: 2.346e-05 [updatestate_depend_eliminate]: 1.41e-05 [updatestate_assign_eliminate]: 1.613e-05 [updatestate_loads_eliminate]: 1.496e-05 [renormalize]: 3.89991e-07 [cse]: 5.568e-05 [optimize_parallel_all_gather_comm]: 1.912e-05 [overlap_param_gather]: 3.09e-06 [cconv]: 2.782e-05 [loop_unroll]: 0.00053445 [opt_after_cconv]: 0.0003086, [1] [Cycle 1]: 0.00030227, [7] [c_1]: 0.0001514 [parameter_eliminate]: 2.43e-06 [updatestate_depend_eliminate]: 1.681e-05 [updatestate_assign_eliminate]: 1.598e-05 [updatestate_loads_eliminate]: 1.554e-05 [cse]: 6.165e-05 [renormalize]: 4.69998e-07 [remove_dup_value]: 6.549e-05 [tuple_transform]: 0.0001797, [1] [Cycle 1]: 0.00017422, [2] [d_1]: 0.00016359 [renormalize]: 2.09999e-07 [partial_unused_args_eliminate]: 2.52e-06 [add_cache_embedding]: 2.497e-05 [add_recomputation]: 0.00015027 [cse_after_recomputation]: 5.826e-05, [1] [Cycle 1]: 5.235e-05, [1] [cse]: 4.643e-05 [environ_conv]: 1.799e-05 [swap_dp_allreduce_reducescatter]: 7.844e-05 [bias_add_comm_swap]: 2.2e-06 [label_micro_interleaved_index]: 1.86e-06 [label_fine_grained_interleaved_index]: 2.01e-06 [merge_cast_opt]: 1.36001e-06 [slice_recompute_activation]: 1.81e-06 [micro_interleaved_order_control]: 1.85e-06 [assign_add_opt]: 6.001e-05 [ForceFp32Comm]: 8.99992e-07 [remove_cast_before_assign_add]: 1.562e-05 [full_micro_interleaved_order_control]: 2.12999e-06 [reorder_send_recv_between_fp_bp]: 2.35e-06 [comm_op_add_attrs]: 6.154e-05 [add_comm_op_reuse_tag]: 2.17001e-06 [interleave_split_concat_branches]: 1.03e-06 [interleave_parallel_branches]: 8.2e-07 [overlap_opt_shard_in_pipeline]: 2.32999e-06 [overlap_opt_shard_grad_in_pipeline]: 2.31e-06 [control_data_broadcast_order]: 1.06001e-06 [grouped_pairwise_exchange_alltoall]: 9.62999e-06 [offloading_packed_experts]: 2.3e-06 [overlap_recompute_and_grad_model_parallel]: 1.91e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.40009e-07 [overlap_recompute_allgather_and_fa_grad]: 4.137e-05 [overlap_grad_ring_attention]: 2.21e-06 [overlap_grad_flash_sp]: 2.864e-05 [begin_end_overlap_inline]: 1.09e-06 [split_matmul_comm_elemetwise]: 1.9e-06 [split_layernorm_comm]: 1.76e-06 [handle_group_info]: 9.39e-06 [symbol_engine_optimizer]: 0.00018056, [1] [Cycle 1]: 0.00017535, [6] [build]: 1.707e-05 [elim_shapecalc]: 2.85e-05 [elim_not_effective]: 4.21e-05 [opt_reshape]: 2.238e-05 [fold_const_symbol]: 3.672e-05 [renormalize]: 4.1e-07 [pipeline_parallel_scheduler]: 1.54e-06 [auto_monad_reorder]: 0.00010054 [get_jit_bprop_graph]: 4.60001e-07 [rewriter_after_jit_bprop_graph]: 4.19997e-07 [eliminate_special_op_node]: 0.00057651 [distribtued_split]: 1.29e-06 [validate]: 7.592e-05 [task_emit]: 0.118169 [execute]: 1.16e-05 Sums bootstrap : 0.000378s : 0.22% type_inference : 0.043737s : 24.93% auto_monad : 0.000579s : 0.33% graph_reusing : 0.000006s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000091s : 0.05% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000067s : 0.04% optimize.rewriter_before_opt_a : 0.000205s : 0.12% optimize.opt_a.expand_dump_flag : 0.000008s : 0.00% optimize.opt_a.switch_simplify : 0.000242s : 0.14% optimize.opt_a.loop_unroll : 0.000100s : 0.06% optimize.opt_a.a_1 : 0.003156s : 1.80% optimize.opt_a.recompute_prepare : 0.000047s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000117s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000040s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000032s : 0.02% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.000636s : 0.36% optimize.opt_a.accelerated_algorithm : 0.000068s : 0.04% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000016s : 0.01% optimize.opt_a.shard_inline : 0.000043s : 0.02% optimize.opt_a.auto_parallel : 0.000039s : 0.02% optimize.opt_a.parallel : 0.000013s : 0.01% optimize.opt_a.flash_sp : 0.000019s : 0.01% optimize.opt_a.merge_comm : 0.000034s : 0.02% optimize.opt_a.allreduce_fusion : 0.000027s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000042s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000045s : 0.03% optimize.opt_a.virtual_dataset : 0.000042s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000041s : 0.02% optimize.opt_a.virtual_output : 0.000089s : 0.05% optimize.opt_a.merge_forward : 0.000026s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000083s : 0.05% optimize.opt_a.before_grad : 0.000074s : 0.04% optimize.opt_a.inplace_validation : 0.000025s : 0.01% optimize.opt_a.meta_fg_expand : 0.000031s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000032s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.000054s : 0.03% optimize.opt_a.a_after_grad : 0.000072s : 0.04% optimize.opt_a.special_op_eliminate : 0.000042s : 0.02% optimize.opt_a.renormalize : 0.002449s : 1.40% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000102s : 0.06% optimize.opt_a.cse : 0.000372s : 0.21% optimize.opt_a.a_3 : 0.000294s : 0.17% optimize.py_interpret_to_execute_after_opt_a : 0.000020s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000376s : 0.21% optimize.convert_after_rewriter : 0.000019s : 0.01% optimize.order_py_execute_after_rewriter : 0.000013s : 0.01% optimize.opt_b.b_1 : 0.000475s : 0.27% optimize.opt_b.b_2 : 0.000023s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000056s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000019s : 0.01% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000028s : 0.02% optimize.loop_unroll : 0.000534s : 0.30% optimize.opt_after_cconv.c_1 : 0.000151s : 0.09% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.cse : 0.000062s : 0.04% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000065s : 0.04% optimize.tuple_transform.d_1 : 0.000164s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000025s : 0.01% optimize.add_recomputation : 0.000150s : 0.09% optimize.cse_after_recomputation.cse : 0.000046s : 0.03% optimize.environ_conv : 0.000018s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000078s : 0.04% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000060s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000016s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000062s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000041s : 0.02% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000029s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000009s : 0.01% optimize.symbol_engine_optimizer.build : 0.000017s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000029s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000042s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000022s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000037s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000101s : 0.06% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000577s : 0.33% distribtued_split : 0.000001s : 0.00% validate : 0.000076s : 0.04% task_emit : 0.118169s : 67.36% execute : 0.000012s : 0.01% Time group info: ------[substitution.] 0.000625 300 1.14% : 0.000007s : 2: substitution.depend_value_elim 1.05% : 0.000007s : 18: substitution.elim_not_effective 0.99% : 0.000006s : 18: substitution.fold_const_symbol 2.73% : 0.000017s : 21: substitution.graph_param_transform 59.71% : 0.000373s : 15: substitution.inline 2.50% : 0.000016s : 36: substitution.j_node_and_user_rematch 3.31% : 0.000021s : 2: substitution.less_batch_normalization 2.61% : 0.000016s : 30: substitution.load_eliminater 0.92% : 0.000006s : 6: substitution.reduce_all_const_elim 3.29% : 0.000021s : 36: substitution.remove_not_recompute_node 0.93% : 0.000006s : 6: substitution.replace_old_param 2.41% : 0.000015s : 4: substitution.switch_simplify 3.79% : 0.000024s : 6: substitution.tuple_list_get_item_eliminator 7.67% : 0.000048s : 44: substitution.updatestate_pure_node_eliminater 6.95% : 0.000043s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.043669 2 93.63% : 0.040888s : 1: type_inference.infer 6.37% : 0.002781s : 1: type_inference.specialize ------[replace.] 0.000221 25 53.06% : 0.000117s : 15: replace.inline 30.81% : 0.000068s : 4: replace.switch_simplify 16.13% : 0.000036s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000397 25 91.73% : 0.000365s : 15: match.inline 3.26% : 0.000013s : 4: match.switch_simplify 5.01% : 0.000020s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000956 6511 1.01% : 0.000010s : 72: predicate.accumulaten_eliminater 0.66% : 0.000006s : 21: predicate.ad_related_special_op_eliminate 0.57% : 0.000005s : 42: predicate.addn_check_dump 0.97% : 0.000009s : 72: predicate.addn_zero_filter 0.97% : 0.000009s : 72: predicate.adjust_all_reduce_mul_add 2.15% : 0.000021s : 114: predicate.arithmetic_simplify 1.01% : 0.000010s : 72: predicate.cast_eliminate 0.62% : 0.000006s : 42: predicate.check_bprop_eliminate 0.57% : 0.000005s : 42: predicate.compare_switch_simplify 0.17% : 0.000002s : 21: predicate.const_output_eliminate 0.32% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.51% : 0.000014s : 78: predicate.convert_tensor_eliminate 0.60% : 0.000006s : 42: predicate.depend_value_elim 1.09% : 0.000010s : 72: predicate.dict_get_item_const_eliminator 1.12% : 0.000011s : 72: predicate.dict_get_item_eliminator 1.13% : 0.000011s : 72: predicate.dict_set_item_eliminator 0.19% : 0.000002s : 21: predicate.elim_not_effective 0.37% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.29% : 0.000012s : 93: predicate.environ_add_const_eliminate 1.27% : 0.000012s : 93: predicate.environ_get_add_eliminate 1.25% : 0.000012s : 93: predicate.environ_get_depend_swap 1.89% : 0.000018s : 135: predicate.environ_get_eliminate 1.26% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.30% : 0.000012s : 93: predicate.exchange_switch_depend_value 1.69% : 0.000016s : 93: predicate.float_depend_g_call 0.58% : 0.000006s : 42: predicate.float_environ_get_switch 0.85% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 21: predicate.fold_const_symbol 0.61% : 0.000006s : 42: predicate.get_grad_eliminate 0.21% : 0.000002s : 21: predicate.graph_param_transform 0.60% : 0.000006s : 42: predicate.incorporate_call 0.57% : 0.000005s : 42: predicate.incorporate_call_switch 5.47% : 0.000052s : 291: predicate.inline 0.80% : 0.000008s : 42: predicate.inline_without_move 0.31% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.75% : 0.000007s : 42: predicate.less_batch_normalization 1.77% : 0.000017s : 120: predicate.list_to_tuple_eliminator_ 2.89% : 0.000028s : 192: predicate.load_eliminater 0.74% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.69% : 0.000016s : 110: predicate.loop_unroll_before_grad 1.69% : 0.000016s : 114: predicate.make_slice_get_slice_eliminator 0.59% : 0.000006s : 42: predicate.merge_addn 0.59% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.59% : 0.000006s : 42: predicate.mini_step_allgather_replace 0.98% : 0.000009s : 72: predicate.minmaximum_grad 0.42% : 0.000004s : 21: predicate.mutable_eliminate 0.32% : 0.000003s : 21: predicate.opt_reshape 0.36% : 0.000003s : 21: predicate.parallel_virtual_node 1.87% : 0.000018s : 93: predicate.partial_defer_inline 1.55% : 0.000015s : 99: predicate.partial_eliminate 1.05% : 0.000010s : 72: predicate.print_const_string_wrapper 0.66% : 0.000006s : 42: predicate.reduce_all_const_elim 1.23% : 0.000012s : 72: predicate.reduce_eliminate 0.34% : 0.000003s : 42: predicate.remove_not_recompute_node 1.12% : 0.000011s : 120: predicate.replace_applicator 0.34% : 0.000003s : 42: predicate.replace_old_param 0.17% : 0.000002s : 21: predicate.reset_defer_inline 1.04% : 0.000010s : 72: predicate.reshape_eliminate 0.61% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.35% : 0.000003s : 21: predicate.row_tensor_eliminate 0.79% : 0.000008s : 42: predicate.same_eliminate 0.37% : 0.000004s : 46: predicate.set_cell_output_no_recompute 0.70% : 0.000007s : 42: predicate.shard_identity_eliminate 1.03% : 0.000010s : 63: predicate.special_op_eliminate 0.72% : 0.000007s : 42: predicate.specialize_transform 0.71% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.73% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.72% : 0.000026s : 192: predicate.stopgrad_eliminater 0.34% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.56% : 0.000015s : 93: predicate.switch_defer_inline 1.97% : 0.000019s : 135: predicate.switch_layer_defer_inline 4.78% : 0.000046s : 253: predicate.switch_simplify 1.07% : 0.000010s : 72: predicate.tile_eliminate 1.02% : 0.000010s : 72: predicate.transpose_eliminate 1.86% : 0.000018s : 114: predicate.tuple_list_convert_item_index_to_positive 1.77% : 0.000017s : 114: predicate.tuple_list_get_item_const_eliminator 1.70% : 0.000016s : 114: predicate.tuple_list_get_item_depend_reorder 2.58% : 0.000025s : 162: predicate.tuple_list_get_item_eliminator 1.69% : 0.000016s : 114: predicate.tuple_list_get_set_item_eliminator 2.36% : 0.000023s : 156: predicate.tuple_list_set_item_eliminator 1.77% : 0.000017s : 120: predicate.tuple_to_list_eliminator_ 2.93% : 0.000028s : 192: predicate.updatestate_pure_node_eliminater 3.89% : 0.000037s : 234: predicate.updatestate_useless_node_eliminater 0.35% : 0.000003s : 21: predicate.value_based_eliminate 0.61% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.63% : 0.000006s : 42: predicate.virtual_output_eliminate 0.47% : 0.000005s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002996 41 67.03% : 0.002008s : 24: func_graph_cloner_run.FuncGraphClonerGraph 32.97% : 0.000988s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.204744 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000029s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000155s : 1: add_recomputation 0.03% : 0.000064s : 1: assign_add_opt 0.29% : 0.000601s : 1: auto_monad 0.05% : 0.000108s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.20% : 0.000408s : 1: bootstrap 0.02% : 0.000032s : 1: cconv 0.03% : 0.000066s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000024s : 1: convert_after_rewriter 0.03% : 0.000062s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.29% : 0.000591s : 1: eliminate_special_op_node 0.01% : 0.000022s : 1: environ_conv 0.01% : 0.000020s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000013s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000013s : 1: handle_group_info 0.00% : 0.000006s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.27% : 0.000544s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000032s : 1: opt.transform.loop_unroll_optimizer 2.46% : 0.005047s : 80: opt.transform.opt_a 0.07% : 0.000150s : 1: opt.transform.opt_after_cconv 0.23% : 0.000478s : 27: opt.transform.opt_b 0.08% : 0.000162s : 1: opt.transform.opt_trans_graph 0.04% : 0.000075s : 3: opt.transform.special_op_eliminate 0.06% : 0.000125s : 4: opt.transform.symbol_engine_opt 6.16% : 0.012619s : 1: opt_a 0.15% : 0.000313s : 1: opt_after_cconv 0.32% : 0.000645s : 1: opt_b 7.88% : 0.016137s : 1: optimize 0.01% : 0.000023s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000017s : 1: order_py_execute_after_rewriter 0.02% : 0.000032s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000006s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.02% : 0.000045s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.05% : 0.000099s : 1: pre_auto_parallel 0.04% : 0.000073s : 1: py_interpret_to_execute 0.01% : 0.000024s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000019s : 1: remove_cast_before_assign_add 0.03% : 0.000071s : 1: remove_dup_value 0.76% : 0.001563s : 1: renormalize.infer 0.43% : 0.000878s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.19% : 0.000383s : 1: rewriter_after_opt_a 0.10% : 0.000210s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.04% : 0.000082s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000184s : 1: symbol_engine_optimizer 57.73% : 0.118201s : 1: task_emit 0.09% : 0.000183s : 1: tuple_transform 21.37% : 0.043760s : 1: type_inference 0.08% : 0.000163s : 1: validate TotalTime = 0.181558, [21] [bootstrap]: 0.00036796 [type_inference]: 0.0451238 [auto_monad]: 0.00054515 [graph_reusing]: 5.75e-06 [inline]: 1.56001e-06 [parallel-infer-symbol]: 2.28001e-06 [pre_auto_parallel]: 8.735e-05 [insert-virtual-dataset]: 3.20999e-06 [parallel-infer-symbol-second]: 7.20014e-07 [dataset_repeat_opt]: 1.39e-06 [pipeline_split]: 1.55e-06 [optimize]: 0.0160207, [52] [py_interpret_to_execute]: 6.526e-05 [rewriter_before_opt_a]: 0.00019833 [opt_a]: 0.012542, [2] [Cycle 1]: 0.00707003, [43] [expand_dump_flag]: 6.88e-06 [switch_simplify]: 0.00021805 [loop_unroll]: 8.031e-05 [a_1]: 0.00247193 [recompute_prepare]: 2.714e-05 [updatestate_depend_eliminate]: 0.00010073 [updatestate_assign_eliminate]: 2.232e-05 [updatestate_loads_eliminate]: 1.638e-05 [parameter_eliminate]: 4.13999e-06 [a_2]: 0.00032428 [accelerated_algorithm]: 4.445e-05 [shard]: 2.53999e-06 [meta_shard_fg_expand]: 9.04e-06 [shard_inline]: 2.117e-05 [auto_parallel]: 1.91e-05 [parallel]: 8.16e-06 [flash_sp]: 1.422e-05 [merge_comm]: 1.654e-05 [allreduce_fusion]: 1.355e-05 [matmul_add_comm_reduction]: 2.179e-05 [allreduce_slice_to_reducescatter]: 4.70012e-07 [virtual_shard_identity]: 2.314e-05 [virtual_dataset]: 2.075e-05 [get_grad_eliminate_]: 2.024e-05 [virtual_output]: 2.02e-05 [merge_forward]: 1.331e-05 [cell_reuse_recompute_pass]: 2.01e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.374e-05 [before_grad]: 3.779e-05 [inplace_validation]: 1.302e-05 [meta_fg_expand]: 1.711e-05 [inplace_validation_after_expand]: 1.61e-05 [flash_sp_send_recv_attached]: 2.89999e-06 [receive_attached]: 4.92e-06 [after_resolve]: 2.884e-05 [a_after_grad]: 3.54e-05 [special_op_eliminate]: 2.108e-05 [renormalize]: 0.00247517 [add_forward_monad_depend]: 4.43e-06 [auto_monad_grad]: 2.22999e-06 [auto_monad_eliminator]: 7.655e-05 [cse]: 0.00033121 [a_3]: 0.0001559 [Cycle 2]: 0.00195377, [43] [expand_dump_flag]: 1.69e-06 [switch_simplify]: 2.314e-05 [loop_unroll]: 2.107e-05 [a_1]: 0.00068661 [recompute_prepare]: 2.095e-05 [updatestate_depend_eliminate]: 1.61e-05 [updatestate_assign_eliminate]: 1.61e-05 [updatestate_loads_eliminate]: 1.449e-05 [parameter_eliminate]: 2.2e-06 [a_2]: 0.0003071 [accelerated_algorithm]: 2.44e-05 [shard]: 1.37e-06 [meta_shard_fg_expand]: 6.53999e-06 [shard_inline]: 2.133e-05 [auto_parallel]: 1.964e-05 [parallel]: 3.86e-06 [flash_sp]: 3.48e-06 [merge_comm]: 1.544e-05 [allreduce_fusion]: 1.326e-05 [matmul_add_comm_reduction]: 1.857e-05 [allreduce_slice_to_reducescatter]: 2.80008e-07 [virtual_shard_identity]: 2.23e-05 [virtual_dataset]: 2.155e-05 [get_grad_eliminate_]: 2.036e-05 [virtual_output]: 1.996e-05 [merge_forward]: 1.212e-05 [cell_reuse_recompute_pass]: 2.35e-06 [cell_reuse_handle_not_recompute_node_pass]: 3.972e-05 [before_grad]: 3.673e-05 [inplace_validation]: 1.154e-05 [meta_fg_expand]: 1.317e-05 [inplace_validation_after_expand]: 1.523e-05 [flash_sp_send_recv_attached]: 8.79998e-07 [receive_attached]: 8.29998e-07 [after_resolve]: 2.45e-05 [a_after_grad]: 3.466e-05 [special_op_eliminate]: 2.034e-05 [renormalize]: 7.99919e-08 [add_forward_monad_depend]: 1.59e-06 [auto_monad_grad]: 1.44e-06 [auto_monad_eliminator]: 4.215e-05 [cse]: 5.558e-05 [a_3]: 0.00014161 [py_interpret_to_execute_after_opt_a]: 1.999e-05 [slice_cell_reuse_recomputed_activation]: 2.32999e-06 [rewriter_after_opt_a]: 0.00037167 [convert_after_rewriter]: 1.881e-05 [order_py_execute_after_rewriter]: 1.282e-05 [opt_b]: 0.0006412, [1] [Cycle 1]: 0.00063523, [7] [b_1]: 0.00047681 [b_2]: 2.426e-05 [updatestate_depend_eliminate]: 1.372e-05 [updatestate_assign_eliminate]: 1.593e-05 [updatestate_loads_eliminate]: 1.489e-05 [renormalize]: 3.89991e-07 [cse]: 5.501e-05 [optimize_parallel_all_gather_comm]: 2.186e-05 [overlap_param_gather]: 2.85001e-06 [cconv]: 2.801e-05 [loop_unroll]: 0.00056848 [opt_after_cconv]: 0.00031568, [1] [Cycle 1]: 0.00030852, [7] [c_1]: 0.00015803 [parameter_eliminate]: 2.24001e-06 [updatestate_depend_eliminate]: 1.702e-05 [updatestate_assign_eliminate]: 1.665e-05 [updatestate_loads_eliminate]: 1.552e-05 [cse]: 6.384e-05 [renormalize]: 5.10001e-07 [remove_dup_value]: 6.706e-05 [tuple_transform]: 0.00018546, [1] [Cycle 1]: 0.00018001, [2] [d_1]: 0.00016946 [renormalize]: 3.50003e-07 [partial_unused_args_eliminate]: 3.00001e-06 [add_cache_embedding]: 2.454e-05 [add_recomputation]: 0.00015166 [cse_after_recomputation]: 5.665e-05, [1] [Cycle 1]: 5.102e-05, [1] [cse]: 4.544e-05 [environ_conv]: 1.814e-05 [swap_dp_allreduce_reducescatter]: 1.889e-05 [bias_add_comm_swap]: 2.53001e-06 [label_micro_interleaved_index]: 1.77001e-06 [label_fine_grained_interleaved_index]: 2.02e-06 [merge_cast_opt]: 1.09999e-06 [slice_recompute_activation]: 2.07e-06 [micro_interleaved_order_control]: 1.78e-06 [assign_add_opt]: 5.968e-05 [ForceFp32Comm]: 8.39995e-07 [remove_cast_before_assign_add]: 1.675e-05 [full_micro_interleaved_order_control]: 2.26e-06 [reorder_send_recv_between_fp_bp]: 2.02001e-06 [comm_op_add_attrs]: 5.639e-05 [add_comm_op_reuse_tag]: 2.49001e-06 [interleave_split_concat_branches]: 8.09989e-07 [interleave_parallel_branches]: 8.39995e-07 [overlap_opt_shard_in_pipeline]: 2.05e-06 [overlap_opt_shard_grad_in_pipeline]: 2.42e-06 [control_data_broadcast_order]: 1.39e-06 [grouped_pairwise_exchange_alltoall]: 9.48001e-06 [offloading_packed_experts]: 2.60001e-06 [overlap_recompute_and_grad_model_parallel]: 2.12999e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.11999e-06 [overlap_recompute_allgather_and_fa_grad]: 4.225e-05 [overlap_grad_ring_attention]: 2e-06 [overlap_grad_flash_sp]: 2.673e-05 [begin_end_overlap_inline]: 7.50006e-07 [split_matmul_comm_elemetwise]: 1.83999e-06 [split_layernorm_comm]: 1.86999e-06 [handle_group_info]: 1.108e-05 [symbol_engine_optimizer]: 0.00018008, [1] [Cycle 1]: 0.00017505, [6] [build]: 1.844e-05 [elim_shapecalc]: 2.802e-05 [elim_not_effective]: 4.186e-05 [opt_reshape]: 2.162e-05 [fold_const_symbol]: 3.714e-05 [renormalize]: 3.69997e-07 [pipeline_parallel_scheduler]: 2.11e-06 [auto_monad_reorder]: 0.00010255 [get_jit_bprop_graph]: 5.10001e-07 [rewriter_after_jit_bprop_graph]: 5.10001e-07 [eliminate_special_op_node]: 0.00056718 [distribtued_split]: 1.59e-06 [validate]: 7.683e-05 [task_emit]: 0.118299 [execute]: 1.14e-05 Sums bootstrap : 0.000368s : 0.21% type_inference : 0.045124s : 25.51% auto_monad : 0.000545s : 0.31% graph_reusing : 0.000006s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000087s : 0.05% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000065s : 0.04% optimize.rewriter_before_opt_a : 0.000198s : 0.11% optimize.opt_a.expand_dump_flag : 0.000009s : 0.00% optimize.opt_a.switch_simplify : 0.000241s : 0.14% optimize.opt_a.loop_unroll : 0.000101s : 0.06% optimize.opt_a.a_1 : 0.003159s : 1.79% optimize.opt_a.recompute_prepare : 0.000048s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000117s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000038s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000031s : 0.02% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.000631s : 0.36% optimize.opt_a.accelerated_algorithm : 0.000069s : 0.04% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000016s : 0.01% optimize.opt_a.shard_inline : 0.000042s : 0.02% optimize.opt_a.auto_parallel : 0.000039s : 0.02% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000018s : 0.01% optimize.opt_a.merge_comm : 0.000032s : 0.02% optimize.opt_a.allreduce_fusion : 0.000027s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000040s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000045s : 0.03% optimize.opt_a.virtual_dataset : 0.000042s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000041s : 0.02% optimize.opt_a.virtual_output : 0.000040s : 0.02% optimize.opt_a.merge_forward : 0.000025s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000083s : 0.05% optimize.opt_a.before_grad : 0.000075s : 0.04% optimize.opt_a.inplace_validation : 0.000025s : 0.01% optimize.opt_a.meta_fg_expand : 0.000030s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000031s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.000053s : 0.03% optimize.opt_a.a_after_grad : 0.000070s : 0.04% optimize.opt_a.special_op_eliminate : 0.000041s : 0.02% optimize.opt_a.renormalize : 0.002475s : 1.40% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000119s : 0.07% optimize.opt_a.cse : 0.000387s : 0.22% optimize.opt_a.a_3 : 0.000298s : 0.17% optimize.py_interpret_to_execute_after_opt_a : 0.000020s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000372s : 0.21% optimize.convert_after_rewriter : 0.000019s : 0.01% optimize.order_py_execute_after_rewriter : 0.000013s : 0.01% optimize.opt_b.b_1 : 0.000477s : 0.27% optimize.opt_b.b_2 : 0.000024s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000055s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000022s : 0.01% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000028s : 0.02% optimize.loop_unroll : 0.000568s : 0.32% optimize.opt_after_cconv.c_1 : 0.000158s : 0.09% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.cse : 0.000064s : 0.04% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000067s : 0.04% optimize.tuple_transform.d_1 : 0.000169s : 0.10% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000025s : 0.01% optimize.add_recomputation : 0.000152s : 0.09% optimize.cse_after_recomputation.cse : 0.000045s : 0.03% optimize.environ_conv : 0.000018s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000019s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000060s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000017s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000056s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000042s : 0.02% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000027s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000011s : 0.01% optimize.symbol_engine_optimizer.build : 0.000018s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000028s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000042s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000022s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000037s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000103s : 0.06% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000567s : 0.32% distribtued_split : 0.000002s : 0.00% validate : 0.000077s : 0.04% task_emit : 0.118299s : 66.88% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000624 300 1.08% : 0.000007s : 2: substitution.depend_value_elim 1.07% : 0.000007s : 18: substitution.elim_not_effective 0.98% : 0.000006s : 18: substitution.fold_const_symbol 2.66% : 0.000017s : 21: substitution.graph_param_transform 59.70% : 0.000373s : 15: substitution.inline 2.45% : 0.000015s : 36: substitution.j_node_and_user_rematch 3.39% : 0.000021s : 2: substitution.less_batch_normalization 2.66% : 0.000017s : 30: substitution.load_eliminater 0.90% : 0.000006s : 6: substitution.reduce_all_const_elim 3.19% : 0.000020s : 36: substitution.remove_not_recompute_node 0.89% : 0.000006s : 6: substitution.replace_old_param 2.45% : 0.000015s : 4: substitution.switch_simplify 4.23% : 0.000026s : 6: substitution.tuple_list_get_item_eliminator 7.66% : 0.000048s : 44: substitution.updatestate_pure_node_eliminater 6.69% : 0.000042s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.045058 2 93.54% : 0.042148s : 1: type_inference.infer 6.46% : 0.002910s : 1: type_inference.specialize ------[replace.] 0.000219 25 52.74% : 0.000116s : 15: replace.inline 30.72% : 0.000067s : 4: replace.switch_simplify 16.54% : 0.000036s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000399 25 91.15% : 0.000363s : 15: match.inline 3.23% : 0.000013s : 4: match.switch_simplify 5.62% : 0.000022s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000952 6511 1.05% : 0.000010s : 72: predicate.accumulaten_eliminater 0.71% : 0.000007s : 21: predicate.ad_related_special_op_eliminate 0.58% : 0.000005s : 42: predicate.addn_check_dump 1.05% : 0.000010s : 72: predicate.addn_zero_filter 1.01% : 0.000010s : 72: predicate.adjust_all_reduce_mul_add 2.13% : 0.000020s : 114: predicate.arithmetic_simplify 1.08% : 0.000010s : 72: predicate.cast_eliminate 0.59% : 0.000006s : 42: predicate.check_bprop_eliminate 0.57% : 0.000005s : 42: predicate.compare_switch_simplify 0.16% : 0.000002s : 21: predicate.const_output_eliminate 0.32% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.44% : 0.000014s : 78: predicate.convert_tensor_eliminate 0.61% : 0.000006s : 42: predicate.depend_value_elim 1.08% : 0.000010s : 72: predicate.dict_get_item_const_eliminator 1.09% : 0.000010s : 72: predicate.dict_get_item_eliminator 1.07% : 0.000010s : 72: predicate.dict_set_item_eliminator 0.19% : 0.000002s : 21: predicate.elim_not_effective 0.38% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.28% : 0.000012s : 93: predicate.environ_add_const_eliminate 1.28% : 0.000012s : 93: predicate.environ_get_add_eliminate 1.27% : 0.000012s : 93: predicate.environ_get_depend_swap 1.93% : 0.000018s : 135: predicate.environ_get_eliminate 1.29% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.35% : 0.000013s : 93: predicate.exchange_switch_depend_value 1.67% : 0.000016s : 93: predicate.float_depend_g_call 0.58% : 0.000005s : 42: predicate.float_environ_get_switch 0.86% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.16% : 0.000001s : 21: predicate.fold_const_symbol 0.62% : 0.000006s : 42: predicate.get_grad_eliminate 0.21% : 0.000002s : 21: predicate.graph_param_transform 0.60% : 0.000006s : 42: predicate.incorporate_call 0.58% : 0.000006s : 42: predicate.incorporate_call_switch 5.44% : 0.000052s : 291: predicate.inline 0.80% : 0.000008s : 42: predicate.inline_without_move 0.31% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.75% : 0.000007s : 42: predicate.less_batch_normalization 1.79% : 0.000017s : 120: predicate.list_to_tuple_eliminator_ 2.88% : 0.000027s : 192: predicate.load_eliminater 0.77% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.78% : 0.000017s : 110: predicate.loop_unroll_before_grad 1.73% : 0.000016s : 114: predicate.make_slice_get_slice_eliminator 0.61% : 0.000006s : 42: predicate.merge_addn 0.62% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.60% : 0.000006s : 42: predicate.mini_step_allgather_replace 1.00% : 0.000010s : 72: predicate.minmaximum_grad 0.40% : 0.000004s : 21: predicate.mutable_eliminate 0.32% : 0.000003s : 21: predicate.opt_reshape 0.33% : 0.000003s : 21: predicate.parallel_virtual_node 1.85% : 0.000018s : 93: predicate.partial_defer_inline 1.57% : 0.000015s : 99: predicate.partial_eliminate 1.08% : 0.000010s : 72: predicate.print_const_string_wrapper 0.66% : 0.000006s : 42: predicate.reduce_all_const_elim 1.23% : 0.000012s : 72: predicate.reduce_eliminate 0.35% : 0.000003s : 42: predicate.remove_not_recompute_node 1.17% : 0.000011s : 120: predicate.replace_applicator 0.35% : 0.000003s : 42: predicate.replace_old_param 0.17% : 0.000002s : 21: predicate.reset_defer_inline 1.01% : 0.000010s : 72: predicate.reshape_eliminate 0.62% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.37% : 0.000003s : 21: predicate.row_tensor_eliminate 0.85% : 0.000008s : 42: predicate.same_eliminate 0.36% : 0.000003s : 46: predicate.set_cell_output_no_recompute 0.69% : 0.000007s : 42: predicate.shard_identity_eliminate 1.05% : 0.000010s : 63: predicate.special_op_eliminate 0.69% : 0.000007s : 42: predicate.specialize_transform 0.72% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.71% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.76% : 0.000026s : 192: predicate.stopgrad_eliminater 0.34% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.45% : 0.000014s : 93: predicate.switch_defer_inline 2.04% : 0.000019s : 135: predicate.switch_layer_defer_inline 4.85% : 0.000046s : 253: predicate.switch_simplify 1.01% : 0.000010s : 72: predicate.tile_eliminate 1.02% : 0.000010s : 72: predicate.transpose_eliminate 1.80% : 0.000017s : 114: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000017s : 114: predicate.tuple_list_get_item_const_eliminator 1.60% : 0.000015s : 114: predicate.tuple_list_get_item_depend_reorder 2.66% : 0.000025s : 162: predicate.tuple_list_get_item_eliminator 1.65% : 0.000016s : 114: predicate.tuple_list_get_set_item_eliminator 2.39% : 0.000023s : 156: predicate.tuple_list_set_item_eliminator 1.83% : 0.000017s : 120: predicate.tuple_to_list_eliminator_ 2.86% : 0.000027s : 192: predicate.updatestate_pure_node_eliminater 3.69% : 0.000035s : 234: predicate.updatestate_useless_node_eliminater 0.34% : 0.000003s : 21: predicate.value_based_eliminate 0.61% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.61% : 0.000006s : 42: predicate.virtual_output_eliminate 0.36% : 0.000003s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002997 41 65.82% : 0.001973s : 24: func_graph_cloner_run.FuncGraphClonerGraph 34.18% : 0.001024s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.206001 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000028s : 1: add_cache_embedding 0.00% : 0.000006s : 1: add_comm_op_reuse_tag 0.08% : 0.000157s : 1: add_recomputation 0.03% : 0.000063s : 1: assign_add_opt 0.27% : 0.000565s : 1: auto_monad 0.05% : 0.000111s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.19% : 0.000397s : 1: bootstrap 0.02% : 0.000032s : 1: cconv 0.03% : 0.000061s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000023s : 1: convert_after_rewriter 0.03% : 0.000060s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.28% : 0.000581s : 1: eliminate_special_op_node 0.01% : 0.000022s : 1: environ_conv 0.01% : 0.000020s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000012s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000014s : 1: handle_group_info 0.00% : 0.000006s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.28% : 0.000578s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000033s : 1: opt.transform.loop_unroll_optimizer 2.43% : 0.005003s : 80: opt.transform.opt_a 0.08% : 0.000156s : 1: opt.transform.opt_after_cconv 0.23% : 0.000480s : 27: opt.transform.opt_b 0.08% : 0.000167s : 1: opt.transform.opt_trans_graph 0.04% : 0.000076s : 3: opt.transform.special_op_eliminate 0.06% : 0.000124s : 4: opt.transform.symbol_engine_opt 6.09% : 0.012546s : 1: opt_a 0.16% : 0.000320s : 1: opt_after_cconv 0.31% : 0.000645s : 1: opt_b 7.78% : 0.016029s : 1: optimize 0.01% : 0.000026s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000017s : 1: order_py_execute_after_rewriter 0.01% : 0.000030s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.02% : 0.000046s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.05% : 0.000098s : 1: pre_auto_parallel 0.03% : 0.000071s : 1: py_interpret_to_execute 0.01% : 0.000024s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000020s : 1: remove_cast_before_assign_add 0.04% : 0.000073s : 1: remove_dup_value 0.77% : 0.001582s : 1: renormalize.infer 0.43% : 0.000884s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000378s : 1: rewriter_after_opt_a 0.10% : 0.000205s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000022s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000183s : 1: symbol_engine_optimizer 57.44% : 0.118331s : 1: task_emit 0.09% : 0.000189s : 1: tuple_transform 21.92% : 0.045145s : 1: type_inference 0.08% : 0.000174s : 1: validate TotalTime = 0.183883, [21] [bootstrap]: 0.00038628 [type_inference]: 0.0441298 [auto_monad]: 0.00056046 [graph_reusing]: 5.40999e-06 [inline]: 1.43e-06 [parallel-infer-symbol]: 2.62e-06 [pre_auto_parallel]: 8.986e-05 [insert-virtual-dataset]: 3.84e-06 [parallel-infer-symbol-second]: 6.40008e-07 [dataset_repeat_opt]: 1.47999e-06 [pipeline_split]: 1.81e-06 [optimize]: 0.0163593, [52] [py_interpret_to_execute]: 6.556e-05 [rewriter_before_opt_a]: 0.00020147 [opt_a]: 0.0127515, [2] [Cycle 1]: 0.00711275, [43] [expand_dump_flag]: 6.92e-06 [switch_simplify]: 0.00022078 [loop_unroll]: 7.885e-05 [a_1]: 0.00246609 [recompute_prepare]: 2.792e-05 [updatestate_depend_eliminate]: 0.00010246 [updatestate_assign_eliminate]: 2.245e-05 [updatestate_loads_eliminate]: 1.758e-05 [parameter_eliminate]: 3.99e-06 [a_2]: 0.00038314 [accelerated_algorithm]: 4.499e-05 [shard]: 2.78e-06 [meta_shard_fg_expand]: 9.14e-06 [shard_inline]: 2.215e-05 [auto_parallel]: 1.947e-05 [parallel]: 8.38999e-06 [flash_sp]: 1.441e-05 [merge_comm]: 1.723e-05 [allreduce_fusion]: 1.374e-05 [matmul_add_comm_reduction]: 2.259e-05 [allreduce_slice_to_reducescatter]: 4.89992e-07 [virtual_shard_identity]: 2.349e-05 [virtual_dataset]: 2.046e-05 [get_grad_eliminate_]: 2.058e-05 [virtual_output]: 2e-05 [merge_forward]: 1.399e-05 [cell_reuse_recompute_pass]: 2.36e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.17e-05 [before_grad]: 4.021e-05 [inplace_validation]: 1.317e-05 [meta_fg_expand]: 1.715e-05 [inplace_validation_after_expand]: 1.576e-05 [flash_sp_send_recv_attached]: 3.04e-06 [receive_attached]: 4.99e-06 [after_resolve]: 2.734e-05 [a_after_grad]: 3.686e-05 [special_op_eliminate]: 2.179e-05 [renormalize]: 0.00245548 [add_forward_monad_depend]: 4.51e-06 [auto_monad_grad]: 2.01e-06 [auto_monad_eliminator]: 6.158e-05 [cse]: 0.00032201 [a_3]: 0.00015455 [Cycle 2]: 0.00204546, [43] [expand_dump_flag]: 1.51999e-06 [switch_simplify]: 2.296e-05 [loop_unroll]: 2.096e-05 [a_1]: 0.00074 [recompute_prepare]: 2.161e-05 [updatestate_depend_eliminate]: 1.704e-05 [updatestate_assign_eliminate]: 1.641e-05 [updatestate_loads_eliminate]: 1.507e-05 [parameter_eliminate]: 2.65001e-06 [a_2]: 0.00032046 [accelerated_algorithm]: 2.474e-05 [shard]: 1.15999e-06 [meta_shard_fg_expand]: 6.69001e-06 [shard_inline]: 2.168e-05 [auto_parallel]: 1.923e-05 [parallel]: 3.96e-06 [flash_sp]: 3.79e-06 [merge_comm]: 1.543e-05 [allreduce_fusion]: 1.345e-05 [matmul_add_comm_reduction]: 1.906e-05 [allreduce_slice_to_reducescatter]: 3.09999e-07 [virtual_shard_identity]: 2.208e-05 [virtual_dataset]: 2.082e-05 [get_grad_eliminate_]: 1.972e-05 [virtual_output]: 1.975e-05 [merge_forward]: 1.249e-05 [cell_reuse_recompute_pass]: 2.17999e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.013e-05 [before_grad]: 3.656e-05 [inplace_validation]: 1.162e-05 [meta_fg_expand]: 1.336e-05 [inplace_validation_after_expand]: 1.592e-05 [flash_sp_send_recv_attached]: 9.39996e-07 [receive_attached]: 7.29997e-07 [after_resolve]: 2.518e-05 [a_after_grad]: 3.524e-05 [special_op_eliminate]: 2.034e-05 [renormalize]: 8.00064e-08 [add_forward_monad_depend]: 1.46001e-06 [auto_monad_grad]: 1.28e-06 [auto_monad_eliminator]: 4.269e-05 [cse]: 5.537e-05 [a_3]: 0.00014174 [py_interpret_to_execute_after_opt_a]: 1.999e-05 [slice_cell_reuse_recomputed_activation]: 2.67e-06 [rewriter_after_opt_a]: 0.00038154 [convert_after_rewriter]: 1.918e-05 [order_py_execute_after_rewriter]: 1.36e-05 [opt_b]: 0.00064779, [1] [Cycle 1]: 0.00064093, [7] [b_1]: 0.00047798 [b_2]: 2.364e-05 [updatestate_depend_eliminate]: 1.449e-05 [updatestate_assign_eliminate]: 1.616e-05 [updatestate_loads_eliminate]: 1.523e-05 [renormalize]: 3.79994e-07 [cse]: 5.472e-05 [optimize_parallel_all_gather_comm]: 1.963e-05 [overlap_param_gather]: 3.15999e-06 [cconv]: 2.825e-05 [loop_unroll]: 0.00053333 [opt_after_cconv]: 0.00031971, [1] [Cycle 1]: 0.00031158, [7] [c_1]: 0.00015827 [parameter_eliminate]: 2.68999e-06 [updatestate_depend_eliminate]: 1.791e-05 [updatestate_assign_eliminate]: 1.65e-05 [updatestate_loads_eliminate]: 1.628e-05 [cse]: 6.065e-05 [renormalize]: 4.40006e-07 [remove_dup_value]: 0.00015717 [tuple_transform]: 0.00018295, [1] [Cycle 1]: 0.00017629, [2] [d_1]: 0.00016405 [renormalize]: 3.19997e-07 [partial_unused_args_eliminate]: 2.58999e-06 [add_cache_embedding]: 2.528e-05 [add_recomputation]: 0.00015149 [cse_after_recomputation]: 6.036e-05, [1] [Cycle 1]: 5.357e-05, [1] [cse]: 4.715e-05 [environ_conv]: 1.896e-05 [swap_dp_allreduce_reducescatter]: 1.871e-05 [bias_add_comm_swap]: 2.27001e-06 [label_micro_interleaved_index]: 2.12e-06 [label_fine_grained_interleaved_index]: 2.38999e-06 [merge_cast_opt]: 1.14001e-06 [slice_recompute_activation]: 1.94e-06 [micro_interleaved_order_control]: 2.25e-06 [assign_add_opt]: 6.133e-05 [ForceFp32Comm]: 8.99992e-07 [remove_cast_before_assign_add]: 1.691e-05 [full_micro_interleaved_order_control]: 2.07001e-06 [reorder_send_recv_between_fp_bp]: 2.81e-06 [comm_op_add_attrs]: 5.843e-05 [add_comm_op_reuse_tag]: 1.95e-06 [interleave_split_concat_branches]: 1.12e-06 [interleave_parallel_branches]: 8.79998e-07 [overlap_opt_shard_in_pipeline]: 2.45e-06 [overlap_opt_shard_grad_in_pipeline]: 2.38999e-06 [control_data_broadcast_order]: 1.62001e-06 [grouped_pairwise_exchange_alltoall]: 1.038e-05 [offloading_packed_experts]: 2.29001e-06 [overlap_recompute_and_grad_model_parallel]: 2.18001e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.13e-06 [overlap_recompute_allgather_and_fa_grad]: 4.534e-05 [overlap_grad_ring_attention]: 2.5e-06 [overlap_grad_flash_sp]: 2.87e-05 [begin_end_overlap_inline]: 8.10003e-07 [split_matmul_comm_elemetwise]: 2.37001e-06 [split_layernorm_comm]: 2.02e-06 [handle_group_info] TotalTime = 0.179668, [21] [bootstrap]: 0.00035151 [type_inference]: 0.0421993 [auto_monad]: 0.00051322 [graph_reusing]: 4.65001e-06 [inline]: 1.42e-06 [parallel-infer-symbol]: 1.76999e-06 [pre_auto_parallel]: 8.529e-05 [insert-virtual-dataset]: 2.27999e-06 [parallel-infer-symbol-second]: 8.9001e-07 [dataset_repeat_opt]: 9.50007e-07 [pipeline_split]: 1.18e-06 [optimize]: 0.0160137, [52] [py_interpret_to_execute]: 6.431e-05 [rewriter_before_opt_a]: 0.00019737 [opt_a]: 0.0126485, [2] [Cycle 1]: 0.0071568, [43] [expand_dump_flag]: 5.64e-06 [switch_simplify]: 0.00020397 [loop_unroll]: 7.821e-05 [a_1]: 0.00249354 [recompute_prepare]: 2.695e-05 [updatestate_depend_eliminate]: 0.00010051 [updatestate_assign_eliminate]: 2.217e-05 [updatestate_loads_eliminate]: 1.599e-05 [parameter_eliminate]: 3.46001e-06 [a_2]: 0.00032141 [accelerated_algorithm]: 4.383e-05 [shard]: 1.59e-06 [meta_shard_fg_expand]: 8.69001e-06 [shard_inline]: 2.148e-05 [auto_parallel]: 1.931e-05 [parallel]: 5.89e-06 [flash_sp]: 1.171e-05 [merge_comm]: 1.567e-05 [allreduce_fusion]: 1.352e-05 [matmul_add_comm_reduction]: 1.999e-05 [allreduce_slice_to_reducescatter]: 3.6e-07 [virtual_shard_identity]: 2.256e-05 [virtual_dataset]: 2.258e-05 [get_grad_eliminate_]: 2.103e-05 [virtual_output]: 2.001e-05 [merge_forward]: 1.354e-05 [cell_reuse_recompute_pass]: 1.96e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.187e-05 [before_grad]: 3.822e-05 [inplace_validation]: 1.262e-05 [meta_fg_expand]: 1.725e-05 [inplace_validation_after_expand]: 1.494e-05 [flash_sp_send_recv_attached]: 2.11e-06 [receive_attached]: 3.27e-06 [after_resolve]: 2.845e-05 [a_after_grad]: 3.565e-05 [special_op_eliminate]: 2.28e-05 [renormalize]: 0.00260662 [add_forward_monad_depend]: 2.99e-06 [auto_monad_grad]: 1.40999e-06 [auto_monad_eliminator]: 5.06e-05 [cse]: 0.00032997 [a_3]: 0.00015251 [Cycle 2]: 0.00201753, [43] [expand_dump_flag]: 1.14999e-06 [switch_simplify]: 2.273e-05 [loop_unroll]: 2.057e-05 [a_1]: 0.00069068 [recompute_prepare]: 2.068e-05 [updatestate_depend_eliminate]: 1.509e-05 [updatestate_assign_eliminate]: 1.631e-05 [updatestate_loads_eliminate]: 1.48e-05 [parameter_eliminate]: 1.86001e-06 [a_2]: 0.00030198 [accelerated_algorithm]: 2.401e-05 [shard]: 1.01e-06 [meta_shard_fg_expand]: 7.29e-06 [shard_inline]: 2.127e-05 [auto_parallel]: 1.859e-05 [parallel]: 3.65e-06 [flash_sp]: 2.52e-06 [merge_comm]: 1.536e-05 [allreduce_fusion]: 1.338e-05 [matmul_add_comm_reduction]: 1.809e-05 [allreduce_slice_to_reducescatter]: 2.69996e-07 [virtual_shard_identity]: 2.168e-05 [virtual_dataset]: 2.074e-05 [get_grad_eliminate_]: 2.007e-05 [virtual_output]: 1.996e-05 [merge_forward]: 1.167e-05 [cell_reuse_recompute_pass]: 1.96e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.035e-05 [before_grad]: 3.621e-05 [inplace_validation]: 1.161e-05 [meta_fg_expand]: 1.329e-05 [inplace_validation_after_expand]: 1.548e-05 [flash_sp_send_recv_attached]: 1.15001e-06 [receive_attached]: 6.79996e-07 [after_resolve]: 2.453e-05 [a_after_grad]: 3.442e-05 [special_op_eliminate]: 1.985e-05 [renormalize]: 6.99947e-08 [add_forward: 1.035e-05 [symbol_engine_optimizer]: 0.00018477, [1] [Cycle 1]: 0.0001785, [6] [build]: 1.807e-05 [elim_shapecalc]: 2.841e-05 [elim_not_effective]: 4.095e-05 [opt_reshape]: 2.239e-05 [fold_const_symbol]: 3.802e-05 [renormalize]: 3.40005e-07 [pipeline_parallel_scheduler]: 2.27e-06 [auto_monad_reorder]: 0.00010357 [get_jit_bprop_graph]: 5.19998e-07 [rewriter_after_jit_bprop_graph]: 7.99992e-07 [eliminate_special_op_node]: 0.0005733 [distribtued_split]: 1.52001e-06 [validate]: 7.834e-05 [task_emit]: 0.1212 [execute]: 1.304e-05 Sums bootstrap : 0.000386s : 0.22% type_inference : 0.044130s : 24.65% auto_monad : 0.000560s : 0.31% graph_reusing : 0.000005s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000090s : 0.05% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000066s : 0.04% optimize.rewriter_before_opt_a : 0.000201s : 0.11% optimize.opt_a.expand_dump_flag : 0.000008s : 0.00% optimize.opt_a.switch_simplify : 0.000244s : 0.14% optimize.opt_a.loop_unroll : 0.000100s : 0.06% optimize.opt_a.a_1 : 0.003206s : 1.79% optimize.opt_a.recompute_prepare : 0.000050s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000119s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000039s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000033s : 0.02% optimize.opt_a.parameter_eliminate : 0.000007s : 0.00% optimize.opt_a.a_2 : 0.000704s : 0.39% optimize.opt_a.accelerated_algorithm : 0.000070s : 0.04% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000016s : 0.01% optimize.opt_a.shard_inline : 0.000044s : 0.02% optimize.opt_a.auto_parallel : 0.000039s : 0.02% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000018s : 0.01% optimize.opt_a.merge_comm : 0.000033s : 0.02% optimize.opt_a.allreduce_fusion : 0.000027s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000042s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000046s : 0.03% optimize.opt_a.virtual_dataset : 0.000041s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000040s : 0.02% optimize.opt_a.virtual_output : 0.000040s : 0.02% optimize.opt_a.merge_forward : 0.000026s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000082s : 0.05% optimize.opt_a.before_grad : 0.000077s : 0.04% optimize.opt_a.inplace_validation : 0.000025s : 0.01% optimize.opt_a.meta_fg_expand : 0.000031s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000032s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.000053s : 0.03% optimize.opt_a.a_after_grad : 0.000072s : 0.04% optimize.opt_a.special_op_eliminate : 0.000042s : 0.02% optimize.opt_a.renormalize : 0.002456s : 1.37% optimize.opt_a.add_forward_m_monad_depend]: 9.5999e-07 [auto_monad_grad]: 1.06001e-06 [auto_monad_eliminator]: 3.891e-05 [cse]: 5.434e-05 [a_3]: 0.00022012 [py_interpret_to_execute_after_opt_a]: 2.102e-05 [slice_cell_reuse_recomputed_activation]: 1.77999e-06 [rewriter_after_opt_a]: 0.00038163 [convert_after_rewriter]: 1.728e-05 [order_py_execute_after_rewriter]: 1.245e-05 [opt_b]: 0.00063727, [1] [Cycle 1]: 0.00063154, [7] [b_1]: 0.00047718 [b_2]: 2.318e-05 [updatestate_depend_eliminate]: 1.412e-05 [updatestate_assign_eliminate]: 1.577e-05 [updatestate_loads_eliminate]: 1.457e-05 [renormalize]: 3.00002e-07 [cse]: 5.288e-05 [optimize_parallel_all_gather_comm]: 1.891e-05 [overlap_param_gather]: 3.20001e-06 [cconv]: 2.134e-05 [loop_unroll]: 0.00052871 [opt_after_cconv]: 0.00030866, [1] [Cycle 1]: 0.00030217, [7] [c_1]: 0.00015691 [parameter_eliminate]: 2.1e-06 [updatestate_depend_eliminate]: 1.598e-05 [updatestate_assign_eliminate]: 1.642e-05 [updatestate_loads_eliminate]: 1.519e-05 [cse]: 6.11e-05 [renormalize]: 3.50003e-07 [remove_dup_value]: 5.886e-05 [tuple_transform]: 0.00017669, [1] [Cycle 1]: 0.00017188, [2] [d_1]: 0.0001614 [renormalize]: 2.30008e-07 [partial_unused_args_eliminate]: 2.26e-06 [add_cache_embedding]: 2.209e-05 [add_recomputation]: 0.00013821 [cse_after_recomputation]: 5.443e-05, [1] [Cycle 1]: 4.908e-05, [1] [cse]: 4.342e-05 [environ_conv]: 1.913e-05 [swap_dp_allreduce_reducescatter]: 1.718e-05 [bias_add_comm_swap]: 1.98001e-06 [label_micro_interleaved_index]: 1.51001e-06 [label_fine_grained_interleaved_index]: 1.34e-06 [merge_cast_opt]: 9.20001e-07 [slice_recompute_activation]: 1.40999e-06 [micro_interleaved_order_control]: 1.39e-06 [assign_add_opt]: 5.884e-05 [ForceFp32Comm]: 6.00005e-07 [remove_cast_before_assign_add]: 1.553e-05 [full_micro_interleaved_order_control]: 1.23e-06 [reorder_send_recv_between_fp_bp]: 1.48e-06 [comm_op_add_attrs]: 4.873e-05 [add_comm_op_reuse_tag]: 1.81e-06 [interleave_split_concat_branches]: 9.50007e-07 [interleave_parallel_branches]: 5.40007e-07 [overlap_opt_shard_in_pipeline]: 2.25e-06 [overlap_opt_shard_grad_in_pipeline]: 1.44001e-06 [control_data_broadcast_order]: 7.89994e-07 [grouped_pairwise_exchange_alltoall]: 8.10999e-06 [offloading_packed_experts]: 1.27e-06 [overlap_recompute_and_grad_model_parallel]: 1.59e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.79996e-07 [overlap_recompute_allgather_and_fa_grad]: 4.252e-05 [overlap_grad_ring_attention]: 1.46001e-06 [overlap_grad_flash_sp]: 2.71e-05 [begin_end_overlap_inline]: 4.80009e-07 [split_matmul_comm_elemetwise]: 1.14999e-06 [split_layernorm_comm]: 1.42e-06 [handle_group_info]: 9.38e-06 [symbol_engine_optimizer]: 0.00017598, [1] [Cycle 1]: 0.00017145, [6] [build]: 1.423e-05 [elim_shapecalc]: 2.684e-05 [elim_not_effective]: 4.154e-05 [opt_reshape]: 2.251e-05 [fold_const_symbol]: 3.736e-05 [renormalize]: 3.10014e-07 [pipeline_parallel_scheduler]: 1.6e-06 [auto_monad_reorder]: 8.69e-05 [get_jit_bprop_graph]: 3.90006e-07 [rewriter_after_jit_bprop_graph]: 3.50003e-07 [eliminate_special_op_node]: 0.00055492 [distribtued_split]: 1.15001e-06 [validate]: 7.328e-05 [task_emit]: 0.119396 [execute]: 8.69e-06 Sums bootstrap : 0.000352s : 0.20% type_inference : 0.042199s : 24.11% auto_monad : 0.000513s : 0.29% graph_reusing : 0.000005s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% onad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000104s : 0.06% optimize.opt_a.cse : 0.000377s : 0.21% optimize.opt_a.a_3 : 0.000296s : 0.17% optimize.py_interpret_to_execute_after_opt_a : 0.000020s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000382s : 0.21% optimize.convert_after_rewriter : 0.000019s : 0.01% optimize.order_py_execute_after_rewriter : 0.000014s : 0.01% optimize.opt_b.b_1 : 0.000478s : 0.27% optimize.opt_b.b_2 : 0.000024s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000055s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000020s : 0.01% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000028s : 0.02% optimize.loop_unroll : 0.000533s : 0.30% optimize.opt_after_cconv.c_1 : 0.000158s : 0.09% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000018s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.cse : 0.000061s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000157s : 0.09% optimize.tuple_transform.d_1 : 0.000164s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000025s : 0.01% optimize.add_recomputation : 0.000151s : 0.08% optimize.cse_after_recomputation.cse : 0.000047s : 0.03% optimize.environ_conv : 0.000019s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000019s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000061s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000017s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000058s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000002s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgathe pre_auto_parallel : 0.000085s : 0.05% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000064s : 0.04% optimize.rewriter_before_opt_a : 0.000197s : 0.11% optimize.opt_a.expand_dump_flag : 0.000007s : 0.00% optimize.opt_a.switch_simplify : 0.000227s : 0.13% optimize.opt_a.loop_unroll : 0.000099s : 0.06% optimize.opt_a.a_1 : 0.003184s : 1.82% optimize.opt_a.recompute_prepare : 0.000048s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000116s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000038s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000031s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000623s : 0.36% optimize.opt_a.accelerated_algorithm : 0.000068s : 0.04% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000016s : 0.01% optimize.opt_a.shard_inline : 0.000043s : 0.02% optimize.opt_a.auto_parallel : 0.000038s : 0.02% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.01% optimize.opt_a.merge_comm : 0.000031s : 0.02% optimize.opt_a.allreduce_fusion : 0.000027s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000038s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000044s : 0.03% optimize.opt_a.virtual_dataset : 0.000043s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000041s : 0.02% optimize.opt_a.virtual_output : 0.000040s : 0.02% optimize.opt_a.merge_forward : 0.000025s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000082s : 0.05% optimize.opt_a.before_grad : 0.000074s : 0.04% optimize.opt_a.inplace_validation : 0.000024s : 0.01% optimize.opt_a.meta_fg_expand : 0.000031s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000030s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000053s : 0.03% optimize.opt_a.a_after_grad : 0.000070s : 0.04% optimize.opt_a.special_op_eliminate : 0.000043s : 0.02% optimize.opt_a.renormalize : 0.002607s : 1.49% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000090s : 0.05% optimize.opt_a.cse : 0.000384s : 0.22% optimize.opt_a.a_3 : 0.000373s : 0.21% optimize.py_interpret_to_execute_after_opt_a : 0.000021s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000382s : 0.22% optimize.convert_after_rewriter : 0.000017s : 0.01% optimize.order_py_execute_after_rewriter : 0.000012s : 0.01% optimize.opt_b.b_1 : 0.000477s : 0.27% optimize.opt_b.b_2 : 0.000023s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opr_and_fa_grad : 0.000045s : 0.03% optimize.overlap_grad_ring_attention : 0.000003s : 0.00% optimize.overlap_grad_flash_sp : 0.000029s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000010s : 0.01% optimize.symbol_engine_optimizer.build : 0.000018s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000028s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000041s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000022s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000038s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000104s : 0.06% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000573s : 0.32% distribtued_split : 0.000002s : 0.00% validate : 0.000078s : 0.04% task_emit : 0.121200s : 67.71% execute : 0.000013s : 0.01% t_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000053s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000019s : 0.01% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000021s : 0.01% optimize.loop_unroll : 0.000529s : 0.30% optimize.opt_after_cconv.c_1 : 0.000157s : 0.09% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_after_cconv.cse : 0.000061s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000059s : 0.03% optimize.tuple_transform.d_1 : 0.000161s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000022s : 0.01% optimize.add_recomputation : 0.000138s : 0.08% optimize.cse_after_recomputation.cse : 0.000043s : 0.02% optimize.environ_conv : 0.000019s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000017s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000059s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000016s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000049s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000008s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000043s : 0.02% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000027s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000009s : 0.01% optimize.symbol_engine_optimizer.build : 0.000014s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000027s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000042s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000023s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000037s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000087s : 0.05% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000555s : 0.32% distribtued_split : 0.000001s : 0.00% validate : 0.000073s : 0.04% task_emit : 0.119396s : 68.23% execute : 0.000009s : 0.00% TotalTime = 0.184958, [21] [bootstrap]: 0.0003656 [type_inference]: 0.0451803 [auto_monad]: 0.00057344 [graph_reusing]: 6.06e-06 [inline]: 1.67999e-06 [parallel-infer-symbol]: 3.07e-06 [pre_auto_parallel]: 8.703e-05 [insert-virtual-dataset]: 3.04001e-06 [parallel-infer-symbol-second]: 6.40008e-07 [dataset_repeat_opt]: 1.35e-06 [pipeline_split]: 1.66e-06 [optimize]: 0.0161829, [52] [py_interpret_to_execute]: 6.697e-05 [rewriter_before_opt_a]: 0.00019963 [opt_a]: 0.012703, [2] [Cycle 1]: 0.0071998, [43] [expand_dump_flag]: 7.35e-06 [switch_simplify]: 0.0002211 [loop_unroll]: 8.035e-05 [a_1]: 0.00248065 [recompute_prepare]: 2.821e-05 [updatestate_depend_eliminate]: 0.000102 [updatestate_assign_eliminate]: 2.255e-05 [updatestate_loads_eliminate]: 1.803e-05 [parameter_eliminate]: 4.05e-06 [a_2]: 0.00033275 [accelerated_algorithm]: 4.345e-05 [shard]: 2.26e-06 [meta_shard_fg_expand]: 9.82001e-06 [shard_inline]: 2.204e-05 [auto_parallel]: 1.997e-05 [parallel]: 9.03e-06 [flash_sp]: 1.506e-05 [merge_comm]: 1.64e-05 [allreduce_fusion]: 1.362e-05 [matmul_add_comm_reduction]: 2.487e-05 [allreduce_slice_to_reducescatter]: 5.19998e-07 [virtual_shard_identity]: 2.308e-05 [virtual_dataset]: 2.075e-05 [get_grad_eliminate_]: 2.133e-05 [virtual_output]: 2.15e-05 [merge_forward]: 1.398e-05 [cell_reuse_recompute_pass]: 2.16e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.488e-05 [before_grad]: 3.821e-05 [inplace_validation]: 1.297e-05 [meta_fg_expand]: 1.753e-05 [inplace_validation_after_expand]: 1.66e-05 [flash_sp_send_recv_attached]: 3.13e-06 [receive_attached]: 4.85001e-06 [after_resolve]: 2.925e-05 [a_after_grad]: 3.622e-05 [special_op_eliminate]: 2.171e-05 [renormalize]: 0.00256356 [add_forward_monad_depend]: 4.84001e-06 [auto_monad_grad]: 2.29001e-06 [auto_monad_eliminator]: 6.333e-05 [cse]: 0.00034964 [a_3]: 0.00015642 [Cycle 2]: 0.00195561, [43] [expand_dump_flag]: 1.85e-06 [switch_simplify]: 2.35e-05 [loop_unroll]: 2.099e-05 [a_1]: 0.00068941 [recompute_prepare]: 2.061e-05 [updatestate_depend_eliminate]: 1.668e-05 [updatestate_assign_eliminate]: 1.638e-05 [updatestate_loads_eliminate]: 1.449e-05 [parameter_eliminate]: 2.29001e-06 [a_2]: 0.00030796 [accelerated_algorithm]: 2.401e-05 [shard]: 1.18e-06 [meta_shard_fg_expand]: 6.57e-06 [shard_inline]: 2.157e-05 [auto_parallel]: 1.97e-05 [parallel]: 3.86999e-06 [flash_sp]: 3.61999e-06 [merge_comm]: 1.561e-05 [allreduce_fusion]: 1.371e-05 [matmul_add_comm_reduction]: 1.998e-05 [allreduce_slice_to_reducescatter]: 2.69996e-07 [virtual_shard_identity]: 2.195e-05 [virtual_dataset]: 2.078e-05 [get_grad_eliminate_]: 2.074e-05 [virtual_output]: 1.972e-05 [merge_forward]: 1.156e-05 [cell_reuse_recompute_pass]: 2.23001e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.069e-05 [before_grad]: 3.596e-05 [inplace_validation]: 1.141e-05 [meta_fg_expand]: 1.327e-05 [inplace_validation_after_expand]: 1.581e-05 [flash_sp_send_recv_attached]: 9.49993e-07 [receive_attached]: 6.60002e-07 [after_resolve]: 2.474e-05 [a_after_grad]: 3.452e-05 [special_op_eliminate]: 1.973e-05 [renormalize]: 7.00093e-08 [add_forwar Time group info: ------[substitution.] 0.000658 300 0.80% : 0.000005s : 2: substitution.depend_value_elim 0.97% : 0.000006s : 18: substitution.elim_not_effective 0.86% : 0.000006s : 18: substitution.fold_const_symbol 2.27% : 0.000015s : 21: substitution.graph_param_transform 56.40% : 0.000371s : 15: substitution.inline 2.37% : 0.000016s : 36: substitution.j_node_and_user_rematch 3.23% : 0.000021s : 2: substitution.less_batch_normalization 2.44% : 0.000016s : 30: substitution.load_eliminater 0.76% : 0.000005s : 6: substitution.reduce_all_const_elim 3.07% : 0.000020s : 36: substitution.remove_not_recompute_node 0.81% : 0.000005s : 6: substitution.replace_old_param 2.00% : 0.000013s : 4: substitution.switch_simplify 3.14% : 0.000021s : 6: substitution.tuple_list_get_item_eliminator 7.28% : 0.000048s : 44: substitution.updatestate_pure_node_eliminater 13.61% : 0.000090s : 56: substitution.d_monad_depend]: 1.58e-06 [auto_monad_grad]: 1.44e-06 [auto_monad_eliminator]: 4.178e-05 [cse]: 5.458e-05 [a_3]: 0.00014148 [py_interpret_to_execute_after_opt_a]: 1.896e-05 [slice_cell_reuse_recomputed_activation]: 2.89999e-06 [rewriter_after_opt_a]: 0.00038625 [convert_after_rewriter]: 1.95e-05 [order_py_execute_after_rewriter]: 1.346e-05 [opt_b]: 0.00064394, [1] [Cycle 1]: 0.00063819, [7] [b_1]: 0.00047973 [b_2]: 2.408e-05 [updatestate_depend_eliminate]: 1.423e-05 [updatestate_assign_eliminate]: 1.633e-05 [updatestate_loads_eliminate]: 1.465e-05 [renormalize]: 2.70011e-07 [cse]: 5.496e-05 [optimize_parallel_all_gather_comm]: 2.245e-05 [overlap_param_gather]: 2.84999e-06 [cconv]: 2.869e-05 [loop_unroll]: 0.00054945 [opt_after_cconv]: 0.00031258, [1] [Cycle 1]: 0.00030586, [7] [c_1]: 0.00015662 [parameter_eliminate]: 2.36e-06 [updatestate_depend_eliminate]: 1.751e-05 [updatestate_assign_eliminate]: 1.619e-05 [updatestate_loads_eliminate]: 1.549e-05 [cse]: 6.26e-05 [renormalize]: 4.1e-07 [remove_dup_value]: 6.563e-05 [tuple_transform]: 0.00017985, [1] [Cycle 1]: 0.00017467, [2] [d_1]: 0.00016457 [renormalize]: 2.79993e-07 [partial_unused_args_eliminate]: 3.05001e-06 [add_cache_embedding]: 2.469e-05 [add_recomputation]: 0.00014882 [cse_after_recomputation]: 5.576e-05, [1] [Cycle 1]: 4.982e-05, [1] [cse]: 4.413e-05 [environ_conv]: 2.008e-05 [swap_dp_allreduce_reducescatter]: 1.823e-05 [bias_add_comm_swap]: 2.43e-06 [label_micro_interleaved_index]: 1.97001e-06 [label_fine_grained_interleaved_index]: 2.27e-06 [merge_cast_opt]: 1.59e-06 [slice_recompute_activation]: 1.89e-06 [micro_interleaved_order_control]: 1.85e-06 [assign_add_opt]: 6.084e-05 [ForceFp32Comm]: 8.89995e-07 [remove_cast_before_assign_add]: 1.73e-05 [full_micro_interleaved_order_control]: 2.4e-06 [reorder_send_recv_between_fp_bp]: 2.25e-06 [comm_op_add_attrs]: 5.821e-05 [add_comm_op_reuse_tag]: 2.21e-06 [interleave_split_concat_branches]: 9.39996e-07 [interleave_parallel_branches]: 8.9001e-07 [overlap_opt_shard_in_pipeline]: 2.21e-06 [overlap_opt_shard_grad_in_pipeline]: 2.44001e-06 [control_data_broadcast_order]: 1.22e-06 [grouped_pairwise_exchange_alltoall]: 9.51001e-06 [offloading_packed_experts]: 2.47e-06 [overlap_recompute_and_grad_model_parallel]: 2.26e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.9001e-07 [overlap_recompute_allgather_and_fa_grad]: 3.662e-05 [overlap_grad_ring_attention]: 2.53e-06 [overlap_grad_flash_sp]: 3.072e-05 [begin_end_overlap_inline]: 8.2e-07 [split_matmul_comm_elemetwise]: 2e-06 [split_layernorm_comm]: 1.98001e-06 [handle_group_info]: 9.47001e-06 [symbol_engine_optimizer]: 0.00018188, [1] [Cycle 1]: 0.00017638, [6] [build]: 1.757e-05 [elim_shapecalc]: 2.805e-05 [elim_not_effective]: 4.237e-05 [opt_reshape]: 2.214e-05 [fold_const_symbol]: 3.791e-05 [renormalize]: 3.6e-07 [pipeline_parallel_scheduler]: 1.83001e-06 [auto_monad_reorder]: 0.00010491 [get_jit_bprop_graph]: 5.20013e-07 [rewriter_after_jit_bprop_graph]: 4.29995e-07 [eliminate_special_op_node]: 0.00056889 [distribtued_split]: 1.44e-06 [validate]: 8.144e-05 [task_emit]: 0.121425 [execute]: 1.231e-05 Sums bootstrap : 0.000366s : 0.20% type_inference : 0.045180s : 25.07% auto_monad : 0.000573s : 0.32% graph_reusing : 0.000006s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel updatestate_useless_node_eliminater ------[type_inference.] 0.042139 2 93.35% : 0.039336s : 1: type_inference.infer 6.65% : 0.002802s : 1: type_inference.specialize ------[replace.] 0.000214 25 53.80% : 0.000115s : 15: replace.inline 29.18% : 0.000062s : 4: replace.switch_simplify 17.02% : 0.000036s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000390 25 92.83% : 0.000362s : 15: match.inline 2.71% : 0.000011s : 4: match.switch_simplify 4.45% : 0.000017s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000964 6511 1.00% : 0.000010s : 72: predicate.accumulaten_eliminater 0.66% : 0.000006s : 21: predicate.ad_related_special_op_eliminate 0.58% : 0.000006s : 42: predicate.addn_check_dump 1.02% : 0.000010s : 72: predicate.addn_zero_filter 0.97% : 0.000009s : 72: predicate.adjust_all_reduce_mul_add 2.09% : 0.000020s : 114: predicate.arithmetic_simplify 1.02% : 0.000010s : 72: predicate.cast_eliminate 0.62% : 0.000006s : 42: predicate.check_bprop_eliminate 0.58% : 0.000006s : 42: predicate.compare_switch_simplify 0.17% : 0.000002s : 21: predicate.const_output_eliminate 0.31% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.42% : 0.000014s : 78: predicate.convert_tensor_eliminate 0.59% : 0.000006s : 42: predicate.depend_value_elim 1.07% : 0.000010s : 72: predicate.dict_get_item_const_eliminator 1.15% : 0.000011s : 72: predicate.dict_get_item_eliminator 1.10% : 0.000011s : 72: predicate.dict_set_item_eliminator 0.18% : 0.000002s : 21: predicate.elim_not_effective 0.39% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.31% : 0.000013s : 93: predicate.environ_add_const_eliminate 1.27% : 0.000012s : 93: predicate.environ_get_add_eliminate 1.36% : 0.000013s : 93: predicate.environ_get_depend_swap 1.91% : 0.000018s : 135: predicate.environ_get_eliminate 1.25% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.30% : 0.000013s : 93: predicate.exchange_switch_depend_value 1.66% : 0.000016s : 93: predicate.float_depend_g_call 0.58% : 0.000006s : 42: predicate.float_environ_get_switch 0.87% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 21: predicate.fold_const_symbol 0.62% : 0.000006s : 42: predicate.get_grad_eliminate 0.19% : 0.000002s : 21: predicate.graph_param_transform 0.59% : 0.000006s : 42: predicate.incorporate_call 0.57% : 0.000005s : 42: predicate.incorporate_call_switch 5.42% : 0.000052s : 291: predicate.inline 0.79% : 0.000008s : 42: predicate.inline_without_move 0.31% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.72% : 0.000007s : 42: predicate.less_batch_normalization 1.76% : 0.000017s : 120: predicate.list_to_tuple_eliminator_ 2.88% : 0.000028s : 192: predicate.load_eliminater 0.71% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.70% : 0.000016s : 110: predicate.loop_unroll_before_grad 1.79% : 0.000017s : 114: predicate.make_slice_get_slice_eliminator 0.60% : 0.000006s : 42: predicate.merge_addn 0.61% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.59% : 0.000006s : 42: predicate.mini_step_allgather_replace 0.98% : 0.000009s : 72: predicate.minmaximum_grad 0.38% : 0.000004s : 21: predicate.mutable_eliminate 0.33% : 0.000003s : 21: predicate.opt_reshape 0.36% : 0.000004s : 21: predicate.parallel_virtual_node 1.93% : 0.000019s : 93: predicate.partial_defer_inline 1.55% : 0.000015s : 99: predicate.partial_eliminate 1.08% : 0.000010s : 72: predicate.print_const_string_wrapper 0.65% : 0.000006s : 42: predicate.reduce_all_const_elim 1.29% : 0.000012s : 72: predicate.reduce : 0.000087s : 0.05% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000067s : 0.04% optimize.rewriter_before_opt_a : 0.000200s : 0.11% optimize.opt_a.expand_dump_flag : 0.000009s : 0.01% optimize.opt_a.switch_simplify : 0.000245s : 0.14% optimize.opt_a.loop_unroll : 0.000101s : 0.06% optimize.opt_a.a_1 : 0.003170s : 1.76% optimize.opt_a.recompute_prepare : 0.000049s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000119s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000039s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000033s : 0.02% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.000641s : 0.36% optimize.opt_a.accelerated_algorithm : 0.000067s : 0.04% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000016s : 0.01% optimize.opt_a.shard_inline : 0.000044s : 0.02% optimize.opt_a.auto_parallel : 0.000040s : 0.02% optimize.opt_a.parallel : 0.000013s : 0.01% optimize.opt_a.flash_sp : 0.000019s : 0.01% optimize.opt_a.merge_comm : 0.000032s : 0.02% optimize.opt_a.allreduce_fusion : 0.000027s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000045s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000045s : 0.02% optimize.opt_a.virtual_dataset : 0.000042s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000042s : 0.02% optimize.opt_a.virtual_output : 0.000041s : 0.02% optimize.opt_a.merge_forward : 0.000026s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000086s : 0.05% optimize.opt_a.before_grad : 0.000074s : 0.04% optimize.opt_a.inplace_validation : 0.000024s : 0.01% optimize.opt_a.meta_fg_expand : 0.000031s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000032s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.000054s : 0.03% optimize.opt_a.a_after_grad : 0.000071s : 0.04% optimize.opt_a.special_op_eliminate : 0.000041s : 0.02% optimize.opt_a.renormalize : 0.002564s : 1.42% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000105s : 0.06% optimize.opt_a.cse : 0.000404s : 0.22% optimize.opt_a.a_3 : 0.000298s : 0.17% optimize.py_interpret_to_execute_after_opt_a : 0.000019s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000386s : 0.21% optimize.convert_after_rewriter : 0.000020s : 0.01% optimize.order_py_execute_after_rewriter : 0.000013s : 0.01% optimize.opt_b.b_1 : 0.000480s : 0.27% optimize.opt_b.b_2 : 0.000024s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize _eliminate 0.32% : 0.000003s : 42: predicate.remove_not_recompute_node 1.13% : 0.000011s : 120: predicate.replace_applicator 0.32% : 0.000003s : 42: predicate.replace_old_param 0.16% : 0.000002s : 21: predicate.reset_defer_inline 1.02% : 0.000010s : 72: predicate.reshape_eliminate 0.60% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.35% : 0.000003s : 21: predicate.row_tensor_eliminate 0.79% : 0.000008s : 42: predicate.same_eliminate 0.36% : 0.000003s : 46: predicate.set_cell_output_no_recompute 0.68% : 0.000007s : 42: predicate.shard_identity_eliminate 0.97% : 0.000009s : 63: predicate.special_op_eliminate 0.70% : 0.000007s : 42: predicate.specialize_transform 0.69% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.71% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.89% : 0.000028s : 192: predicate.stopgrad_eliminater 0.34% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.55% : 0.000015s : 93: predicate.switch_defer_inline 2.05% : 0.000020s : 135: predicate.switch_layer_defer_inline 4.64% : 0.000045s : 253: predicate.switch_simplify 1.05% : 0.000010s : 72: predicate.tile_eliminate 0.99% : 0.000010s : 72: predicate.transpose_eliminate 1.76% : 0.000017s : 114: predicate.tuple_list_convert_item_index_to_positive 1.84% : 0.000018s : 114: predicate.tuple_list_get_item_const_eliminator 1.71% : 0.000017s : 114: predicate.tuple_list_get_item_depend_reorder 2.66% : 0.000026s : 162: predicate.tuple_list_get_item_eliminator 1.64% : 0.000016s : 114: predicate.tuple_list_get_set_item_eliminator 2.47% : 0.000024s : 156: predicate.tuple_list_set_item_eliminator 1.75% : 0.000017s : 120: predicate.tuple_to_list_eliminator_ 2.85% : 0.000027s : 192: predicate.updatestate_pure_node_eliminater 3.91% : 0.000038s : 234: predicate.updatestate_useless_node_eliminater 0.35% : 0.000003s : 21: predicate.value_based_eliminate 0.64% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.61% : 0.000006s : 42: predicate.virtual_output_eliminate 0.52% : 0.000005s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002739 41 62.73% : 0.001718s : 24: func_graph_cloner_run.FuncGraphClonerGraph 37.27% : 0.001021s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.204302 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000026s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000143s : 1: add_recomputation 0.03% : 0.000063s : 1: assign_add_opt 0.26% : 0.000534s : 1: auto_monad 0.05% : 0.000095s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.18% : 0.000376s : 1: bootstrap 0.01% : 0.000025s : 1: cconv 0.03% : 0.000053s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000021s : 1: convert_after_rewriter 0.03% : 0.000058s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.28% : 0.000569s : 1: eliminate_special_op_node 0.01% : 0.000023s : 1: environ_conv 0.01% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000012s : 1: graph_reusing 0.01% : 0.000011s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000013s : 1: handle_group_info 0.00% : 0.000006s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.0: 0.000000s : 0.00% optimize.opt_b.cse : 0.000055s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000022s : 0.01% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000029s : 0.02% optimize.loop_unroll : 0.000549s : 0.30% optimize.opt_after_cconv.c_1 : 0.000157s : 0.09% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000018s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_after_cconv.cse : 0.000063s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000066s : 0.04% optimize.tuple_transform.d_1 : 0.000165s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000025s : 0.01% optimize.add_recomputation : 0.000149s : 0.08% optimize.cse_after_recomputation.cse : 0.000044s : 0.02% optimize.environ_conv : 0.000020s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000018s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000061s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000017s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000058s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000037s : 0.02% optimize.overlap_grad_ring_attention : 0.000003s : 0.00% optimize.overlap_grad_flash_sp : 0.000031s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000009s : 0.01% optimize.symbol_engine_optimizer.build : 0.000018s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000028s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000042s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000022s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000038s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000105s : 0.06% get_jit_bprop_graph Time group info: ------[substitution.] 0.000642 300 1.23% : 0.000008s : 2: substitution.depend_value_elim 1.00% : 0.000006s : 18: substitution.elim_not_effective 1.01% : 0.000006s : 18: substitution.fold_const_symbol 2.61% : 0.000017s : 21: substitution.graph_param_transform 60.35% : 0.000387s : 15: substitution.inline 2.42% : 0.000016s : 36: substitution.j_node_and_user_rematch 3.40% : 0.000022s : 2: substitution.less_batch_normalization 2.54% : 0.000016s : 30: substitution.load_eliminater 0.92% : 0.000006s : 6: substitution.reduce_all_const_elim 3.15% : 0.000020s : 36: substitution.remove_not_recompute_node 0.92% : 0.000006s : 6: substitution.replace_old_param 2.35% : 0.000015s : 4: substitution.switch_simplify 3.80% : 0.000024s : 6: substitution.tuple_list_get_item_eliminator 7.52% : 0.000048s : 44: substitution.updatestate_pure_node_eliminater 6.79% : 0.000044s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.044005 2 93.54% : 0.041161s : 1: type_inference.infer 6.46% : 0.002844s : 1: type_inference.specialize ------[replace.] 0.000231 25 54.00% : 0.000125s : 15: replace.inline 30.67% : 0.000071s : 4: replace.switch_simplify 15.32% : 0.000035s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000412 25 91.78% : 0.000378s : 15: match.inline 3.10% : 0.000013s : 4: match.switch_simplify 5.12% : 0.000021s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000959 6511 1.02% : 0.000010s : 72: predicate.accumulaten_eliminater 0.70% : 0.000007s : 21: predicate.ad_related_special_op_eliminate 0.58% : 0.000006s : 42: predicate.addn_check_dump 1.08% : 0.000010s : 72: predicate.addn_zero_filter 1.06% : 0.000010s : 72: predicate.adjust_all_reduce_mul_add 2.14% : 0.000021s : 114: predicate.arithmetic_simplify 1.00% : 0.000010s : 72: predicate.cast_eliminate 0.62% : 0.000006s : 42: predicate.check_bprop_eliminate 0.58% : 0.000006s : 42: predicate.compare_switch_simplify 0.18% : 0.000002s : 21: predicate.const_output_eliminate 0.32% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.44% : 0.000014s : 78: predicate.convert_tensor_eliminate 0.60% : 0.000006s : 42: predicate.depend_value_elim 1.05% : 0.000010s : 72: predicate.dict_get_item_const_eliminator 1.11% : 0.000011s : 72: predicate.dict_get_item_eliminator 1.08% : 0.000010s : 72: predicate.dict_set_item_eliminator 0.18% : 0.000002s : 21: predicate.elim_not_effective 0.37% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.27% : 0.000012s : 93: predicate.environ_add_const_eliminate 1.26% : 0.000012s : 93: predicate.environ_get_add_eliminate 1.25% : 0.000012s : 93: predicate.environ_get_depend_swap 1.95% : 0.000019s : 135: predicate.environ_get_eliminate 1.27% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.30% : 0.000013s : 93: predicate.exchange_switch_depend_value 1.65% : 0.000016s : 93: predicate.float_depend_g_call 0.58% : 0.000006s : 42: predicate.float_environ_get_switch 0.85% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 21: predicate.fold_const_symbol 0.63% : 0.000006s : 42: predicate.get_grad_eliminate 0.22% : 0.000002s : 21: predicate.graph_param_transform 0.63% : 0.000006s : 42: predicate.incorporate_call 0.59% : 0.000006s : 42: predicate.incorporate_call_switch 5.42% : 0.000052s : 291: predicate.inline 0.81% : 0.000008s : 42: predicate.inline_without_move 0.31% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.75% : 0.000007s : 42: predicate.less_batch_normalization 1.84% : 0.000018s : 120: predica00003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.26% : 0.000538s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.01% : 0.000030s : 1: opt.transform.loop_unroll_optimizer 2.49% : 0.005082s : 80: opt.transform.opt_a 0.08% : 0.000155s : 1: opt.transform.opt_after_cconv 0.23% : 0.000479s : 27: opt.transform.opt_b 0.08% : 0.000160s : 1: opt.transform.opt_trans_graph 0.04% : 0.000074s : 3: opt.transform.special_op_eliminate 0.06% : 0.000124s : 4: opt.transform.symbol_engine_opt 6.19% : 0.012653s : 1: opt_a 0.15% : 0.000312s : 1: opt_after_cconv 0.31% : 0.000640s : 1: opt_b 7.84% : 0.016023s : 1: optimize 0.01% : 0.000022s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000016s : 1: order_py_execute_after_rewriter 0.02% : 0.000031s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.02% : 0.000047s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.05% : 0.000095s : 1: pre_auto_parallel 0.03% : 0.000071s : 1: py_interpret_to_execute 0.01% : 0.000025s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000019s : 1: remove_cast_before_assign_add 0.03% : 0.000065s : 1: remove_dup_value 0.82% : 0.001682s : 1: renormalize.infer 0.45% : 0.000916s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.19% : 0.000388s : 1: rewriter_after_opt_a 0.10% : 0.000203s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000021s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000179s : 1: symbol_engine_optimizer 58.45% : 0.119423s : 1: task_emit 0.09% : 0.000180s : 1: tuple_transform 20.67% : 0.042222s : 1: type_inference 0.10% : 0.000214s : 1: validate : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000569s : 0.32% distribtued_split : 0.000001s : 0.00% validate : 0.000081s : 0.05% task_emit : 0.121425s : 67.37% execute : 0.000012s : 0.01% te.list_to_tuple_eliminator_ 2.96% : 0.000028s : 192: predicate.load_eliminater 0.72% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.71% : 0.000016s : 110: predicate.loop_unroll_before_grad 1.66% : 0.000016s : 114: predicate.make_slice_get_slice_eliminator 0.59% : 0.000006s : 42: predicate.merge_addn 0.58% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.59% : 0.000006s : 42: predicate.mini_step_allgather_replace 1.00% : 0.000010s : 72: predicate.minmaximum_grad 0.42% : 0.000004s : 21: predicate.mutable_eliminate 0.33% : 0.000003s : 21: predicate.opt_reshape 0.35% : 0.000003s : 21: predicate.parallel_virtual_node 1.91% : 0.000018s : 93: predicate.partial_defer_inline 1.57% : 0.000015s : 99: predicate.partial_eliminate 1.03% : 0.000010s : 72: predicate.print_const_string_wrapper 0.65% : 0.000006s : 42: predicate.reduce_all_const_elim 1.26% : 0.000012s : 72: predicate.reduce_eliminate 0.35% : 0.000003s : 42: predicate.remove_not_recompute_node 1.21% : 0.000012s : 120: predicate.replace_applicator 0.35% : 0.000003s : 42: predicate.replace_old_param 0.18% : 0.000002s : 21: predicate.reset_defer_inline 1.01% : 0.000010s : 72: predicate.reshape_eliminate 0.59% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.34% : 0.000003s : 21: predicate.row_tensor_eliminate 0.76% : 0.000007s : 42: predicate.same_eliminate 0.37% : 0.000004s : 46: predicate.set_cell_output_no_recompute 0.66% : 0.000006s : 42: predicate.shard_identity_eliminate 1.02% : 0.000010s : 63: predicate.special_op_eliminate 0.70% : 0.000007s : 42: predicate.specialize_transform 0.68% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.70% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.71% : 0.000026s : 192: predicate.stopgrad_eliminater 0.34% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.49% : 0.000014s : 93: predicate.switch_defer_inline 2.16% : 0.000021s : 135: predicate.switch_layer_defer_inline 4.77% : 0.000046s : 253: predicate.switch_simplify 1.02% : 0.000010s : 72: predicate.tile_eliminate 0.98% : 0.000009s : 72: predicate.transpose_eliminate 1.83% : 0.000018s : 114: predicate.tuple_list_convert_item_index_to_positive 1.79% : 0.000017s : 114: predicate.tuple_list_get_item_const_eliminator 1.59% : 0.000015s : 114: predicate.tuple_list_get_item_depend_reorder 2.58% : 0.000025s : 162: predicate.tuple_list_get_item_eliminator 1.63% : 0.000016s : 114: predicate.tuple_list_get_set_item_eliminator 2.36% : 0.000023s : 156: predicate.tuple_list_set_item_eliminator 1.73% : 0.000017s : 120: predicate.tuple_to_list_eliminator_ 2.92% : 0.000028s : 192: predicate.updatestate_pure_node_eliminater 3.84% : 0.000037s : 234: predicate.updatestate_useless_node_eliminater 0.36% : 0.000003s : 21: predicate.value_based_eliminate 0.60% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.60% : 0.000006s : 42: predicate.virtual_output_eliminate 0.58% : 0.000006s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002966 41 66.06% : 0.001960s : 24: func_graph_cloner_run.FuncGraphClonerGraph 33.94% : 0.001007s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.208755 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000029s : 1: add_cache_embedding 0.00% : 0.000006s : 1: add_comm_op_reuse_tag 0.08% : 0.000157s : 1: add_recomputation 0.03% : 0.000066s : 1: assign_add_opt 0.28% : 0.000584s : 1: auto_monad 0.05% : 0.000112s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.20% : 0.000418s : 1: bootstrap 0.02% : 0.000033s : 1: cconv 0.03% : 0.000064s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.01% : 0.000024s : 1: convert_after_rewriter 0.03% : 0.000064s : 1: cse_after_recomputation 0.00% : 0.000007s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: distribtued_split 0.28% : 0.000588s : 1: eliminate_special_op_node 0.01% : 0.000024s : 1: environ_conv 0.01% : 0.000023s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000014s : 1: graph_reusing 0.01% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000014s : 1: handle_group_info 0.00% : 0.000008s : 1: inline 0.00% : 0.000010s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.26% : 0.000544s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000031s : 1: opt.transform.loop_unroll_optimizer 2.46% : 0.005125s : 80: opt.transform.opt_a 0.07% : 0.000156s : 1: opt.transform.opt_after_cconv 0.23% : 0.000480s : 27: opt.transform.opt_b 0.08% : 0.000162s : 1: opt.transform.opt_trans_graph 0.04% : 0.000075s : 3: opt.transform.special_op_eliminate 0.06% : 0.000125s : 4: opt.transform.symbol_engine_opt 6.11% : 0.012756s : 1: opt_a 0.16% : 0.000325s : 1: opt_after_cconv 0.31% : 0.000651s : 1: opt_b 7.84% : 0.016368s : 1: optimize 0.01% : 0.000024s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000018s : 1: order_py_execute_after_rewriter 0.02% : 0.000033s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000006s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.02% : 0.000050s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000009s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.05% : 0.000101s : 1: pre_auto_parallel 0.03% : 0.000073s : 1: py_interpret_to_execute 0.01% : 0.000024s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000020s : 1: remove_cast_before_assign_add 0.08% : 0.000165s : 1: remove_dup_value 0.75% : 0.001566s : 1: renormalize.infer 0.42% : 0.000880s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.19% : 0.000389s : 1: rewriter_after_opt_a 0.10% : 0.000209s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000023s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000188s : 1: symbol_engine_optimizer 58.08% : 0.121237s : 1: task_emit 0.09% : 0.000187s : 1: tuple_transform 21.15% : 0.044153s : 1: type_inference 0.09% : 0.000178s : 1: validate Time group info: ------[substitution.] 0.000638 300 1.10% : 0.000007s : 2: substitution.depend_value_elim 1.02% : 0.000006s : 18: substitution.elim_not_effective 0.96% : 0.000006s : 18: substitution.fold_const_symbol 2.54% : 0.000016s : 21: substitution.graph_param_transform 60.55% : 0.000386s : 15: substitution.inline 2.38% : 0.000015s : 36: substitution.j_node_and_user_rematch 3.24% : 0.000021s : 2: substitution.less_batch_normalization 2.62% : 0.000017s : 30: substitution.load_eliminater 0.86% : 0.000005s : 6: substitution.reduce_all_const_elim 3.28% : 0.000021s : 36: substitution.remove_not_recompute_node 0.88% : 0.000006s : 6: substitution.replace_old_param 2.33% : 0.000015s : 4: substitution.switch_simplify 3.98% : 0.000025s : 6: substitution.tuple_list_get_item_eliminator 7.53% : 0.000048s : 44: substitution.updatestate_pure_node_eliminater 6.75% : 0.000043s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.045114 2 93.36% : 0.042119s : 1: type_inference.infer 6.64% : 0.002995s : 1: type_inference.specialize ------[replace.] 0.000221 25 53.44% : 0.000118s : 15: replace.inline 30.82% : 0.000068s : 4: replace.switch_simplify 15.74% : 0.000035s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000411 25 91.68% : 0.000377s : 15: match.inline 2.99% : 0.000012s : 4: match.switch_simplify 5.33% : 0.000022s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000951 6511 1.00% : 0.000010s : 72: predicate.accumulaten_eliminater 0.67% : 0.000006s : 21: predicate.ad_related_special_op_eliminate 0.58% : 0.000006s : 42: predicate.addn_check_dump 0.99% : 0.000009s : 72: predicate.addn_zero_filter 0.97% : 0.000009s : 72: predicate.adjust_all_reduce_mul_add 2.13% : 0.000020s : 114: predicate.arithmetic_simplify 1.05% : 0.000010s : 72: predicate.cast_eliminate 0.61% : 0.000006s : 42: predicate.check_bprop_eliminate 0.59% : 0.000006s : 42: predicate.compare_switch_simplify 0.17% : 0.000002s : 21: predicate.const_output_eliminate 0.32% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.46% : 0.000014s : 78: predicate.convert_tensor_eliminate 0.60% : 0.000006s : 42: predicate.depend_value_elim 1.08% : 0.000010s : 72: predicate.dict_get_item_const_eliminator 1.11% : 0.000011s : 72: predicate.dict_get_item_eliminator 1.08% : 0.000010s : 72: predicate.dict_set_item_eliminator 0.18% : 0.000002s : 21: predicate.elim_not_effective 0.40% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.30% : 0.000012s : 93: predicate.environ_add_const_eliminate 1.29% : 0.000012s : 93: predicate.environ_get_add_eliminate 1.28% : 0.000012s : 93: predicate.environ_get_depend_swap 1.95% : 0.000019s : 135: predicate.environ_get_eliminate 1.30% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.33% : 0.000013s : 93: predicate.exchange_switch_depend_value 1.65% : 0.000016s : 93: predicate.float_depend_g_call 0.59% : 0.000006s : 42: predicate.float_environ_get_switch 0.88% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.17% : 0.000002s : 21: predicate.fold_const_symbol 0.62% : 0.000006s : 42: predicate.get_grad_eliminate 0.23% : 0.000002s : 21: predicate.graph_param_transform 0.59% : 0.000006s : 42: predicate.incorporate_call 0.58% : 0.000005s : 42: predicate.incorporate_call_switch 5.53% : 0.000053s : 291: predicate.inline 0.81% : 0.000008s : 42: predicate.inline_without_move 0.31% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.76% : 0.000007s : 42: predicate.less_batch_normalization 1.74% : 0.000017s : 120: predicate.list_to_tuple_eliminator_ 2.93% : 0.000028s : 192: predicate.load_eliminater 0.76% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.75% : 0.000017s : 110: predicate.loop_unroll_before_grad 1.81% : 0.000017s : 114: predicate.make_slice_get_slice_eliminator 0.58% : 0.000006s : 42: predicate.merge_addn 0.58% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.59% : 0.000006s : 42: predicate.mini_step_allgather_replace 0.98% : 0.000009s : 72: predicate.minmaximum_grad 0.43% : 0.000004s : 21: predicate.mutable_eliminate 0.35% : 0.000003s : 21: predicate.opt_reshape 0.37% : 0.000004s : 21: predicate.parallel_virtual_node 1.94% : 0.000018s : 93: predicate.partial_defer_inline 1.53% : 0.000015s : 99: predicate.partial_eliminate 1.02% : 0.000010s : 72: predicate.print_const_string_wrapper 0.66% : 0.000006s : 42: predicate.reduce_all_const_elim 1.27% : 0.000012s : 72: predicate.reduce_eliminate 0.34% : 0.000003s : 42: predicate.remove_not_recompute_node 1.15% : 0.000011s : 120: predicate.replace_applicator 0.33% : 0.000003s : 42: predicate.replace_old_param 0.17% : 0.000002s : 21: predicate.reset_defer_inline 1.05% : 0.000010s : 72: predicate.reshape_eliminate 0.61% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.37% : 0.000004s : 21: predicate.row_tensor_eliminate 0.82% : 0.000008s : 42: predicate.same_eliminate 0.37% : 0.000004s : 46: predicate.set_cell_output_no_recompute 0.68% : 0.000006s : 42: predicate.shard_identity_eliminate 1.01% : 0.000010s : 63: predicate.special_op_eliminate 0.72% : 0.000007s : 42: predicate.specialize_transform 0.68% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.72% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.70% : 0.000026s : 192: predicate.stopgrad_eliminater 0.33% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.47% : 0.000014s : 93: predicate.switch_defer_inline 2.00% : 0.000019s : 135: predicate.switch_layer_defer_inline 4.75% : 0.000045s : 253: predicate.switch_simplify 1.02% : 0.000010s : 72: predicate.tile_eliminate 1.03% : 0.000010s : 72: predicate.transpose_eliminate 1.85% : 0.000018s : 114: predicate.tuple_list_convert_item_index_to_positive 1.77% : 0.000017s : 114: predicate.tuple_list_get_item_const_eliminator 1.67% : 0.000016s : 114: predicate.tuple_list_get_item_depend_reorder 2.63% : 0.000025s : 162: predicate.tuple_list_get_item_eliminator 1.66% : 0.000016s : 114: predicate.tuple_list_get_set_item_eliminator 2.38% : 0.000023s : 156: predicate.tuple_list_set_item_eliminator 1.74% : 0.000017s : 120: predicate.tuple_to_list_eliminator_ 2.88% : 0.000027s : 192: predicate.updatestate_pure_node_eliminater 3.69% : 0.000035s : 234: predicate.updatestate_useless_node_eliminater 0.34% : 0.000003s : 21: predicate.value_based_eliminate 0.61% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.61% : 0.000006s : 42: predicate.virtual_output_eliminate 0.36% : 0.000003s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.003016 41 66.69% : 0.002011s : 24: func_graph_cloner_run.FuncGraphClonerGraph 33.31% : 0.001005s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.209680 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000028s : 1: add_cache_embedding 0.00% : 0.000006s : 1: add_comm_op_reuse_tag 0.07% : 0.000154s : 1: add_recomputation 0.03% : 0.000065s : 1: assign_add_opt 0.28% : 0.000593s : 1: auto_monad 0.05% : 0.000113s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.19% : 0.000394s : 1: bootstrap 0.02% : 0.000032s : 1: cconv 0.03% : 0.000063s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000024s : 1: convert_after_rewriter 0.03% : 0.000059s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: distribtued_split 0.28% : 0.000582s : 1: eliminate_special_op_node 0.01% : 0.000024s : 1: environ_conv 0.01% : 0.000021s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000013s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000013s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.27% : 0.000559s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000033s : 1: opt.transform.loop_unroll_optimizer 2.40% : 0.005033s : 80: opt.transform.opt_a 0.07% : 0.000155s : 1: opt.transform.opt_after_cconv 0.23% : 0.000482s : 27: opt.transform.opt_b 0.08% : 0.000162s : 1: opt.transform.opt_trans_graph 0.04% : 0.000077s : 3: opt.transform.special_op_eliminate 0.06% : 0.000126s : 4: opt.transform.symbol_engine_opt 6.06% : 0.012707s : 1: opt_a 0.15% : 0.000317s : 1: opt_after_cconv 0.31% : 0.000647s : 1: opt_b 7.72% : 0.016191s : 1: optimize 0.01% : 0.000027s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000017s : 1: order_py_execute_after_rewriter 0.02% : 0.000034s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.02% : 0.000041s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.05% : 0.000095s : 1: pre_auto_parallel 0.03% : 0.000073s : 1: py_interpret_to_execute 0.01% : 0.000023s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000020s : 1: remove_cast_before_assign_add 0.03% : 0.000072s : 1: remove_dup_value 0.77% : 0.001624s : 1: renormalize.infer 0.44% : 0.000930s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.19% : 0.000393s : 1: rewriter_after_opt_a 0.10% : 0.000206s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000022s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000185s : 1: symbol_engine_optimizer 57.93% : 0.121477s : 1: task_emit 0.09% : 0.000183s : 1: tuple_transform 21.56% : 0.045201s : 1: type_inference 0.09% : 0.000182s : 1: validate TotalTime = 0.180708, [21] [bootstrap]: 0.0003491 [type_inference]: 0.0421499 [auto_monad]: 0.00051704 [graph_reusing]: 4.53e-06 [inline]: 1.29e-06 [parallel-infer-symbol]: 1.40999e-06 [pre_auto_parallel]: 8.435e-05 [insert-virtual-dataset]: 2.17999e-06 [parallel-infer-symbol-second]: 5.10001e-07 [dataset_repeat_opt]: 7.49991e-07 [pipeline_split]: 1.08e-06 [optimize]: 0.0160204, [52] [py_interpret_to_execute]: 6.515e-05 [rewriter_before_opt_a]: 0.00019498 [opt_a]: 0.0126385, [2] [Cycle 1]: 0.00708157, [43] [expand_dump_flag]: 5.33e-06 [switch_simplify]: 0.00020314 [loop_unroll]: 7.845e-05 [a_1]: 0.00242957 [recompute_prepare]: 2.68e-05 [updatestate_depend_eliminate]: 9.914e-05 [updatestate_assign_eliminate]: 2.153e-05 [updatestate_loads_eliminate]: 1.593e-05 [parameter_eliminate]: 2.68e-06 [a_2]: 0.00032168 [accelerated_algorithm]: 4.31e-05 [shard]: 1.58e-06 [meta_shard_fg_expand]: 8.22e-06 [shard_inline]: 2.145e-05 [auto_parallel]: 1.939e-05 [parallel]: 5.58e-06 [flash_sp]: 1.091e-05 [merge_comm]: 1.56e-05 [allreduce_fusion]: 1.325e-05 [matmul_add_comm_reduction]: 2.061e-05 [allreduce_slice_to_reducescatter]: 3.80009e-07 [virtual_shard_identity]: 2.28e-05 [virtual_dataset]: 2.079e-05 [get_grad_eliminate_]: 2.109e-05 [virtual_output]: 2.035e-05 [merge_forward]: 1.243e-05 [cell_reuse_recompute_pass]: 1.66001e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.147e-05 [before_grad]: 3.722e-05 [inplace_validation]: 1.177e-05 [meta_fg_expand]: 1.646e-05 [inplace_validation_after_expand]: 1.446e-05 [flash_sp_send_recv_attached]: 2.11e-06 [receive_attached]: 3.37001e-06 [after_resolve]: 2.812e-05 [a_after_grad]: 3.529e-05 [special_op_eliminate]: 2.116e-05 [renormalize]: 0.00264124 [add_forward_monad_depend]: 3.52999e-06 [auto_monad_grad]: 2.24001e-06 [auto_monad_eliminator]: 5.083e-05 [cse]: 0.00029166 [a_3]: 0.00015198 [Cycle 2]: 0.00202208, [43] [expand_dump_flag]: 1.18e-06 [switch_simplify]: 2.339e-05 [loop_unroll]: 2.127e-05 [a_1]: 0.00068457 [recompute_prepare]: 2.009e-05 [updatestate_depend_eliminate]: 1.601e-05 [updatestate_assign_eliminate]: 1.617e-05 [updatestate_loads_eliminate]: 1.462e-05 [parameter_eliminate]: 1.91999e-06 [a_2]: 0.0003052 [accelerated_algorithm]: 2.443e-05 [shard]: 9.79999e-07 [meta_shard_fg_expand]: 6.34999e-06 [shard_inline]: 2.269e-05 [auto_parallel]: 1.945e-05 [parallel]: 3.97001e-06 [flash_sp]: 2.43999e-06 [merge_comm]: 1.519e-05 [allreduce_fusion]: 1.312e-05 [matmul_add_comm_reduction]: 1.893e-05 [allreduce_slice_to_reducescatter]: 3.50003e-07 [virtual_shard_identity]: 2.216e-05 [virtual_dataset]: 2.1e-05 [get_grad_eliminate_]: 2.014e-05 [virtual_output]: 1.992e-05 [merge_forward]: 1.148e-05 [cell_reuse_recompute_pass]: 2.31e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00010675 [before_grad]: 3.668e-05 [inplace_validation]: 1.132e-05 [meta_fg_expand]: 1.357e-05 [inplace_validation_after_expand]: 1.528e-05 [flash_sp_send_recv_attached]: 9.20001e-07 [receive_attached]: 7.00005e-07 [after_resolve]: 2.472e-05 [a_after_grad]: 3.48e-05 [special_op_eliminate]: 1.978e-05 [renormalize]: 8.00064e-08 [add_forward_monad_depend]: 1.14999e-06 [auto_monad_grad]: 1.22e-06 [auto_monad_eliminator]: 3.812e-05 [cse]: 5.537e-05 [a_3]: 0.00014073 [py_interpret_to_execute_after_opt_a]: 2.044e-05 [slice_cell_reuse_recomputed_activation]: 1.82999e-06 [rewriter_after_opt_a]: 0.00037317 [convert_after_rewriter]: 1.71e-05 [order_py_execute_after_rewriter]: 1.256e-05 [opt_b]: 0.00064008, [1] [Cycle 1]: 0.00063478, [7] [b_1]: 0.00047606 [b_2]: 2.379e-05 [updatestate_depend_eliminate]: 1.39e-05 [updatestate_assign_eliminate]: 1.595e-05 [updatestate_loads_eliminate]: 1.467e-05 [renormalize]: 2.89991e-07 [cse]: 5.438e-05 [optimize_parallel_all_gather_comm]: 1.864e-05 [overlap_param_gather]: 3.3e-06 [cconv]: 2.11e-05 [loop_unroll]: 0.0005576 [opt_after_cconv]: 0.00030456, [1] [Cycle 1]: 0.00029793, [7] [c_1]: 0.00014968 [parameter_eliminate]: 1.71001e-06 [updatestate_depend_eliminate]: 1.595e-05 [updatestate_assign_eliminate]: 1.676e-05 [updatestate_loads_eliminate]: 1.922e-05 [cse]: 5.758e-05 [renormalize]: 5.19998e-07 [remove_dup_value]: 5.79e-05 [tuple_transform]: 0.00017752, [1] [Cycle 1]: 0.0001722, [2] [d_1]: 0.00016043 [renormalize]: 2.69996e-07 [partial_unused_args_eliminate]: 1.9e-06 [add_cache_embedding]: 2.19e-05 [add_recomputation]: 0.00013815 [cse_after_recomputation]: 5.632e-05, [1] [Cycle 1]: 5.092e-05, [1] [cse]: 4.501e-05 [environ_conv]: 1.623e-05 [swap_dp_allreduce_reducescatter]: 1.801e-05 [bias_add_comm_swap]: 1.86001e-06 [label_micro_interleaved_index]: 1.42e-06 [label_fine_grained_interleaved_index]: 1.22e-06 [merge_cast_opt]: 1.00001e-06 [slice_recompute_activation]: 1.30001e-06 [micro_interleaved_order_control]: 1.14999e-06 [assign_add_opt]: 5.704e-05 [ForceFp32Comm]: 6.70014e-07 [remove_cast_before_assign_add]: 1.528e-05 [full_micro_interleaved_order_control]: 1.33e-06 [reorder_send_recv_between_fp_bp]: 1.04001e-06 [comm_op_add_attrs]: 5.232e-05 [add_comm_op_reuse_tag]: 1.58e-06 [interleave_split_concat_branches]: 5.40007e-07 [interleave_parallel_branches]: 5.19998e-07 [overlap_opt_shard_in_pipeline]: 1.69e-06 [overlap_opt_shard_grad_in_pipeline]: 1.38e-06 [control_data_broadcast_order]: 9.29998e-07 [grouped_pairwise_exchange_alltoall]: 6.54999e-06 [offloading_packed_experts]: 1.7e-06 [overlap_recompute_and_grad_model_parallel]: 1.34e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.29995e-07 [overlap_recompute_allgather_and_fa_grad]: 3.909e-05 [overlap_grad_ring_attention]: 1.43e-06 [overlap_grad_flash_sp]: 2.736e-05 [begin_end_overlap_inline]: 4.60001e-07 [split_matmul_comm_elemetwise]: 1.23e-06 [split_layernorm_comm]: 1.19999e-06 [handle_group_info]: 7.99001e-06 [symbol_engine_optimizer]: 0.0001781, [1] [Cycle 1]: 0.00017288, [6] [build]: 1.466e-05 [elim_shapecalc]: 2.801e-05 [elim_not_effective]: 4.071e-05 [opt_reshape]: 2.23e-05 [fold_const_symbol]: 3.731e-05 [renormalize]: 3.09999e-07 [pipeline_parallel_scheduler]: 1.65001e-06 [auto_monad_reorder]: 8.752e-05 [get_jit_bprop_graph]: 4.39992e-07 [rewriter_after_jit_bprop_graph]: 2.89991e-07 [eliminate_special_op_node]: 0.00061421 [distribtued_split]: 1.09999e-06 [validate]: 7.071e-05 [task_emit]: 0.120486 [execute]: 8.03999e-06 Sums bootstrap : 0.000349s : 0.20% type_inference : 0.042150s : 23.95% auto_monad : 0.000517s : 0.29% graph_reusing : 0.000005s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000084s : 0.05% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000065s : 0.04% optimize.rewriter_before_opt_a : 0.000195s : 0.11% optimize.opt_a.expand_dump_flag : 0.000007s : 0.00% optimize.opt_a.switch_simplify : 0.000227s : 0.13% optimize.opt_a.loop_unroll : 0.000100s : 0.06% optimize.opt_a.a_1 : 0.003114s : 1.77% optimize.opt_a.recompute_prepare : 0.000047s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000115s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000038s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000031s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000627s : 0.36% optimize.opt_a.accelerated_algorithm : 0.000068s : 0.04% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000015s : 0.01% optimize.opt_a.shard_inline : 0.000044s : 0.03% optimize.opt_a.auto_parallel : 0.000039s : 0.02% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.01% optimize.opt_a.merge_comm : 0.000031s : 0.02% optimize.opt_a.allreduce_fusion : 0.000026s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000040s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000045s : 0.03% optimize.opt_a.virtual_dataset : 0.000042s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000041s : 0.02% optimize.opt_a.virtual_output : 0.000040s : 0.02% optimize.opt_a.merge_forward : 0.000024s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000148s : 0.08% optimize.opt_a.before_grad : 0.000074s : 0.04% optimize.opt_a.inplace_validation : 0.000023s : 0.01% optimize.opt_a.meta_fg_expand : 0.000030s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000030s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000053s : 0.03% optimize.opt_a.a_after_grad : 0.000070s : 0.04% optimize.opt_a.special_op_eliminate : 0.000041s : 0.02% optimize.opt_a.renormalize : 0.002641s : 1.50% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000089s : 0.05% optimize.opt_a.cse : 0.000347s : 0.20% optimize.opt_a.a_3 : 0.000293s : 0.17% optimize.py_interpret_to_execute_after_opt_a : 0.000020s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000373s : 0.21% optimize.convert_after_rewriter : 0.000017s : 0.01% optimize.order_py_execute_after_rewriter : 0.000013s : 0.01% optimize.opt_b.b_1 : 0.000476s : 0.27% optimize.opt_b.b_2 : 0.000024s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000054s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000019s : 0.01% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000021s : 0.01% optimize.loop_unroll : 0.000558s : 0.32% optimize.opt_after_cconv.c_1 : 0.000150s : 0.09% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000019s : 0.01% optimize.opt_after_cconv.cse : 0.000058s : 0.03% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000058s : 0.03% optimize.tuple_transform.d_1 : 0.000160s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000022s : 0.01% optimize.add_recomputation : 0.000138s : 0.08% optimize.cse_after_recomputation.cse : 0.000045s : 0.03% optimize.environ_conv : 0.000016s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000018s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000057s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000015s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000052s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000007s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000039s : 0.02% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000027s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000008s : 0.00% optimize.symbol_engine_optimizer.build : 0.000015s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000028s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000041s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000022s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000037s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000088s : 0.05% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000614s : 0.35% distribtued_split : 0.000001s : 0.00% validate : 0.000071s : 0.04% task_emit : 0.120486s : 68.45% execute : 0.000008s : 0.00% Time group info: ------[substitution.] 0.000605 300 0.74% : 0.000004s : 2: substitution.depend_value_elim 1.00% : 0.000006s : 18: substitution.elim_not_effective 0.97% : 0.000006s : 18: substitution.fold_const_symbol 2.60% : 0.000016s : 21: substitution.graph_param_transform 61.14% : 0.000370s : 15: substitution.inline 2.42% : 0.000015s : 36: substitution.j_node_and_user_rematch 3.13% : 0.000019s : 2: substitution.less_batch_normalization 2.66% : 0.000016s : 30: substitution.load_eliminater 0.79% : 0.000005s : 6: substitution.reduce_all_const_elim 3.38% : 0.000020s : 36: substitution.remove_not_recompute_node 1.19% : 0.000007s : 6: substitution.replace_old_param 2.11% : 0.000013s : 4: substitution.switch_simplify 3.35% : 0.000020s : 6: substitution.tuple_list_get_item_eliminator 7.63% : 0.000046s : 44: substitution.updatestate_pure_node_eliminater 6.91% : 0.000042s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.042088 2 93.47% : 0.039339s : 1: type_inference.infer 6.53% : 0.002749s : 1: type_inference.specialize ------[replace.] 0.000213 25 53.81% : 0.000115s : 15: replace.inline 29.33% : 0.000063s : 4: replace.switch_simplify 16.87% : 0.000036s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000388 25 93.03% : 0.000361s : 15: match.inline 2.67% : 0.000010s : 4: match.switch_simplify 4.30% : 0.000017s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000947 6511 1.02% : 0.000010s : 72: predicate.accumulaten_eliminater 0.71% : 0.000007s : 21: predicate.ad_related_special_op_eliminate 0.59% : 0.000006s : 42: predicate.addn_check_dump 0.98% : 0.000009s : 72: predicate.addn_zero_filter 1.03% : 0.000010s : 72: predicate.adjust_all_reduce_mul_add 2.11% : 0.000020s : 114: predicate.arithmetic_simplify 1.07% : 0.000010s : 72: predicate.cast_eliminate 0.62% : 0.000006s : 42: predicate.check_bprop_eliminate 0.60% : 0.000006s : 42: predicate.compare_switch_simplify 0.17% : 0.000002s : 21: predicate.const_output_eliminate 0.32% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.35% : 0.000013s : 78: predicate.convert_tensor_eliminate 0.62% : 0.000006s : 42: predicate.depend_value_elim 1.15% : 0.000011s : 72: predicate.dict_get_item_const_eliminator 1.12% : 0.000011s : 72: predicate.dict_get_item_eliminator 1.12% : 0.000011s : 72: predicate.dict_set_item_eliminator 0.18% : 0.000002s : 21: predicate.elim_not_effective 0.41% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.30% : 0.000012s : 93: predicate.environ_add_const_eliminate 1.26% : 0.000012s : 93: predicate.environ_get_add_eliminate 1.28% : 0.000012s : 93: predicate.environ_get_depend_swap 1.94% : 0.000018s : 135: predicate.environ_get_eliminate 1.27% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.31% : 0.000012s : 93: predicate.exchange_switch_depend_value 1.63% : 0.000015s : 93: predicate.float_depend_g_call 0.60% : 0.000006s : 42: predicate.float_environ_get_switch 0.90% : 0.000009s : 63: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 21: predicate.fold_const_symbol 0.64% : 0.000006s : 42: predicate.get_grad_eliminate 0.20% : 0.000002s : 21: predicate.graph_param_transform 0.61% : 0.000006s : 42: predicate.incorporate_call 0.58% : 0.000005s : 42: predicate.incorporate_call_switch 5.41% : 0.000051s : 291: predicate.inline 0.80% : 0.000008s : 42: predicate.inline_without_move 0.32% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.75% : 0.000007s : 42: predicate.less_batch_normalization 1.74% : 0.000017s : 120: predicate.list_to_tuple_eliminator_ 2.84% : 0.000027s : 192: predicate.load_eliminater 0.75% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.80% : 0.000017s : 110: predicate.loop_unroll_before_grad 1.64% : 0.000016s : 114: predicate.make_slice_get_slice_eliminator 0.61% : 0.000006s : 42: predicate.merge_addn 0.59% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.62% : 0.000006s : 42: predicate.mini_step_allgather_replace 1.00% : 0.000009s : 72: predicate.minmaximum_grad 0.42% : 0.000004s : 21: predicate.mutable_eliminate 0.34% : 0.000003s : 21: predicate.opt_reshape 0.34% : 0.000003s : 21: predicate.parallel_virtual_node 1.93% : 0.000018s : 93: predicate.partial_defer_inline 1.56% : 0.000015s : 99: predicate.partial_eliminate 1.03% : 0.000010s : 72: predicate.print_const_string_wrapper 0.67% : 0.000006s : 42: predicate.reduce_all_const_elim 1.24% : 0.000012s : 72: predicate.reduce_eliminate 0.36% : 0.000003s : 42: predicate.remove_not_recompute_node 1.13% : 0.000011s : 120: predicate.replace_applicator 0.35% : 0.000003s : 42: predicate.replace_old_param 0.17% : 0.000002s : 21: predicate.reset_defer_inline 1.12% : 0.000011s : 72: predicate.reshape_eliminate 0.61% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.37% : 0.000004s : 21: predicate.row_tensor_eliminate 0.83% : 0.000008s : 42: predicate.same_eliminate 0.35% : 0.000003s : 46: predicate.set_cell_output_no_recompute 0.67% : 0.000006s : 42: predicate.shard_identity_eliminate 1.00% : 0.000010s : 63: predicate.special_op_eliminate 0.70% : 0.000007s : 42: predicate.specialize_transform 0.71% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.71% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.71% : 0.000026s : 192: predicate.stopgrad_eliminater 0.33% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.46% : 0.000014s : 93: predicate.switch_defer_inline 1.99% : 0.000019s : 135: predicate.switch_layer_defer_inline 4.74% : 0.000045s : 253: predicate.switch_simplify 1.04% : 0.000010s : 72: predicate.tile_eliminate 1.02% : 0.000010s : 72: predicate.transpose_eliminate 1.77% : 0.000017s : 114: predicate.tuple_list_convert_item_index_to_positive 1.74% : 0.000016s : 114: predicate.tuple_list_get_item_const_eliminator 1.61% : 0.000015s : 114: predicate.tuple_list_get_item_depend_reorder 2.58% : 0.000024s : 162: predicate.tuple_list_get_item_eliminator 1.64% : 0.000016s : 114: predicate.tuple_list_get_set_item_eliminator 2.36% : 0.000022s : 156: predicate.tuple_list_set_item_eliminator 1.76% : 0.000017s : 120: predicate.tuple_to_list_eliminator_ 2.89% : 0.000027s : 192: predicate.updatestate_pure_node_eliminater 3.89% : 0.000037s : 234: predicate.updatestate_useless_node_eliminater 0.34% : 0.000003s : 21: predicate.value_based_eliminate 0.62% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.63% : 0.000006s : 42: predicate.virtual_output_eliminate 0.60% : 0.000006s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002757 41 65.06% : 0.001794s : 24: func_graph_cloner_run.FuncGraphClonerGraph 34.94% : 0.000963s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.205291 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000026s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000144s : 1: add_recomputation 0.03% : 0.000061s : 1: assign_add_opt 0.26% : 0.000537s : 1: auto_monad 0.05% : 0.000095s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.18% : 0.000372s : 1: bootstrap 0.01% : 0.000025s : 1: cconv 0.03% : 0.000057s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000022s : 1: convert_after_rewriter 0.03% : 0.000060s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.31% : 0.000628s : 1: eliminate_special_op_node 0.01% : 0.000020s : 1: environ_conv 0.01% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000012s : 1: graph_reusing 0.00% : 0.000010s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000012s : 1: handle_group_info 0.00% : 0.000006s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.28% : 0.000567s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000031s : 1: opt.transform.loop_unroll_optimizer 2.44% : 0.005000s : 80: opt.transform.opt_a 0.07% : 0.000148s : 1: opt.transform.opt_after_cconv 0.23% : 0.000479s : 27: opt.transform.opt_b 0.08% : 0.000159s : 1: opt.transform.opt_trans_graph 0.04% : 0.000073s : 3: opt.transform.special_op_eliminate 0.06% : 0.000124s : 4: opt.transform.symbol_engine_opt 6.16% : 0.012643s : 1: opt_a 0.15% : 0.000309s : 1: opt_after_cconv 0.31% : 0.000643s : 1: opt_b 7.81% : 0.016029s : 1: optimize 0.01% : 0.000022s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000016s : 1: order_py_execute_after_rewriter 0.02% : 0.000031s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.02% : 0.000043s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000005s : 1: pipeline_split 0.05% : 0.000095s : 1: pre_auto_parallel 0.03% : 0.000072s : 1: py_interpret_to_execute 0.01% : 0.000025s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000019s : 1: remove_cast_before_assign_add 0.03% : 0.000064s : 1: remove_dup_value 0.82% : 0.001675s : 1: renormalize.infer 0.47% : 0.000957s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000380s : 1: rewriter_after_opt_a 0.10% : 0.000201s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000022s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000181s : 1: symbol_engine_optimizer 58.70% : 0.120512s : 1: task_emit 0.09% : 0.000181s : 1: tuple_transform 20.54% : 0.042170s : 1: type_inference 0.08% : 0.000155s : 1: validate [WARNING] DISTRIBUTED(163844,ffffae145c10,python3.7):2025-02-07-13:54:20.139.496 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (0, 1, 2, 3, 4, 5, 6, 7) [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163840,ffff92fd9c10,python3.7):2025-02-07-13:54:20.139.527 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (0, 1, 2, 3, 4, 5, 6, 7) [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163843,ffffaa156c10,python3.7):2025-02-07-13:54:20.142.464 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (0, 1, 2, 3, 4, 5, 6, 7) [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163839,ffff88d43c10,python3.7):2025-02-07-13:54:20.142.510 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (0, 1, 2, 3, 4, 5, 6, 7) [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163845,ffff83f78c10,python3.7):2025-02-07-13:54:20.142.884 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (0, 1, 2, 3, 4, 5, 6, 7) [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163841,ffff99d3ac10,python3.7):2025-02-07-13:54:20.142.884 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (0, 1, 2, 3, 4, 5, 6, 7) [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] PARALLEL(163846,ffff97644c10,python3.7):2025-02-07-13:54:20.176.247 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. . TotalTime = 0.181783, [21] [bootstrap]: 0.00038285 [type_inference]: 0.0445182 [auto_monad]: 0.00054633 [graph_reusing]: 5.95e-06 [inline]: 1.82001e-06 [parallel-infer-symbol]: 2.23001e-06 [pre_auto_parallel]: 8.993e-05 [insert-virtual-dataset]: 3.3e-06 [parallel-infer-symbol-second]: 5.19998e-07 [dataset_repeat_opt]: 1.44e-06 [pipeline_split]: 1.74e-06 [optimize]: 0.0202571, [52] [py_interpret_to_execute]: 6.515e-05 [rewriter_before_opt_a]: 0.00020007 [opt_a]: 0.0166653, [2] [Cycle 1]: 0.00767269, [43] [expand_dump_flag]: 9.81e-06 [switch_simplify]: 0.00025462 [loop_unroll]: 8.106e-05 [a_1]: 0.00250049 [recompute_prepare]: 2.849e-05 [updatestate_depend_eliminate]: 0.00010521 [updatestate_assign_eliminate]: 2.406e-05 [updatestate_loads_eliminate]: 1.713e-05 [parameter_eliminate]: 4.36e-06 [a_2]: 0.00032521 [accelerated_algorithm]: 4.525e-05 [shard]: 2.45e-06 [meta_shard_fg_expand]: 1.072e-05 [shard_inline]: 2.148e-05 [auto_parallel]: 1.982e-05 [parallel]: 8.74e-06 [flash_sp]: 1.49e-05 [merge_comm]: 1.762e-05 [allreduce_fusion]: 1.382e-05 [matmul_add_comm_reduction]: 2.608e-05 [allreduce_slice_to_reducescatter]: 7.90009e-07 [virtual_shard_identity]: 2.3e-05 [virtual_dataset]: 2.08e-05 [get_grad_eliminate_]: 2.086e-05 [virtual_output]: 2.096e-05 [merge_forward]: 1.445e-05 [cell_reuse_recompute_pass]: 2.15e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.197e-05 [before_grad]: 3.809e-05 [inplace_validation]: 1.264e-05 [meta_fg_expand]: 1.809e-05 [inplace_validation_after_expand]: 1.64e-05 [flash_sp_send_recv_attached]: 3.27e-06 [receive_attached]: 4.63e-06 [after_resolve]: 3.027e-05 [a_after_grad]: 3.643e-05 [special_op_eliminate]: 2.223e-05 [renormalize]: 0.00294828 [add_forward_monad_depend]: 4.27999e-06 [auto_monad_grad]: 2.41e-06 [auto_monad_eliminator]: 6.117e-05 [cse]: 0.00036103 [a_3]: 0.00015553 [Cycle 2]: 0.00196635, [43] [expand_dump_flag]: 1.68999e-06 [switch_simplify]: 2.365e-05 [loop_unroll]: 2.097e-05 [a_1]: 0.00068412 [recompute_prepare]: 2.025e-05 [updatestate_depend_eliminate]: 1.631e-05 [updatestate_assign_eliminate]: 1.693e-05 [updatestate_loads_eliminate]: 1.475e-05 [parameter_eliminate]: 2.43e-06 [a_2]: 0.0003091 [accelerated_algorithm]: 2.444e-05 [shard]: 1.25001e-06 [meta_shard_fg_expand]: 6.99e-06 [shard_inline]: 2.168e-05 [auto_parallel]: 2.041e-05 [parallel]: 3.87001e-06 [flash_sp]: 3.86999e-06 [merge_comm]: 1.568e-05 [allreduce_fusion]: 1.359e-05 [matmul_add_comm_reduction]: 1.912e-05 [allreduce_slice_to_reducescatter]: 5.90007e-07 [virtual_shard_identity]: 2.195e-05 [virtual_dataset]: 2.082e-05 [get_grad_eliminate_]: 2.013e-05 [virtual_output]: 1.982e-05 [merge_forward]: 1.263e-05 [cell_reuse_recompute_pass]: 2.28999e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.062e-05 [before_grad]: 3.641e-05 [inplace_validation]: 1.167e-05 [meta_fg_expand]: 1.372e-05 [inplace_validation_after_expand]: 1.585e-05 [flash_sp_send_recv_attached]: 1.14999e-06 [receive_attached]: 7.10002e-07 [after_resolve]: 2.469e-05 [a_after_grad]: 3.535e-05 [special_op_eliminate]: 2.007e-05 [renormalize]: 6.99947e-08 [add_forward_monad_depend]: 1.50999e-06 [auto_monad_grad]: 1.29e-06 [auto_monad_eliminator]: 4.275e-05 [cse]: 5.806e-05 [a_3]: 0.00014027 [py_interpret_to_execute_after_opt_a]: 2.15e-05 [slice_cell_reuse_recomputed_activation]: 2.37999e-06 [rewriter_after_opt_a]: 0.00036848 [convert_after_rewriter]: 1.952e-05 [order_py_execute_after_rewriter]: 1.356e-05 [opt_b]: 0.00072607, [1] [Cycle 1]: 0.00071958, [7] [b_1]: 0.00055311 [b_2]: 2.399e-05 [updatestate_depend_eliminate]: 1.479e-05 [updatestate_assign_eliminate]: 1.642e-05 [updatestate_loads_eliminate]: 1.477e-05 [renormalize]: 4.00003e-07 [cse]: 5.882e-05 [optimize_parallel_all_gather_comm]: 1.979e-05 [overlap_param_gather]: 3.39e-06 [cconv]: 2.827e-05 [loop_unroll]: 0.00056013 [opt_after_cconv]: 0.00031199, [1] [Cycle 1]: 0.00030541, [7] [c_1]: 0.00015297 [parameter_eliminate]: 2.53e-06 [updatestate_depend_eliminate]: 1.79e-05 [updatestate_assign_eliminate]: 1.705e-05 [updatestate_loads_eliminate]: 1.565e-05 [cse]: 6.295e-05 [renormalize]: 4.70012e-07 [remove_dup_value]: 6.885e-05 [tuple_transform]: 0.00018045, [1] [Cycle 1]: 0.00017503, [2] [d_1]: 0.00016441 [renormalize]: 2.50002e-07 [partial_unused_args_eliminate]: 2.74999e-06 [add_cache_embedding]: 2.425e-05 [add_recomputation]: 0.00015091 [cse_after_recomputation]: 6.103e-05, [1] [Cycle 1]: 5.425e-05, [1] [cse]: 4.812e-05 [environ_conv]: 2.094e-05 [swap_dp_allreduce_reducescatter]: 1.911e-05 [bias_add_comm_swap]: 2.39001e-06 [label_micro_interleaved_index]: 1.91001e-06 [label_fine_grained_interleaved_index]: 2.37999e-06 [merge_cast_opt]: 1.18e-06 [slice_recompute_activation]: 1.91999e-06 [micro_interleaved_order_control]: 2.67e-06 [assign_add_opt]: 6.256e-05 [ForceFp32Comm]: 1.09001e-06 [remove_cast_before_assign_add]: 1.654e-05 [full_micro_interleaved_order_control]: 2.35e-06 [reorder_send_recv_between_fp_bp]: 2.15e-06 [comm_op_add_attrs]: 6.097e-05 [add_comm_op_reuse_tag]: 2.21e-06 [interleave_split_concat_branches]: 9.20001e-07 [interleave_parallel_branches]: 1.04999e-06 [overlap_opt_shard_in_pipeline]: 2.18001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.43e-06 [control_data_broadcast_order]: 1.13001e-06 [grouped_pairwise_exchange_alltoall]: 1.166e-05 [offloading_packed_experts]: 2.09999e-06 [overlap_recompute_and_grad_model_parallel]: 1.85e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.10003e-07 [overlap_recompute_allgather_and_fa_grad]: 4.806e-05 [overlap_grad_ring_attention]: 2.16e-06 [overlap_grad_flash_sp]: 2.904e-05 [begin_end_overlap_inline]: 8.29998e-07 [split_matmul_comm_elemetwise]: 2.16e-06 [split_layernorm_comm]: 2e-06 [handle_group_info]: 9.3e-06 [symbol_engine_optimizer]: 0.00018433, [1] [Cycle 1]: 0.00017881, [6] [build]: 1.897e-05 [elim_shapecalc]: 2.833e-05 [elim_not_effective]: 4.235e-05 [opt_reshape]: 2.237e-05 [fold_const_symbol]: 3.815e-05 [renormalize]: 3.69997e-07 [pipeline_parallel_scheduler]: 1.59e-06 [auto_monad_reorder]: 0.00010223 [get_jit_bprop_graph]: 8.29998e-07 [rewriter_after_jit_bprop_graph]: 5.79996e-07 [eliminate_special_op_node]: 0.00059771 [distribtued_split]: 1.51999e-06 [validate]: 7.933e-05 [task_emit]: 0.114827 [execute]: 1.101e-05 Sums bootstrap : 0.000383s : 0.22% type_inference : 0.044518s : 25.65% auto_monad : 0.000546s : 0.31% graph_reusing : 0.000006s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000090s : 0.05% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000065s : 0.04% optimize.rewriter_before_opt_a : 0.000200s : 0.12% optimize.opt_a.expand_dump_flag : 0.000011s : 0.01% optimize.opt_a.switch_simplify : 0.000278s : 0.16% optimize.opt_a.loop_unroll : 0.000102s : 0.06% optimize.opt_a.a_1 : 0.003185s : 1.84% optimize.opt_a.recompute_prepare : 0.000049s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000122s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000041s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000032s : 0.02% optimize.opt_a.parameter_eliminate : 0.000007s : 0.00% optimize.opt_a.a_2 : 0.000634s : 0.37% optimize.opt_a.accelerated_algorithm : 0.000070s : 0.04% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000018s : 0.01% optimize.opt_a.shard_inline : 0.000043s : 0.02% optimize.opt_a.auto_parallel : 0.000040s : 0.02% optimize.opt_a.parallel : 0.000013s : 0.01% optimize.opt_a.flash_sp : 0.000019s : 0.01% optimize.opt_a.merge_comm : 0.000033s : 0.02% optimize.opt_a.allreduce_fusion : 0.000027s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000045s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000045s : 0.03% optimize.opt_a.virtual_dataset : 0.000042s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000041s : 0.02% optimize.opt_a.virtual_output : 0.000041s : 0.02% optimize.opt_a.merge_forward : 0.000027s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000083s : 0.05% optimize.opt_a.before_grad : 0.000074s : 0.04% optimize.opt_a.inplace_validation : 0.000024s : 0.01% optimize.opt_a.meta_fg_expand : 0.000032s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000032s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.00% optimize.opt_a.after_resolve : 0.000055s : 0.03% optimize.opt_a.a_after_grad : 0.000072s : 0.04% optimize.opt_a.special_op_eliminate : 0.000042s : 0.02% optimize.opt_a.renormalize : 0.002948s : 1.70% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000104s : 0.06% optimize.opt_a.cse : 0.000419s : 0.24% optimize.opt_a.a_3 : 0.000296s : 0.17% optimize.py_interpret_to_execute_after_opt_a : 0.000022s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000368s : 0.21% optimize.convert_after_rewriter : 0.000020s : 0.01% optimize.order_py_execute_after_rewriter : 0.000014s : 0.01% optimize.opt_b.b_1 : 0.000553s : 0.32% optimize.opt_b.b_2 : 0.000024s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000015s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000059s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000020s : 0.01% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000028s : 0.02% optimize.loop_unroll : 0.000560s : 0.32% optimize.opt_after_cconv.c_1 : 0.000153s : 0.09% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000018s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.cse : 0.000063s : 0.04% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000069s : 0.04% optimize.tuple_transform.d_1 : 0.000164s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000024s : 0.01% optimize.add_recomputation : 0.000151s : 0.09% optimize.cse_after_recomputation.cse : 0.000048s : 0.03% optimize.environ_conv : 0.000021s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000019s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000003s : 0.00% optimize.assign_add_opt : 0.000063s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000017s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000061s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000012s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000048s : 0.03% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000029s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000009s : 0.01% optimize.symbol_engine_optimizer.build : 0.000019s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000028s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000042s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000022s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000038s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000102s : 0.06% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000598s : 0.34% distribtued_split : 0.000002s : 0.00% validate : 0.000079s : 0.05% task_emit : 0.114827s : 66.17% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000678 300 1.02% : 0.000007s : 2: substitution.depend_value_elim 0.99% : 0.000007s : 18: substitution.elim_not_effective 0.97% : 0.000007s : 18: substitution.fold_const_symbol 2.39% : 0.000016s : 21: substitution.graph_param_transform 61.95% : 0.000420s : 15: substitution.inline 2.26% : 0.000015s : 36: substitution.j_node_and_user_rematch 3.24% : 0.000022s : 2: substitution.less_batch_normalization 2.41% : 0.000016s : 30: substitution.load_eliminater 0.82% : 0.000006s : 6: substitution.reduce_all_const_elim 2.96% : 0.000020s : 36: substitution.remove_not_recompute_node 0.97% : 0.000007s : 6: substitution.replace_old_param 2.48% : 0.000017s : 4: substitution.switch_simplify 3.79% : 0.000026s : 6: substitution.tuple_list_get_item_eliminator 7.24% : 0.000049s : 44: substitution.updatestate_pure_node_eliminater 6.50% : 0.000044s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.044450 2 93.50% : 0.041561s : 1: type_inference.infer 6.50% : 0.002889s : 1: type_inference.specialize ------[replace.] 0.000259 25 46.64% : 0.000121s : 15: replace.inline 31.25% : 0.000081s : 4: replace.switch_simplify 22.11% : 0.000057s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000447 25 91.90% : 0.000411s : 15: match.inline 3.19% : 0.000014s : 4: match.switch_simplify 4.92% : 0.000022s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000969 6511 0.98% : 0.000009s : 72: predicate.accumulaten_eliminater 0.72% : 0.000007s : 21: predicate.ad_related_special_op_eliminate 0.56% : 0.000005s : 42: predicate.addn_check_dump 0.96% : 0.000009s : 72: predicate.addn_zero_filter 0.98% : 0.000009s : 72: predicate.adjust_all_reduce_mul_add 2.23% : 0.000022s : 114: predicate.arithmetic_simplify 0.99% : 0.000010s : 72: predicate.cast_eliminate 0.61% : 0.000006s : 42: predicate.check_bprop_eliminate 0.58% : 0.000006s : 42: predicate.compare_switch_simplify 0.17% : 0.000002s : 21: predicate.const_output_eliminate 0.33% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.42% : 0.000014s : 78: predicate.convert_tensor_eliminate 0.62% : 0.000006s : 42: predicate.depend_value_elim 1.06% : 0.000010s : 72: predicate.dict_get_item_const_eliminator 1.19% : 0.000011s : 72: predicate.dict_get_item_eliminator 1.07% : 0.000010s : 72: predicate.dict_set_item_eliminator 0.18% : 0.000002s : 21: predicate.elim_not_effective 0.36% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.27% : 0.000012s : 93: predicate.environ_add_const_eliminate 1.28% : 0.000012s : 93: predicate.environ_get_add_eliminate 1.24% : 0.000012s : 93: predicate.environ_get_depend_swap 1.92% : 0.000019s : 135: predicate.environ_get_eliminate 1.23% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.32% : 0.000013s : 93: predicate.exchange_switch_depend_value 1.69% : 0.000016s : 93: predicate.float_depend_g_call 0.57% : 0.000006s : 42: predicate.float_environ_get_switch 0.87% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 21: predicate.fold_const_symbol 0.61% : 0.000006s : 42: predicate.get_grad_eliminate 0.21% : 0.000002s : 21: predicate.graph_param_transform 0.60% : 0.000006s : 42: predicate.incorporate_call 0.56% : 0.000005s : 42: predicate.incorporate_call_switch 5.35% : 0.000052s : 291: predicate.inline 0.79% : 0.000008s : 42: predicate.inline_without_move 0.30% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.74% : 0.000007s : 42: predicate.less_batch_normalization 1.80% : 0.000017s : 120: predicate.list_to_tuple_eliminator_ 2.83% : 0.000027s : 192: predicate.load_eliminater 0.80% : 0.000008s : 21: predicate.loop_unroll_after_grad 1.78% : 0.000017s : 110: predicate.loop_unroll_before_grad 1.64% : 0.000016s : 114: predicate.make_slice_get_slice_eliminator 0.57% : 0.000006s : 42: predicate.merge_addn 0.60% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.60% : 0.000006s : 42: predicate.mini_step_allgather_replace 0.97% : 0.000009s : 72: predicate.minmaximum_grad 0.40% : 0.000004s : 21: predicate.mutable_eliminate 0.32% : 0.000003s : 21: predicate.opt_reshape 0.34% : 0.000003s : 21: predicate.parallel_virtual_node 1.94% : 0.000019s : 93: predicate.partial_defer_inline 1.55% : 0.000015s : 99: predicate.partial_eliminate 1.00% : 0.000010s : 72: predicate.print_const_string_wrapper 0.65% : 0.000006s : 42: predicate.reduce_all_const_elim 1.25% : 0.000012s : 72: predicate.reduce_eliminate 0.34% : 0.000003s : 42: predicate.remove_not_recompute_node 1.13% : 0.000011s : 120: predicate.replace_applicator 0.34% : 0.000003s : 42: predicate.replace_old_param 0.18% : 0.000002s : 21: predicate.reset_defer_inline 0.99% : 0.000010s : 72: predicate.reshape_eliminate 0.61% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.37% : 0.000004s : 21: predicate.row_tensor_eliminate 0.81% : 0.000008s : 42: predicate.same_eliminate 0.36% : 0.000003s : 46: predicate.set_cell_output_no_recompute 0.66% : 0.000006s : 42: predicate.shard_identity_eliminate 1.05% : 0.000010s : 63: predicate.special_op_eliminate 0.70% : 0.000007s : 42: predicate.specialize_transform 0.72% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.69% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.65% : 0.000026s : 192: predicate.stopgrad_eliminater 0.33% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.41% : 0.000014s : 93: predicate.switch_defer_inline 1.99% : 0.000019s : 135: predicate.switch_layer_defer_inline 5.65% : 0.000055s : 253: predicate.switch_simplify 1.00% : 0.000010s : 72: predicate.tile_eliminate 0.96% : 0.000009s : 72: predicate.transpose_eliminate 1.73% : 0.000017s : 114: predicate.tuple_list_convert_item_index_to_positive 1.74% : 0.000017s : 114: predicate.tuple_list_get_item_const_eliminator 1.64% : 0.000016s : 114: predicate.tuple_list_get_item_depend_reorder 2.62% : 0.000025s : 162: predicate.tuple_list_get_item_eliminator 1.64% : 0.000016s : 114: predicate.tuple_list_get_set_item_eliminator 2.49% : 0.000024s : 156: predicate.tuple_list_set_item_eliminator 1.72% : 0.000017s : 120: predicate.tuple_to_list_eliminator_ 2.83% : 0.000027s : 192: predicate.updatestate_pure_node_eliminater 3.68% : 0.000036s : 234: predicate.updatestate_useless_node_eliminater 0.36% : 0.000003s : 21: predicate.value_based_eliminate 0.61% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.62% : 0.000006s : 42: predicate.virtual_output_eliminate 0.60% : 0.000006s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002974 41 65.84% : 0.001958s : 24: func_graph_cloner_run.FuncGraphClonerGraph 34.16% : 0.001016s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.211069 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000028s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000157s : 1: add_recomputation 0.03% : 0.000067s : 1: assign_add_opt 0.27% : 0.000568s : 1: auto_monad 0.05% : 0.000110s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.20% : 0.000414s : 1: bootstrap 0.02% : 0.000032s : 1: cconv 0.03% : 0.000066s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000024s : 1: convert_after_rewriter 0.03% : 0.000065s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.29% : 0.000612s : 1: eliminate_special_op_node 0.01% : 0.000025s : 1: environ_conv 0.01% : 0.000020s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000013s : 1: graph_reusing 0.01% : 0.000015s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000013s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.27% : 0.000571s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000033s : 1: opt.transform.loop_unroll_optimizer 2.40% : 0.005073s : 80: opt.transform.opt_a 0.07% : 0.000151s : 1: opt.transform.opt_after_cconv 0.26% : 0.000556s : 27: opt.transform.opt_b 0.08% : 0.000163s : 1: opt.transform.opt_trans_graph 0.04% : 0.000077s : 3: opt.transform.special_op_eliminate 0.06% : 0.000126s : 4: opt.transform.symbol_engine_opt 7.90% : 0.016670s : 1: opt_a 0.15% : 0.000317s : 1: opt_after_cconv 0.35% : 0.000730s : 1: opt_b 9.60% : 0.020267s : 1: optimize 0.01% : 0.000024s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000017s : 1: order_py_execute_after_rewriter 0.02% : 0.000033s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.02% : 0.000052s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.05% : 0.000102s : 1: pre_auto_parallel 0.03% : 0.000072s : 1: py_interpret_to_execute 0.01% : 0.000026s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000020s : 1: remove_cast_before_assign_add 0.04% : 0.000075s : 1: remove_dup_value 0.88% : 0.001851s : 1: renormalize.infer 0.52% : 0.001087s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000376s : 1: rewriter_after_opt_a 0.10% : 0.000208s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000023s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000187s : 1: symbol_engine_optimizer 54.42% : 0.114859s : 1: task_emit 0.09% : 0.000184s : 1: tuple_transform 21.10% : 0.044541s : 1: type_inference 0.08% : 0.000179s : 1: validate [WARNING] DISTRIBUTED(163846,ffff97644c10,python3.7):2025-02-07-13:54:20.320.908 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (0, 1, 2, 3, 4, 5, 6, 7) [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163842,ffff8ac54c10,python3.7):2025-02-07-13:54:20.320.902 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: (0, 1, 2, 3, 4, 5, 6, 7) [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:54:20.321.750 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:54:20.321.754 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:54:20.321.754 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:54:20.321.758 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:54:20.321.760 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:54:20.321.757 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:54:20.321.766 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:54:20.321.763 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DEVICE(163840,fffe413ba0f0,python3.7):2025-02-07-13:54:20.321.885 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163842,fffe3943a0f0,python3.7):2025-02-07-13:54:20.321.904 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163846,fffe5cff90f0,python3.7):2025-02-07-13:54:20.321.904 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163843,fffe5cb390f0,python3.7):2025-02-07-13:54:20.321.921 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163845,fffe497fa0f0,python3.7):2025-02-07-13:54:20.321.927 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163841,fffe4bfff0f0,python3.7):2025-02-07-13:54:20.321.924 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163844,fffe60b390f0,python3.7):2025-02-07-13:54:20.321.938 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163839,fffe497fa0f0,python3.7):2025-02-07-13:54:20.321.949 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7), hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(163844,fffe60b390f0,python3.7):2025-02-07-13:54:20.391.453 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163844,fffeab7fe0f0,python3.7):2025-02-07-13:54:20.391.506 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DEVICE(163842,fffe3943a0f0,python3.7):2025-02-07-13:54:20.391.634 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163842,fffe7ffff0f0,python3.7):2025-02-07-13:54:20.391.691 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DEVICE(163839,fffe497fa0f0,python3.7):2025-02-07-13:54:20.391.832 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163839,fffe8e7fc0f0,python3.7):2025-02-07-13:54:20.391.901 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DEVICE(163840,fffe413ba0f0,python3.7):2025-02-07-13:54:20.395.335 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163840,fffe9cff90f0,python3.7):2025-02-07-13:54:20.395.397 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DEVICE(163841,fffe4bfff0f0,python3.7):2025-02-07-13:54:20.395.482 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163841,fffe96ffd0f0,python3.7):2025-02-07-13:54:20.395.541 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DEVICE(163843,fffe5cb390f0,python3.7):2025-02-07-13:54:20.395.640 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DEVICE(163845,fffe497fa0f0,python3.7):2025-02-07-13:54:20.395.629 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163845,fffe89ffb0f0,python3.7):2025-02-07-13:54:20.395.684 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163843,fffea77fe0f0,python3.7):2025-02-07-13:54:20.395.708 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] PARALLEL(163844,ffffae145c10,python3.7):2025-02-07-13:54:20.443.807 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163839,ffff88d43c10,python3.7):2025-02-07-13:54:20.445.494 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163842,ffff8ac54c10,python3.7):2025-02-07-13:54:20.446.243 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163840,ffff92fd9c10,python3.7):2025-02-07-13:54:20.449.490 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163843,ffffaa156c10,python3.7):2025-02-07-13:54:20.449.588 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163841,ffff99d3ac10,python3.7):2025-02-07-13:54:20.451.491 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(163845,ffff83f78c10,python3.7):2025-02-07-13:54:20.454.394 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 0.162853, [21] [bootstrap]: 0.00029787 [type_inference]: 0.033377 [auto_monad]: 0.00056365 [graph_reusing]: 5.2e-06 [inline]: 1.67999e-06 [parallel-infer-symbol]: 2.16e-06 [pre_auto_parallel]: 8.421e-05 [insert-virtual-dataset]: 3.08e-06 [parallel-infer-symbol-second]: 5.69999e-07 [dataset_repeat_opt]: 1.15001e-06 [pipeline_split]: 1.62999e-06 [optimize]: 0.0153426, [52] [py_interpret_to_execute]: 6.882e-05 [rewriter_before_opt_a]: 0.00019523 [opt_a]: 0.0119648, [2] [Cycle 1]: 0.00684578, [43] [expand_dump_flag]: 6.48999e-06 [switch_simplify]: 0.00021379 [loop_unroll]: 9.751e-05 [a_1]: 0.00239225 [recompute_prepare]: 2.737e-05 [updatestate_depend_eliminate]: 0.00010299 [updatestate_assign_eliminate]: 2.011e-05 [updatestate_loads_eliminate]: 1.652e-05 [parameter_eliminate]: 3.84e-06 [a_2]: 0.00032607 [accelerated_algorithm]: 4.417e-05 [shard]: 2.02001e-06 [meta_shard_fg_expand]: 8.89e-06 [shard_inline]: 2.179e-05 [auto_parallel]: 1.951e-05 [parallel]: 8.16e-06 [flash_sp]: 1.417e-05 [merge_comm]: 1.638e-05 [allreduce_fusion]: 1.372e-05 [matmul_add_comm_reduction]: 2.233e-05 [allreduce_slice_to_reducescatter]: 4.69998e-07 [virtual_shard_identity]: 2.184e-05 [virtual_dataset]: 2.096e-05 [get_grad_eliminate_]: 2.112e-05 [virtual_output]: 2.024e-05 [merge_forward]: 1.423e-05 [cell_reuse_recompute_pass]: 2.09e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.14e-05 [before_grad]: 3.859e-05 [inplace_validation]: 1.232e-05 [meta_fg_expand]: 1.644e-05 [inplace_validation_after_expand]: 1.475e-05 [flash_sp_send_recv_attached]: 2.70001e-06 [receive_attached]: 4.90001e-06 [after_resolve]: 2.635e-05 [a_after_grad]: 3.47e-05 [special_op_eliminate]: 2.156e-05 [renormalize]: 0.00237995 [add_forward_monad_depend]: 4.22e-06 [auto_monad_grad]: 2.39999e-06 [auto_monad_eliminator]: 6.053e-05 [cse]: 0.00028855 [a_3]: 0.00015511 [Cycle 2]: 0.00196985, [43] [expand_dump_flag]: 1.55999e-06 [switch_simplify]: 2.361e-05 [loop_unroll]: 2.133e-05 [a_1]: 0.00068844 [recompute_prepare]: 2.047e-05 [updatestate_depend_eliminate]: 1.631e-05 [updatestate_assign_eliminate]: 1.648e-05 [updatestate_loads_eliminate]: 1.506e-05 [parameter_eliminate]: 2.18001e-06 [a_2]: 0.00030428 [accelerated_algorithm]: 2.402e-05 [shard]: 1.17e-06 [meta_shard_fg_expand]: 6.65001e-06 [shard_inline]: 2.158e-05 [auto_parallel]: 1.946e-05 [parallel]: 3.98999e-06 [flash_sp]: 3.78001e-06 [merge_comm]: 1.58e-05 [allreduce_fusion]: 1.303e-05 [matmul_add_comm_reduction]: 1.964e-05 [allreduce_slice_to_reducescatter]: 3.00002e-07 [virtual_shard_identity]: 2.175e-05 [virtual_dataset]: 2.106e-05 [get_grad_eliminate_]: 2.017e-05 [virtual_output]: 1.986e-05 [merge_forward]: 1.29e-05 [cell_reuse_recompute_pass]: 2.06e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.093e-05 [before_grad]: 3.69e-05 [inplace_validation]: 1.16e-05 [meta_fg_expand]: 1.327e-05 [inplace_validation_after_expand]: 1.549e-05 [flash_sp_send_recv_attached]: 1.11001e-06 [receive_attached]: 7.50006e-07 [after_resolve]: 2.492e-05 [a_after_grad]: 3.464e-05 [special_op_eliminate]: 2.014e-05 [renormalize]: 7.00093e-08 [add_forward_monad_depend]: 1.73e-06 [auto_monad_grad]: 1.3e-06 [auto_monad_eliminator]: 4.295e-05 [cse]: 5.623e-05 [a_3]: 0.00014028 [py_interpret_to_execute_after_opt_a]: 1.944e-05 [slice_cell_reuse_recomputed_activation]: 2.37999e-06 [rewriter_after_opt_a]: 0.00037242 [convert_after_rewriter]: 1.813e-05 [order_py_execute_after_rewriter]: 1.319e-05 [opt_b]: 0.00063807, [1] [Cycle 1]: 0.00063213, [7] [b_1]: 0.00047388 [b_2]: 2.361e-05 [updatestate_depend_eliminate]: 1.419e-05 [updatestate_assign_eliminate]: 1.605e-05 [updatestate_loads_eliminate]: 1.469e-05 [renormalize]: 4.19997e-07 [cse]: 5.586e-05 [optimize_parallel_all_gather_comm]: 1.932e-05 [overlap_param_gather]: 1.09999e-06 [cconv]: 2.732e-05 [loop_unroll]: 0.00051644 [opt_after_cconv]: 0.000302, [1] [Cycle 1]: 0.00029528, [7] [c_1]: 0.0001511 [parameter_eliminate]: 2.38001e-06 [updatestate_depend_eliminate]: 1.717e-05 [updatestate_assign_eliminate]: 1.63e-05 [updatestate_loads_eliminate]: 1.536e-05 [cse]: 5.873e-05 [renormalize]: 4.40006e-07 [remove_dup_value]: 6.506e-05 [tuple_transform]: 0.00017918, [1] [Cycle 1]: 0.00017342, [2] [d_1]: 0.00016332 [renormalize]: 2.89991e-07 [partial_unused_args_eliminate]: 2.61e-06 [add_cache_embedding]: 2.379e-05 [add_recomputation]: 0.00014061 [cse_after_recomputation]: 5.899e-05, [1] [Cycle 1]: 5.443e-05, [1] [cse]: 4.893e-05 [environ_conv]: 1.83e-05 [swap_dp_allreduce_reducescatter]: 2.021e-05 [bias_add_comm_swap]: 2.24001e-06 [label_micro_interleaved_index]: 2.04e-06 [label_fine_grained_interleaved_index]: 2.21e-06 [merge_cast_opt]: 1.02e-06 [slice_recompute_activation]: 1.72001e-06 [micro_interleaved_order_control]: 1.76999e-06 [assign_add_opt]: 5.931e-05 [ForceFp32Comm]: 8.30012e-07 [remove_cast_before_assign_add]: 1.602e-05 [full_micro_interleaved_order_control]: 2.11e-06 [reorder_send_recv_between_fp_bp]: 2.13001e-06 [comm_op_add_attrs]: 5.027e-05 [add_comm_op_reuse_tag]: 2.47e-06 [interleave_split_concat_branches]: 8.29998e-07 [interleave_parallel_branches]: 8.39995e-07 [overlap_opt_shard_in_pipeline]: 9.29998e-07 [overlap_opt_shard_grad_in_pipeline]: 2.35e-06 [control_data_broadcast_order]: 1.09e-06 [grouped_pairwise_exchange_alltoall]: 9.51001e-06 [offloading_packed_experts]: 2.11001e-06 [overlap_recompute_and_grad_model_parallel]: 2.22999e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.89994e-07 [overlap_recompute_allgather_and_fa_grad]: 4.334e-05 [overlap_grad_ring_attention]: 2.14e-06 [overlap_grad_flash_sp]: 3.274e-05 [begin_end_overlap_inline]: 7.89994e-07 [split_matmul_comm_elemetwise]: 1.96999e-06 [split_layernorm_comm]: 1.66e-06 [handle_group_info]: 6.85999e-06 [symbol_engine_optimizer]: 0.00017925, [1] [Cycle 1]: 0.00017415, [6] [build]: 1.781e-05 [elim_shapecalc]: 2.835e-05 [elim_not_effective]: 4.239e-05 [opt_reshape]: 2.195e-05 [fold_const_symbol]: 3.659e-05 [renormalize]: 4.69998e-07 [pipeline_parallel_scheduler]: 1.55999e-06 [auto_monad_reorder]: 0.0001012 [get_jit_bprop_graph]: 4.89992e-07 [rewriter_after_jit_bprop_graph]: 4.19997e-07 [eliminate_special_op_node]: 0.00059559 [distribtued_split]: 1.46001e-06 [validate]: 7.426e-05 [task_emit]: 0.112101 [execute]: 1.16e-05 Sums bootstrap : 0.000298s : 0.19% type_inference : 0.033377s : 21.05% auto_monad : 0.000564s : 0.36% graph_reusing : 0.000005s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000084s : 0.05% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000069s : 0.04% optimize.rewriter_before_opt_a : 0.000195s : 0.12% optimize.opt_a.expand_dump_flag : 0.000008s : 0.01% optimize.opt_a.switch_simplify : 0.000237s : 0.15% optimize.opt_a.loop_unroll : 0.000119s : 0.07% optimize.opt_a.a_1 : 0.003081s : 1.94% optimize.opt_a.recompute_prepare : 0.000048s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000119s : 0.08% optimize.opt_a.updatestate_assign_eliminate : 0.000037s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000032s : 0.02% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.000630s : 0.40% optimize.opt_a.accelerated_algorithm : 0.000068s : 0.04% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000016s : 0.01% optimize.opt_a.shard_inline : 0.000043s : 0.03% optimize.opt_a.auto_parallel : 0.000039s : 0.02% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000018s : 0.01% optimize.opt_a.merge_comm : 0.000032s : 0.02% optimize.opt_a.allreduce_fusion : 0.000027s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000042s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000044s : 0.03% optimize.opt_a.virtual_dataset : 0.000042s : 0.03% optimize.opt_a.get_grad_eliminate_ : 0.000041s : 0.03% optimize.opt_a.virtual_output : 0.000040s : 0.03% optimize.opt_a.merge_forward : 0.000027s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000082s : 0.05% optimize.opt_a.before_grad : 0.000075s : 0.05% optimize.opt_a.inplace_validation : 0.000024s : 0.02% optimize.opt_a.meta_fg_expand : 0.000030s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000030s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.000051s : 0.03% optimize.opt_a.a_after_grad : 0.000069s : 0.04% optimize.opt_a.special_op_eliminate : 0.000042s : 0.03% optimize.opt_a.renormalize : 0.002380s : 1.50% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000103s : 0.07% optimize.opt_a.cse : 0.000345s : 0.22% optimize.opt_a.a_3 : 0.000295s : 0.19% optimize.py_interpret_to_execute_after_opt_a : 0.000019s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000372s : 0.23% optimize.convert_after_rewriter : 0.000018s : 0.01% optimize.order_py_execute_after_rewriter : 0.000013s : 0.01% optimize.opt_b.b_1 : 0.000474s : 0.30% optimize.opt_b.b_2 : 0.000024s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000056s : 0.04% optimize.optimize_parallel_all_gather_comm : 0.000019s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000027s : 0.02% optimize.loop_unroll : 0.000516s : 0.33% optimize.opt_after_cconv.c_1 : 0.000151s : 0.10% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_after_cconv.cse : 0.000059s : 0.04% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000065s : 0.04% optimize.tuple_transform.d_1 : 0.000163s : 0.10% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000024s : 0.02% optimize.add_recomputation : 0.000141s : 0.09% optimize.cse_after_recomputation.cse : 0.000049s : 0.03% optimize.environ_conv : 0.000018s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000020s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000059s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000016s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000050s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000043s : 0.03% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000033s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000018s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000028s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000042s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000022s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000037s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000101s : 0.06% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000596s : 0.38% distribtued_split : 0.000001s : 0.00% validate : 0.000074s : 0.05% task_emit : 0.112101s : 70.69% execute : 0.000012s : 0.01% Time group info: ------[substitution.] 0.000617 300 1.05% : 0.000006s : 2: substitution.depend_value_elim 1.08% : 0.000007s : 18: substitution.elim_not_effective 0.98% : 0.000006s : 18: substitution.fold_const_symbol 2.61% : 0.000016s : 21: substitution.graph_param_transform 59.55% : 0.000367s : 15: substitution.inline 2.52% : 0.000016s : 36: substitution.j_node_and_user_rematch 3.44% : 0.000021s : 2: substitution.less_batch_normalization 2.66% : 0.000016s : 30: substitution.load_eliminater 0.88% : 0.000005s : 6: substitution.reduce_all_const_elim 3.28% : 0.000020s : 36: substitution.remove_not_recompute_node 0.95% : 0.000006s : 6: substitution.replace_old_param 2.39% : 0.000015s : 4: substitution.switch_simplify 3.80% : 0.000023s : 6: substitution.tuple_list_get_item_eliminator 8.00% : 0.000049s : 44: substitution.updatestate_pure_node_eliminater 6.82% : 0.000042s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.033312 2 91.37% : 0.030437s : 1: type_inference.infer 8.63% : 0.002875s : 1: type_inference.specialize ------[replace.] 0.000211 25 51.96% : 0.000110s : 15: replace.inline 31.49% : 0.000066s : 4: replace.switch_simplify 16.55% : 0.000035s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000390 25 91.71% : 0.000358s : 15: match.inline 3.14% : 0.000012s : 4: match.switch_simplify 5.15% : 0.000020s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000962 6511 1.06% : 0.000010s : 72: predicate.accumulaten_eliminater 0.73% : 0.000007s : 21: predicate.ad_related_special_op_eliminate 0.58% : 0.000006s : 42: predicate.addn_check_dump 1.19% : 0.000011s : 72: predicate.addn_zero_filter 1.06% : 0.000010s : 72: predicate.adjust_all_reduce_mul_add 2.29% : 0.000022s : 114: predicate.arithmetic_simplify 1.08% : 0.000010s : 72: predicate.cast_eliminate 0.62% : 0.000006s : 42: predicate.check_bprop_eliminate 0.59% : 0.000006s : 42: predicate.compare_switch_simplify 0.17% : 0.000002s : 21: predicate.const_output_eliminate 0.31% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.41% : 0.000014s : 78: predicate.convert_tensor_eliminate 0.60% : 0.000006s : 42: predicate.depend_value_elim 1.06% : 0.000010s : 72: predicate.dict_get_item_const_eliminator 1.13% : 0.000011s : 72: predicate.dict_get_item_eliminator 1.09% : 0.000010s : 72: predicate.dict_set_item_eliminator 0.18% : 0.000002s : 21: predicate.elim_not_effective 0.38% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.28% : 0.000012s : 93: predicate.environ_add_const_eliminate 1.32% : 0.000013s : 93: predicate.environ_get_add_eliminate 1.27% : 0.000012s : 93: predicate.environ_get_depend_swap 1.93% : 0.000019s : 135: predicate.environ_get_eliminate 1.28% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.36% : 0.000013s : 93: predicate.exchange_switch_depend_value 1.62% : 0.000016s : 93: predicate.float_depend_g_call 0.59% : 0.000006s : 42: predicate.float_environ_get_switch 0.88% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 21: predicate.fold_const_symbol 0.62% : 0.000006s : 42: predicate.get_grad_eliminate 0.21% : 0.000002s : 21: predicate.graph_param_transform 0.59% : 0.000006s : 42: predicate.incorporate_call 0.57% : 0.000006s : 42: predicate.incorporate_call_switch 5.41% : 0.000052s : 291: predicate.inline 0.81% : 0.000008s : 42: predicate.inline_without_move 0.31% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.76% : 0.000007s : 42: predicate.less_batch_normalization 1.96% : 0.000019s : 120: predicate.list_to_tuple_eliminator_ 2.82% : 0.000027s : 192: predicate.load_eliminater 0.71% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.81% : 0.000017s : 110: predicate.loop_unroll_before_grad 1.68% : 0.000016s : 114: predicate.make_slice_get_slice_eliminator 0.61% : 0.000006s : 42: predicate.merge_addn 0.57% : 0.000005s : 42: predicate.micro_step_allgather_replace 0.58% : 0.000006s : 42: predicate.mini_step_allgather_replace 0.98% : 0.000009s : 72: predicate.minmaximum_grad 0.40% : 0.000004s : 21: predicate.mutable_eliminate 0.32% : 0.000003s : 21: predicate.opt_reshape 0.38% : 0.000004s : 21: predicate.parallel_virtual_node 1.85% : 0.000018s : 93: predicate.partial_defer_inline 1.55% : 0.000015s : 99: predicate.partial_eliminate 1.02% : 0.000010s : 72: predicate.print_const_string_wrapper 0.66% : 0.000006s : 42: predicate.reduce_all_const_elim 1.22% : 0.000012s : 72: predicate.reduce_eliminate 0.35% : 0.000003s : 42: predicate.remove_not_recompute_node 1.13% : 0.000011s : 120: predicate.replace_applicator 0.33% : 0.000003s : 42: predicate.replace_old_param 0.17% : 0.000002s : 21: predicate.reset_defer_inline 1.12% : 0.000011s : 72: predicate.reshape_eliminate 0.62% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.36% : 0.000003s : 21: predicate.row_tensor_eliminate 0.80% : 0.000008s : 42: predicate.same_eliminate 0.37% : 0.000004s : 46: predicate.set_cell_output_no_recompute 0.65% : 0.000006s : 42: predicate.shard_identity_eliminate 1.01% : 0.000010s : 63: predicate.special_op_eliminate 0.69% : 0.000007s : 42: predicate.specialize_transform 0.74% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.69% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.73% : 0.000026s : 192: predicate.stopgrad_eliminater 0.32% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.43% : 0.000014s : 93: predicate.switch_defer_inline 1.99% : 0.000019s : 135: predicate.switch_layer_defer_inline 4.69% : 0.000045s : 253: predicate.switch_simplify 1.06% : 0.000010s : 72: predicate.tile_eliminate 0.98% : 0.000009s : 72: predicate.transpose_eliminate 1.80% : 0.000017s : 114: predicate.tuple_list_convert_item_index_to_positive 1.77% : 0.000017s : 114: predicate.tuple_list_get_item_const_eliminator 1.61% : 0.000015s : 114: predicate.tuple_list_get_item_depend_reorder 2.67% : 0.000026s : 162: predicate.tuple_list_get_item_eliminator 1.65% : 0.000016s : 114: predicate.tuple_list_get_set_item_eliminator 2.32% : 0.000022s : 156: predicate.tuple_list_set_item_eliminator 1.87% : 0.000018s : 120: predicate.tuple_to_list_eliminator_ 2.84% : 0.000027s : 192: predicate.updatestate_pure_node_eliminater 3.68% : 0.000035s : 234: predicate.updatestate_useless_node_eliminater 0.36% : 0.000003s : 21: predicate.value_based_eliminate 0.62% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.62% : 0.000006s : 42: predicate.virtual_output_eliminate 0.35% : 0.000003s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002822 41 59.82% : 0.001688s : 24: func_graph_cloner_run.FuncGraphClonerGraph 40.18% : 0.001134s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.186439 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000027s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000146s : 1: add_recomputation 0.03% : 0.000063s : 1: assign_add_opt 0.31% : 0.000585s : 1: auto_monad 0.06% : 0.000109s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.18% : 0.000327s : 1: bootstrap 0.02% : 0.000031s : 1: cconv 0.03% : 0.000055s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000022s : 1: convert_after_rewriter 0.03% : 0.000062s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: distribtued_split 0.33% : 0.000610s : 1: eliminate_special_op_node 0.01% : 0.000022s : 1: environ_conv 0.01% : 0.000019s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000012s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000010s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.28% : 0.000526s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000033s : 1: opt.transform.loop_unroll_optimizer 2.64% : 0.004931s : 80: opt.transform.opt_a 0.08% : 0.000149s : 1: opt.transform.opt_after_cconv 0.26% : 0.000477s : 27: opt.transform.opt_b 0.09% : 0.000161s : 1: opt.transform.opt_trans_graph 0.04% : 0.000078s : 3: opt.transform.special_op_eliminate 0.07% : 0.000125s : 4: opt.transform.symbol_engine_opt 6.42% : 0.011969s : 1: opt_a 0.16% : 0.000306s : 1: opt_after_cconv 0.34% : 0.000641s : 1: opt_b 8.23% : 0.015351s : 1: optimize 0.01% : 0.000023s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000017s : 1: order_py_execute_after_rewriter 0.02% : 0.000036s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.03% : 0.000047s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.05% : 0.000093s : 1: pre_auto_parallel 0.04% : 0.000075s : 1: py_interpret_to_execute 0.01% : 0.000024s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000019s : 1: remove_cast_before_assign_add 0.04% : 0.000071s : 1: remove_dup_value 0.65% : 0.001215s : 1: renormalize.infer 0.62% : 0.001156s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.20% : 0.000379s : 1: rewriter_after_opt_a 0.11% : 0.000201s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000024s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000182s : 1: symbol_engine_optimizer 60.15% : 0.112135s : 1: task_emit 0.10% : 0.000182s : 1: tuple_transform 17.91% : 0.033399s : 1: type_inference 0.07% : 0.000123s : 1: validate TotalTime = 0.165069, [21] [bootstrap]: 0.00027789 [type_inference]: 0.0322576 [auto_monad]: 0.00049912 [graph_reusing]: 4.73e-06 [inline]: 1.40999e-06 [parallel-infer-symbol]: 1.94e-06 [pre_auto_parallel]: 8.158e-05 [insert-virtual-dataset]: 2.79e-06 [parallel-infer-symbol-second]: 5.29995e-07 [dataset_repeat_opt]: 1.28e-06 [pipeline_split]: 1.12e-06 [optimize]: 0.0152723, [52] [py_interpret_to_execute]: 6.536e-05 [rewriter_before_opt_a]: 0.0001912 [opt_a]: 0.0119481, [2] [Cycle 1]: 0.00685539, [43] [expand_dump_flag]: 5.7e-06 [switch_simplify]: 0.00020218 [loop_unroll]: 7.969e-05 [a_1]: 0.00242372 [recompute_prepare]: 2.659e-05 [updatestate_depend_eliminate]: 9.907e-05 [updatestate_assign_eliminate]: 1.897e-05 [updatestate_loads_eliminate]: 1.533e-05 [parameter_eliminate]: 2.51e-06 [a_2]: 0.00032135 [accelerated_algorithm]: 4.226e-05 [shard]: 1.73e-06 [meta_shard_fg_expand]: 8.54999e-06 [shard_inline]: 2.17e-05 [auto_parallel]: 1.821e-05 [parallel]: 5.40999e-06 [flash_sp]: 1.183e-05 [merge_comm]: 1.596e-05 [allreduce_fusion]: 1.359e-05 [matmul_add_comm_reduction]: 2.053e-05 [allreduce_slice_to_reducescatter]: 4.00003e-07 [virtual_shard_identity]: 2.234e-05 [virtual_dataset]: 2.112e-05 [get_grad_eliminate_]: 2.144e-05 [virtual_output]: 2.055e-05 [merge_forward]: 1.355e-05 [cell_reuse_recompute_pass]: 1.59e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.187e-05 [before_grad]: 3.811e-05 [inplace_validation]: 1.206e-05 [meta_fg_expand]: 1.688e-05 [inplace_validation_after_expand]: 1.495e-05 [flash_sp_send_recv_attached]: 2.11e-06 [receive_attached]: 3.59e-06 [after_resolve]: 2.625e-05 [a_after_grad]: 3.562e-05 [special_op_eliminate]: 2.175e-05 [renormalize]: 0.00243619 [add_forward_monad_depend]: 2.99999e-06 [auto_monad_grad]: 1.83001e-06 [auto_monad_eliminator]: 5.235e-05 [cse]: 0.00028042 [a_3]: 0.00015384 [Cycle 2]: 0.00193903, [43] [expand_dump_flag]: 1.27e-06 [switch_simplify]: 2.328e-05 [loop_unroll]: 2.127e-05 [a_1]: 0.00068598 [recompute_prepare]: 2.026e-05 [updatestate_depend_eliminate]: 1.533e-05 [updatestate_assign_eliminate]: 1.626e-05 [updatestate_loads_eliminate]: 1.463e-05 [parameter_eliminate]: 1.78e-06 [a_2]: 0.0003039 [accelerated_algorithm]: 2.446e-05 [shard]: 9.00007e-07 [meta_shard_fg_expand]: 6.69999e-06 [shard_inline]: 2.169e-05 [auto_parallel]: 1.876e-05 [parallel]: 3.34e-06 [flash_sp]: 2.76e-06 [merge_comm]: 1.636e-05 [allreduce_fusion]: 1.316e-05 [matmul_add_comm_reduction]: 1.896e-05 [allreduce_slice_to_reducescatter]: 2.59999e-07 [virtual_shard_identity]: 2.158e-05 [virtual_dataset]: 2.058e-05 [get_grad_eliminate_]: 1.993e-05 [virtual_output]: 1.952e-05 [merge_forward]: 1.164e-05 [cell_reuse_recompute_pass]: 1.97999e-06 [cell_reuse_handle_not_recompute_node_pass]: 3.952e-05 [before_grad]: 3.641e-05 [inplace_validation]: 1.114e-05 [meta_fg_expand]: 1.357e-05 [inplace_validation_after_expand]: 1.515e-05 [flash_sp_send_recv_attached]: 9.29998e-07 [receive_attached]: 6.90008e-07 [after_resolve]: 2.459e-05 [a_after_grad]: 3.508e-05 [special_op_eliminate]: 1.983e-05 [renormalize]: 7.99919e-08 [add_forward_monad_depend]: 1.21001e-06 [auto_monad_grad]: 1.18e-06 [auto_monad_eliminator]: 3.991e-05 [cse]: 5.407e-05 [a_3]: 0.00013967 [py_interpret_to_execute_after_opt_a]: 2.03e-05 [slice_cell_reuse_recomputed_activation]: 2.37999e-06 [rewriter_after_opt_a]: 0.00036243 [convert_after_rewriter]: 1.708e-05 [order_py_execute_after_rewriter]: 1.24e-05 [opt_b]: 0.00063133, [1] [Cycle 1]: 0.00062538, [7] [b_1]: 0.00047114 [b_2]: 2.346e-05 [updatestate_depend_eliminate]: 1.365e-05 [updatestate_assign_eliminate]: 1.552e-05 [updatestate_loads_eliminate]: 1.478e-05 [renormalize]: 4.1e-07 [cse]: 5.324e-05 [optimize_parallel_all_gather_comm]: 3.612e-05 [overlap_param_gather]: 9.79999e-07 [cconv]: 2.173e-05 [loop_unroll]: 0.00052223 [opt_after_cconv]: 0.00029957, [1] [Cycle 1]: 0.00029267, [7] [c_1]: 0.00015256 [parameter_eliminate]: 1.85e-06 [updatestate_depend_eliminate]: 1.585e-05 [updatestate_assign_eliminate]: 1.585e-05 [updatestate_loads_eliminate]: 1.477e-05 [cse]: 5.793e-05 [renormalize]: 3.69997e-07 [remove_dup_value]: 5.87e-05 [tuple_transform]: 0.00017932, [1] [Cycle 1]: 0.00017377, [2] [d_1]: 0.00016392 [renormalize]: 2.29993e-07 [partial_unused_args_eliminate]: 1.81e-06 [add_cache_embedding]: 2.181e-05 [add_recomputation]: 0.00013351 [cse_after_recomputation]: 5.654e-05, [1] [Cycle 1]: 5.181e-05, [1] [cse]: 4.584e-05 [environ_conv]: 1.776e-05 [swap_dp_allreduce_reducescatter]: 1.772e-05 [bias_add_comm_swap]: 1.62001e-06 [label_micro_interleaved_index]: 1.4e-06 [label_fine_grained_interleaved_index]: 1.39e-06 [merge_cast_opt]: 8.10003e-07 [slice_recompute_activation]: 1.16999e-06 [micro_interleaved_order_control]: 1.19e-06 [assign_add_opt]: 6.009e-05 [ForceFp32Comm]: 7.60003e-07 [remove_cast_before_assign_add]: 1.543e-05 [full_micro_interleaved_order_control]: 1.69e-06 [reorder_send_recv_between_fp_bp]: 1.20001e-06 [comm_op_add_attrs]: 4.484e-05 [add_comm_op_reuse_tag]: 1.57999e-06 [interleave_split_concat_branches]: 5.70013e-07 [interleave_parallel_branches]: 5.50004e-07 [overlap_opt_shard_in_pipeline]: 7.29997e-07 [overlap_opt_shard_grad_in_pipeline]: 1.39e-06 [control_data_broadcast_order]: 6.90008e-07 [grouped_pairwise_exchange_alltoall]: 7.52001e-06 [offloading_packed_experts]: 1.52001e-06 [overlap_recompute_and_grad_model_parallel]: 1.31999e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.79998e-07 [overlap_recompute_allgather_and_fa_grad]: 4.503e-05 [overlap_grad_ring_attention]: 1.49e-06 [overlap_grad_flash_sp]: 3.205e-05 [begin_end_overlap_inline]: 5.19998e-07 [split_matmul_comm_elemetwise]: 1.16001e-06 [split_layernorm_comm]: 1.10999e-06 [handle_group_info]: 6.21e-06 [symbol_engine_optimizer]: 0.00017506, [1] [Cycle 1]: 0.00017035, [6] [build]: 1.563e-05 [elim_shapecalc]: 2.7e-05 [elim_not_effective]: 3.976e-05 [opt_reshape]: 2.24e-05 [fold_const_symbol]: 3.731e-05 [renormalize]: 2.69996e-07 [pipeline_parallel_scheduler]: 1.34e-06 [auto_monad_reorder]: 9.059e-05 [get_jit_bprop_graph]: 4.50003e-07 [rewriter_after_jit_bprop_graph]: 3.09999e-07 [eliminate_special_op_node]: 0.00056118 [distribtued_split]: 1.32999e-06 [validate]: 6.894e-05 [task_emit]: 0.115665 [execute]: 8.36e-06 Sums bootstrap : 0.000278s : 0.17% type_inference : 0.032258s : 20.06% auto_monad : 0.000499s : 0.31% graph_reusing : 0.000005s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000082s : 0.05% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000065s : 0.04% optimize.rewriter_before_opt_a : 0.000191s : 0.12% optimize.opt_a.expand_dump_flag : 0.000007s : 0.00% optimize.opt_a.switch_simplify : 0.000225s : 0.14% optimize.opt_a.loop_unroll : 0.000101s : 0.06% optimize.opt_a.a_1 : 0.003110s : 1.93% optimize.opt_a.recompute_prepare : 0.000047s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000114s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000035s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000030s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000625s : 0.39% optimize.opt_a.accelerated_algorithm : 0.000067s : 0.04% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000015s : 0.01% optimize.opt_a.shard_inline : 0.000043s : 0.03% optimize.opt_a.auto_parallel : 0.000037s : 0.02% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.01% optimize.opt_a.merge_comm : 0.000032s : 0.02% optimize.opt_a.allreduce_fusion : 0.000027s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000039s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000044s : 0.03% optimize.opt_a.virtual_dataset : 0.000042s : 0.03% optimize.opt_a.get_grad_eliminate_ : 0.000041s : 0.03% optimize.opt_a.virtual_output : 0.000040s : 0.02% optimize.opt_a.merge_forward : 0.000025s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000081s : 0.05% optimize.opt_a.before_grad : 0.000075s : 0.05% optimize.opt_a.inplace_validation : 0.000023s : 0.01% optimize.opt_a.meta_fg_expand : 0.000030s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000030s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000051s : 0.03% optimize.opt_a.a_after_grad : 0.000071s : 0.04% optimize.opt_a.special_op_eliminate : 0.000042s : 0.03% optimize.opt_a.renormalize : 0.002436s : 1.51% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000092s : 0.06% optimize.opt_a.cse : 0.000334s : 0.21% optimize.opt_a.a_3 : 0.000294s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000020s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000362s : 0.23% optimize.convert_after_rewriter : 0.000017s : 0.01% optimize.order_py_execute_after_rewriter : 0.000012s : 0.01% optimize.opt_b.b_1 : 0.000471s : 0.29% optimize.opt_b.b_2 : 0.000023s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000053s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000036s : 0.02% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.01% optimize.loop_unroll : 0.000522s : 0.32% optimize.opt_after_cconv.c_1 : 0.000153s : 0.09% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_after_cconv.cse : 0.000058s : 0.04% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000059s : 0.04% optimize.tuple_transform.d_1 : 0.000164s : 0.10% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000022s : 0.01% optimize.add_recomputation : 0.000134s : 0.08% optimize.cse_after_recomputation.cse : 0.000046s : 0.03% optimize.environ_conv : 0.000018s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000018s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000060s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000015s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000045s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000008s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000045s : 0.03% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000032s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000006s : 0.00% optimize.symbol_engine_optimizer.build : 0.000016s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000027s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000040s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000022s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000037s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000091s : 0.06% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000561s : 0.35% distribtued_split : 0.000001s : 0.00% validate : 0.000069s : 0.04% task_emit : 0.115665s : 71.92% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000601 300 0.99% : 0.000006s : 2: substitution.depend_value_elim 1.00% : 0.000006s : 18: substitution.elim_not_effective 1.06% : 0.000006s : 18: substitution.fold_const_symbol 2.54% : 0.000015s : 21: substitution.graph_param_transform 60.39% : 0.000363s : 15: substitution.inline 2.62% : 0.000016s : 36: substitution.j_node_and_user_rematch 3.26% : 0.000020s : 2: substitution.less_batch_normalization 2.65% : 0.000016s : 30: substitution.load_eliminater 0.84% : 0.000005s : 6: substitution.reduce_all_const_elim 3.29% : 0.000020s : 36: substitution.remove_not_recompute_node 0.83% : 0.000005s : 6: substitution.replace_old_param 2.14% : 0.000013s : 4: substitution.switch_simplify 3.68% : 0.000022s : 6: substitution.tuple_list_get_item_eliminator 7.86% : 0.000047s : 44: substitution.updatestate_pure_node_eliminater 6.85% : 0.000041s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.032194 2 91.19% : 0.029359s : 1: type_inference.infer 8.81% : 0.002835s : 1: type_inference.specialize ------[replace.] 0.000207 25 52.55% : 0.000109s : 15: replace.inline 29.84% : 0.000062s : 4: replace.switch_simplify 17.61% : 0.000036s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000382 25 92.62% : 0.000354s : 15: match.inline 2.55% : 0.000010s : 4: match.switch_simplify 4.83% : 0.000018s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000955 6511 1.03% : 0.000010s : 72: predicate.accumulaten_eliminater 0.71% : 0.000007s : 21: predicate.ad_related_special_op_eliminate 0.58% : 0.000006s : 42: predicate.addn_check_dump 1.03% : 0.000010s : 72: predicate.addn_zero_filter 0.98% : 0.000009s : 72: predicate.adjust_all_reduce_mul_add 2.12% : 0.000020s : 114: predicate.arithmetic_simplify 1.02% : 0.000010s : 72: predicate.cast_eliminate 0.62% : 0.000006s : 42: predicate.check_bprop_eliminate 0.58% : 0.000006s : 42: predicate.compare_switch_simplify 0.17% : 0.000002s : 21: predicate.const_output_eliminate 0.32% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.45% : 0.000014s : 78: predicate.convert_tensor_eliminate 0.59% : 0.000006s : 42: predicate.depend_value_elim 1.11% : 0.000011s : 72: predicate.dict_get_item_const_eliminator 1.14% : 0.000011s : 72: predicate.dict_get_item_eliminator 1.08% : 0.000010s : 72: predicate.dict_set_item_eliminator 0.18% : 0.000002s : 21: predicate.elim_not_effective 0.39% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.30% : 0.000012s : 93: predicate.environ_add_const_eliminate 1.30% : 0.000012s : 93: predicate.environ_get_add_eliminate 1.29% : 0.000012s : 93: predicate.environ_get_depend_swap 1.95% : 0.000019s : 135: predicate.environ_get_eliminate 1.27% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.32% : 0.000013s : 93: predicate.exchange_switch_depend_value 1.67% : 0.000016s : 93: predicate.float_depend_g_call 0.61% : 0.000006s : 42: predicate.float_environ_get_switch 0.88% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.17% : 0.000002s : 21: predicate.fold_const_symbol 0.63% : 0.000006s : 42: predicate.get_grad_eliminate 0.20% : 0.000002s : 21: predicate.graph_param_transform 0.60% : 0.000006s : 42: predicate.incorporate_call 0.58% : 0.000006s : 42: predicate.incorporate_call_switch 5.39% : 0.000051s : 291: predicate.inline 0.77% : 0.000007s : 42: predicate.inline_without_move 0.32% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.75% : 0.000007s : 42: predicate.less_batch_normalization 1.76% : 0.000017s : 120: predicate.list_to_tuple_eliminator_ 2.89% : 0.000028s : 192: predicate.load_eliminater 0.78% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.70% : 0.000016s : 110: predicate.loop_unroll_before_grad 1.72% : 0.000016s : 114: predicate.make_slice_get_slice_eliminator 0.59% : 0.000006s : 42: predicate.merge_addn 0.63% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.60% : 0.000006s : 42: predicate.mini_step_allgather_replace 0.98% : 0.000009s : 72: predicate.minmaximum_grad 0.40% : 0.000004s : 21: predicate.mutable_eliminate 0.33% : 0.000003s : 21: predicate.opt_reshape 0.36% : 0.000003s : 21: predicate.parallel_virtual_node 1.89% : 0.000018s : 93: predicate.partial_defer_inline 1.53% : 0.000015s : 99: predicate.partial_eliminate 1.03% : 0.000010s : 72: predicate.print_const_string_wrapper 0.67% : 0.000006s : 42: predicate.reduce_all_const_elim 1.30% : 0.000012s : 72: predicate.reduce_eliminate 0.32% : 0.000003s : 42: predicate.remove_not_recompute_node 1.14% : 0.000011s : 120: predicate.replace_applicator 0.33% : 0.000003s : 42: predicate.replace_old_param 0.16% : 0.000002s : 21: predicate.reset_defer_inline 1.07% : 0.000010s : 72: predicate.reshape_eliminate 0.65% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.37% : 0.000004s : 21: predicate.row_tensor_eliminate 0.82% : 0.000008s : 42: predicate.same_eliminate 0.37% : 0.000004s : 46: predicate.set_cell_output_no_recompute 0.67% : 0.000006s : 42: predicate.shard_identity_eliminate 0.99% : 0.000009s : 63: predicate.special_op_eliminate 0.70% : 0.000007s : 42: predicate.specialize_transform 0.72% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.72% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.75% : 0.000026s : 192: predicate.stopgrad_eliminater 0.34% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.45% : 0.000014s : 93: predicate.switch_defer_inline 1.98% : 0.000019s : 135: predicate.switch_layer_defer_inline 4.70% : 0.000045s : 253: predicate.switch_simplify 1.05% : 0.000010s : 72: predicate.tile_eliminate 0.99% : 0.000009s : 72: predicate.transpose_eliminate 1.79% : 0.000017s : 114: predicate.tuple_list_convert_item_index_to_positive 1.80% : 0.000017s : 114: predicate.tuple_list_get_item_const_eliminator 1.61% : 0.000015s : 114: predicate.tuple_list_get_item_depend_reorder 2.75% : 0.000026s : 162: predicate.tuple_list_get_item_eliminator 1.78% : 0.000017s : 114: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000023s : 156: predicate.tuple_list_set_item_eliminator 1.71% : 0.000016s : 120: predicate.tuple_to_list_eliminator_ 2.90% : 0.000028s : 192: predicate.updatestate_pure_node_eliminater 3.69% : 0.000035s : 234: predicate.updatestate_useless_node_eliminater 0.34% : 0.000003s : 21: predicate.value_based_eliminate 0.64% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.62% : 0.000006s : 42: predicate.virtual_output_eliminate 0.40% : 0.000004s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002634 41 57.37% : 0.001511s : 24: func_graph_cloner_run.FuncGraphClonerGraph 42.63% : 0.001123s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.188627 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000025s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000139s : 1: add_recomputation 0.03% : 0.000064s : 1: assign_add_opt 0.28% : 0.000520s : 1: auto_monad 0.05% : 0.000099s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.16% : 0.000303s : 1: bootstrap 0.01% : 0.000025s : 1: cconv 0.03% : 0.000050s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000021s : 1: convert_after_rewriter 0.03% : 0.000060s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.30% : 0.000575s : 1: eliminate_special_op_node 0.01% : 0.000022s : 1: environ_conv 0.01% : 0.000016s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000011s : 1: graph_reusing 0.01% : 0.000010s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000009s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.28% : 0.000532s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000032s : 1: opt.transform.loop_unroll_optimizer 2.61% : 0.004923s : 80: opt.transform.opt_a 0.08% : 0.000151s : 1: opt.transform.opt_after_cconv 0.25% : 0.000474s : 27: opt.transform.opt_b 0.09% : 0.000161s : 1: opt.transform.opt_trans_graph 0.04% : 0.000076s : 3: opt.transform.special_op_eliminate 0.06% : 0.000122s : 4: opt.transform.symbol_engine_opt 6.34% : 0.011952s : 1: opt_a 0.16% : 0.000303s : 1: opt_after_cconv 0.34% : 0.000635s : 1: opt_b 8.10% : 0.015281s : 1: optimize 0.02% : 0.000040s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000016s : 1: order_py_execute_after_rewriter 0.02% : 0.000036s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.03% : 0.000049s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000005s : 1: pipeline_split 0.05% : 0.000090s : 1: pre_auto_parallel 0.04% : 0.000071s : 1: py_interpret_to_execute 0.01% : 0.000024s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000018s : 1: remove_cast_before_assign_add 0.03% : 0.000064s : 1: remove_dup_value 0.64% : 0.001215s : 1: renormalize.infer 0.64% : 0.001213s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.20% : 0.000369s : 1: rewriter_after_opt_a 0.10% : 0.000197s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000021s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000179s : 1: symbol_engine_optimizer 61.33% : 0.115690s : 1: task_emit 0.10% : 0.000183s : 1: tuple_transform 17.11% : 0.032281s : 1: type_inference 0.06% : 0.000116s : 1: validate TotalTime = 0.166401, [21] [bootstrap]: 0.00030949 [type_inference]: 0.0336731 [auto_monad]: 0.00092038 [graph_reusing]: 6.58999e-06 [inline]: 1.34e-06 [parallel-infer-symbol]: 2.58999e-06 [pre_auto_parallel]: 8.926e-05 [insert-virtual-dataset]: 3.34e-06 [parallel-infer-symbol-second]: 7.7e-07 [dataset_repeat_opt]: 1.63e-06 [pipeline_split]: 1.59e-06 [optimize]: 0.015503, [52] [py_interpret_to_execute]: 7.031e-05 [rewriter_before_opt_a]: 0.00019714 [opt_a]: 0.0120253, [2] [Cycle 1]: 0.00683754, [43] [expand_dump_flag]: 6.85001e-06 [switch_simplify]: 0.00021676 [loop_unroll]: 7.951e-05 [a_1]: 0.00241185 [recompute_prepare]: 2.74e-05 [updatestate_depend_eliminate]: 0.00010203 [updatestate_assign_eliminate]: 2.01e-05 [updatestate_loads_eliminate]: 1.719e-05 [parameter_eliminate]: 4e-06 [a_2]: 0.00033145 [accelerated_algorithm]: 4.5e-05 [shard]: 2.26e-06 [meta_shard_fg_expand]: 9.22001e-06 [shard_inline]: 2.194e-05 [auto_parallel]: 1.976e-05 [parallel]: 8.70999e-06 [flash_sp]: 1.438e-05 [merge_comm]: 1.659e-05 [allreduce_fusion]: 1.362e-05 [matmul_add_comm_reduction]: 2.401e-05 [allreduce_slice_to_reducescatter]: 5.10001e-07 [virtual_shard_identity]: 2.28e-05 [virtual_dataset]: 2.093e-05 [get_grad_eliminate_]: 2.068e-05 [virtual_output]: 1.992e-05 [merge_forward]: 3.326e-05 [cell_reuse_recompute_pass]: 2.06e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.255e-05 [before_grad]: 3.867e-05 [inplace_validation]: 1.265e-05 [meta_fg_expand]: 1.752e-05 [inplace_validation_after_expand]: 1.516e-05 [flash_sp_send_recv_attached]: 3.45e-06 [receive_attached]: 4.69999e-06 [after_resolve]: 2.744e-05 [a_after_grad]: 3.48e-05 [special_op_eliminate]: 2.152e-05 [renormalize]: 0.0023135 [add_forward_monad_depend]: 4.27999e-06 [auto_monad_grad]: 1.88999e-06 [auto_monad_eliminator]: 6.166e-05 [cse]: 0.0002897 [a_3]: 0.00015472 [Cycle 2]: 0.00200528, [43] [expand_dump_flag]: 1.50999e-06 [switch_simplify]: 2.349e-05 [loop_unroll]: 2.127e-05 [a_1]: 0.0006877 [recompute_prepare]: 2.064e-05 [updatestate_depend_eliminate]: 1.671e-05 [updatestate_assign_eliminate]: 1.653e-05 [updatestate_loads_eliminate]: 1.498e-05 [parameter_eliminate]: 2.48e-06 [a_2]: 0.00031036 [accelerated_algorithm]: 2.505e-05 [shard]: 1.22e-06 [meta_shard_fg_expand]: 6.64001e-06 [shard_inline]: 2.189e-05 [auto_parallel]: 1.939e-05 [parallel]: 4.01e-06 [flash_sp]: 3.6e-06 [merge_comm]: 1.616e-05 [allreduce_fusion]: 1.361e-05 [matmul_add_comm_reduction]: 2.023e-05 [allreduce_slice_to_reducescatter]: 3.19997e-07 [virtual_shard_identity]: 2.171e-05 [virtual_dataset]: 2.089e-05 [get_grad_eliminate_]: 2.053e-05 [virtual_output]: 1.984e-05 [merge_forward]: 1.253e-05 [cell_reuse_recompute_pass]: 2.33001e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.071e-05 [before_grad]: 3.654e-05 [inplace_validation]: 1.175e-05 [meta_fg_expand]: 1.327e-05 [inplace_validation_after_expand]: 1.574e-05 [flash_sp_send_recv_attached]: 1.04999e-06 [receive_attached]: 7.59988e-07 [after_resolve]: 2.462e-05 [a_after_grad]: 3.532e-05 [special_op_eliminate]: 2e-05 [renormalize]: 8.00064e-08 [add_forward_monad_depend]: 1.40999e-06 [auto_monad_grad]: 1.35999e-06 [auto_monad_eliminator]: 4.282e-05 [cse]: 5.643e-05 [a_3]: 0.00014025 [py_interpret_to_execute_after_opt_a]: 1.906e-05 [slice_cell_reuse_recomputed_activation]: 2.83e-06 [rewriter_after_opt_a]: 0.00037725 [convert_after_rewriter]: 1.837e-05 [order_py_execute_after_rewriter]: 1.35e-05 [opt_b]: 0.00064476, [1] [Cycle 1]: 0.00063766, [7] [b_1]: 0.00047808 [b_2]: 2.419e-05 [updatestate_depend_eliminate]: 1.392e-05 [updatestate_assign_eliminate]: 1.564e-05 [updatestate_loads_eliminate]: 1.498e-05 [renormalize]: 4.00003e-07 [cse]: 5.403e-05 [optimize_parallel_all_gather_comm]: 1.916e-05 [overlap_param_gather]: 1.14999e-06 [cconv]: 2.773e-05 [loop_unroll]: 0.00052826 [opt_after_cconv]: 0.00031094, [1] [Cycle 1]: 0.00030303, [7] [c_1]: 0.00015122 [parameter_eliminate]: 2.63e-06 [updatestate_depend_eliminate]: 1.707e-05 [updatestate_assign_eliminate]: 1.623e-05 [updatestate_loads_eliminate]: 1.553e-05 [cse]: 6.014e-05 [renormalize]: 4.79995e-07 [remove_dup_value]: 6.565e-05 [tuple_transform]: 0.00018162, [1] [Cycle 1]: 0.00017433, [2] [d_1]: 0.00016317 [renormalize]: 2.40005e-07 [partial_unused_args_eliminate]: 2.65001e-06 [add_cache_embedding]: 2.591e-05 [add_recomputation]: 0.0001391 [cse_after_recomputation]: 5.895e-05, [1] [Cycle 1]: 5.288e-05, [1] [cse]: 4.66e-05 [environ_conv]: 1.867e-05 [swap_dp_allreduce_reducescatter]: 1.821e-05 [bias_add_comm_swap]: 2.49001e-06 [label_micro_interleaved_index]: 2.22e-06 [label_fine_grained_interleaved_index]: 2.2e-06 [merge_cast_opt]: 1.14e-06 [slice_recompute_activation]: 1.85e-06 [micro_interleaved_order_control]: 2.42001e-06 [assign_add_opt]: 5.879e-05 [ForceFp32Comm]: 8.70001e-07 [remove_cast_before_assign_add]: 1.725e-05 [full_micro_interleaved_order_control]: 2.2e-06 [reorder_send_recv_between_fp_bp]: 2.16e-06 [comm_op_add_attrs]: 5.199e-05 [add_comm_op_reuse_tag]: 2.15e-06 [interleave_split_concat_branches]: 8.2e-07 [interleave_parallel_branches]: 8.9001e-07 [overlap_opt_shard_in_pipeline]: 1.09001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.53e-06 [control_data_broadcast_order]: 1.14e-06 [grouped_pairwise_exchange_alltoall]: 9.76e-06 [offloading_packed_experts]: 2.06e-06 [overlap_recompute_and_grad_model_parallel]: 1.99e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.89995e-07 [overlap_recompute_allgather_and_fa_grad]: 4.486e-05 [overlap_grad_ring_attention]: 2.07e-06 [overlap_grad_flash_sp]: 3.359e-05 [begin_end_overlap_inline]: 9.89996e-07 [split_matmul_comm_elemetwise]: 2.12999e-06 [split_layernorm_comm]: 1.91999e-06 [handle_group_info]: 7.69e-06 [symbol_engine_optimizer]: 0.00018409, [1] [Cycle 1]: 0.00017783, [6] [build]: 1.834e-05 [elim_shapecalc]: 2.915e-05 [elim_not_effective]: 4.118e-05 [opt_reshape]: 2.194e-05 [fold_const_symbol]: 3.707e-05 [renormalize]: 4.00003e-07 [pipeline_parallel_scheduler]: 2.07e-06 [auto_monad_reorder]: 0.00010468 [get_jit_bprop_graph]: 5.89993e-07 [rewriter_after_jit_bprop_graph]: 4.70012e-07 [eliminate_special_op_node]: 0.0005758 [distribtued_split]: 1.66e-06 [validate]: 7.455e-05 [task_emit]: 0.11482 [execute]: 1.183e-05 Sums bootstrap : 0.000309s : 0.19% type_inference : 0.033673s : 20.79% auto_monad : 0.000920s : 0.57% graph_reusing : 0.000007s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000089s : 0.06% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000070s : 0.04% optimize.rewriter_before_opt_a : 0.000197s : 0.12% optimize.opt_a.expand_dump_flag : 0.000008s : 0.01% optimize.opt_a.switch_simplify : 0.000240s : 0.15% optimize.opt_a.loop_unroll : 0.000101s : 0.06% optimize.opt_a.a_1 : 0.003100s : 1.91% optimize.opt_a.recompute_prepare : 0.000048s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000119s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000037s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000032s : 0.02% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.000642s : 0.40% optimize.opt_a.accelerated_algorithm : 0.000070s : 0.04% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000016s : 0.01% optimize.opt_a.shard_inline : 0.000044s : 0.03% optimize.opt_a.auto_parallel : 0.000039s : 0.02% optimize.opt_a.parallel : 0.000013s : 0.01% optimize.opt_a.flash_sp : 0.000018s : 0.01% optimize.opt_a.merge_comm : 0.000033s : 0.02% optimize.opt_a.allreduce_fusion : 0.000027s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000044s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000045s : 0.03% optimize.opt_a.virtual_dataset : 0.000042s : 0.03% optimize.opt_a.get_grad_eliminate_ : 0.000041s : 0.03% optimize.opt_a.virtual_output : 0.000040s : 0.02% optimize.opt_a.merge_forward : 0.000046s : 0.03% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000083s : 0.05% optimize.opt_a.before_grad : 0.000075s : 0.05% optimize.opt_a.inplace_validation : 0.000024s : 0.02% optimize.opt_a.meta_fg_expand : 0.000031s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000031s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.00% optimize.opt_a.after_resolve : 0.000052s : 0.03% optimize.opt_a.a_after_grad : 0.000070s : 0.04% optimize.opt_a.special_op_eliminate : 0.000042s : 0.03% optimize.opt_a.renormalize : 0.002314s : 1.43% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000104s : 0.06% optimize.opt_a.cse : 0.000346s : 0.21% optimize.opt_a.a_3 : 0.000295s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000019s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000377s : 0.23% optimize.convert_after_rewriter : 0.000018s : 0.01% optimize.order_py_execute_after_rewriter : 0.000014s : 0.01% optimize.opt_b.b_1 : 0.000478s : 0.30% optimize.opt_b.b_2 : 0.000024s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000054s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000019s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000028s : 0.02% optimize.loop_unroll : 0.000528s : 0.33% optimize.opt_after_cconv.c_1 : 0.000151s : 0.09% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.cse : 0.000060s : 0.04% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000066s : 0.04% optimize.tuple_transform.d_1 : 0.000163s : 0.10% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000026s : 0.02% optimize.add_recomputation : 0.000139s : 0.09% optimize.cse_after_recomputation.cse : 0.000047s : 0.03% optimize.environ_conv : 0.000019s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000018s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000059s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000017s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000052s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000045s : 0.03% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000034s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000008s : 0.00% optimize.symbol_engine_optimizer.build : 0.000018s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000029s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000041s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000022s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000037s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000105s : 0.06% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000576s : 0.36% distribtued_split : 0.000002s : 0.00% validate : 0.000075s : 0.05% task_emit : 0.114820s : 70.89% execute : 0.000012s : 0.01% Time group info: ------[substitution.] 0.000630 300 1.08% : 0.000007s : 2: substitution.depend_value_elim 1.01% : 0.000006s : 18: substitution.elim_not_effective 0.97% : 0.000006s : 18: substitution.fold_const_symbol 2.57% : 0.000016s : 21: substitution.graph_param_transform 59.69% : 0.000376s : 15: substitution.inline 2.52% : 0.000016s : 36: substitution.j_node_and_user_rematch 3.39% : 0.000021s : 2: substitution.less_batch_normalization 2.66% : 0.000017s : 30: substitution.load_eliminater 0.93% : 0.000006s : 6: substitution.reduce_all_const_elim 3.27% : 0.000021s : 36: substitution.remove_not_recompute_node 0.98% : 0.000006s : 6: substitution.replace_old_param 2.67% : 0.000017s : 4: substitution.switch_simplify 3.73% : 0.000024s : 6: substitution.tuple_list_get_item_eliminator 7.79% : 0.000049s : 44: substitution.updatestate_pure_node_eliminater 6.73% : 0.000042s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.033611 2 92.42% : 0.031065s : 1: type_inference.infer 7.58% : 0.002546s : 1: type_inference.specialize ------[replace.] 0.000217 25 52.75% : 0.000114s : 15: replace.inline 31.31% : 0.000068s : 4: replace.switch_simplify 15.93% : 0.000035s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000402 25 91.40% : 0.000367s : 15: match.inline 3.52% : 0.000014s : 4: match.switch_simplify 5.08% : 0.000020s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000959 6511 1.01% : 0.000010s : 72: predicate.accumulaten_eliminater 0.65% : 0.000006s : 21: predicate.ad_related_special_op_eliminate 0.58% : 0.000006s : 42: predicate.addn_check_dump 1.00% : 0.000010s : 72: predicate.addn_zero_filter 0.99% : 0.000010s : 72: predicate.adjust_all_reduce_mul_add 2.11% : 0.000020s : 114: predicate.arithmetic_simplify 1.04% : 0.000010s : 72: predicate.cast_eliminate 0.60% : 0.000006s : 42: predicate.check_bprop_eliminate 0.58% : 0.000006s : 42: predicate.compare_switch_simplify 0.17% : 0.000002s : 21: predicate.const_output_eliminate 0.32% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.39% : 0.000013s : 78: predicate.convert_tensor_eliminate 0.60% : 0.000006s : 42: predicate.depend_value_elim 1.08% : 0.000010s : 72: predicate.dict_get_item_const_eliminator 1.10% : 0.000011s : 72: predicate.dict_get_item_eliminator 1.12% : 0.000011s : 72: predicate.dict_set_item_eliminator 0.18% : 0.000002s : 21: predicate.elim_not_effective 0.40% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.28% : 0.000012s : 93: predicate.environ_add_const_eliminate 1.31% : 0.000013s : 93: predicate.environ_get_add_eliminate 1.30% : 0.000012s : 93: predicate.environ_get_depend_swap 1.99% : 0.000019s : 135: predicate.environ_get_eliminate 1.28% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.32% : 0.000013s : 93: predicate.exchange_switch_depend_value 1.69% : 0.000016s : 93: predicate.float_depend_g_call 0.59% : 0.000006s : 42: predicate.float_environ_get_switch 0.87% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.17% : 0.000002s : 21: predicate.fold_const_symbol 0.61% : 0.000006s : 42: predicate.get_grad_eliminate 0.22% : 0.000002s : 21: predicate.graph_param_transform 0.61% : 0.000006s : 42: predicate.incorporate_call 0.59% : 0.000006s : 42: predicate.incorporate_call_switch 5.56% : 0.000053s : 291: predicate.inline 0.83% : 0.000008s : 42: predicate.inline_without_move 0.31% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.76% : 0.000007s : 42: predicate.less_batch_normalization 1.80% : 0.000017s : 120: predicate.list_to_tuple_eliminator_ 2.96% : 0.000028s : 192: predicate.load_eliminater 0.74% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.68% : 0.000016s : 110: predicate.loop_unroll_before_grad 1.68% : 0.000016s : 114: predicate.make_slice_get_slice_eliminator 0.62% : 0.000006s : 42: predicate.merge_addn 0.58% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.61% : 0.000006s : 42: predicate.mini_step_allgather_replace 1.05% : 0.000010s : 72: predicate.minmaximum_grad 0.42% : 0.000004s : 21: predicate.mutable_eliminate 0.32% : 0.000003s : 21: predicate.opt_reshape 0.35% : 0.000003s : 21: predicate.parallel_virtual_node 1.90% : 0.000018s : 93: predicate.partial_defer_inline 1.54% : 0.000015s : 99: predicate.partial_eliminate 1.02% : 0.000010s : 72: predicate.print_const_string_wrapper 0.67% : 0.000006s : 42: predicate.reduce_all_const_elim 1.29% : 0.000012s : 72: predicate.reduce_eliminate 0.35% : 0.000003s : 42: predicate.remove_not_recompute_node 1.18% : 0.000011s : 120: predicate.replace_applicator 0.33% : 0.000003s : 42: predicate.replace_old_param 0.18% : 0.000002s : 21: predicate.reset_defer_inline 1.00% : 0.000010s : 72: predicate.reshape_eliminate 0.62% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.37% : 0.000004s : 21: predicate.row_tensor_eliminate 0.80% : 0.000008s : 42: predicate.same_eliminate 0.36% : 0.000004s : 46: predicate.set_cell_output_no_recompute 0.70% : 0.000007s : 42: predicate.shard_identity_eliminate 1.02% : 0.000010s : 63: predicate.special_op_eliminate 0.74% : 0.000007s : 42: predicate.specialize_transform 0.75% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.70% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.69% : 0.000026s : 192: predicate.stopgrad_eliminater 0.32% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.45% : 0.000014s : 93: predicate.switch_defer_inline 2.04% : 0.000020s : 135: predicate.switch_layer_defer_inline 4.70% : 0.000045s : 253: predicate.switch_simplify 1.06% : 0.000010s : 72: predicate.tile_eliminate 0.98% : 0.000009s : 72: predicate.transpose_eliminate 1.82% : 0.000017s : 114: predicate.tuple_list_convert_item_index_to_positive 1.76% : 0.000017s : 114: predicate.tuple_list_get_item_const_eliminator 1.59% : 0.000015s : 114: predicate.tuple_list_get_item_depend_reorder 2.69% : 0.000026s : 162: predicate.tuple_list_get_item_eliminator 1.68% : 0.000016s : 114: predicate.tuple_list_get_set_item_eliminator 2.42% : 0.000023s : 156: predicate.tuple_list_set_item_eliminator 1.73% : 0.000017s : 120: predicate.tuple_to_list_eliminator_ 2.82% : 0.000027s : 192: predicate.updatestate_pure_node_eliminater 3.70% : 0.000035s : 234: predicate.updatestate_useless_node_eliminater 0.38% : 0.000004s : 21: predicate.value_based_eliminate 0.65% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.59% : 0.000006s : 42: predicate.virtual_output_eliminate 0.37% : 0.000004s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002871 41 60.81% : 0.001746s : 24: func_graph_cloner_run.FuncGraphClonerGraph 39.19% : 0.001125s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.190099 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000030s : 1: add_cache_embedding 0.00% : 0.000006s : 1: add_comm_op_reuse_tag 0.08% : 0.000145s : 1: add_recomputation 0.03% : 0.000064s : 1: assign_add_opt 0.50% : 0.000944s : 1: auto_monad 0.06% : 0.000114s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.18% : 0.000338s : 1: bootstrap 0.02% : 0.000032s : 1: cconv 0.04% : 0.000079s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000023s : 1: convert_after_rewriter 0.03% : 0.000063s : 1: cse_after_recomputation 0.00% : 0.000007s : 1: dataset_repeat_opt 0.00% : 0.000009s : 1: distribtued_split 0.31% : 0.000590s : 1: eliminate_special_op_node 0.01% : 0.000024s : 1: environ_conv 0.01% : 0.000022s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000014s : 1: graph_reusing 0.01% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000012s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.28% : 0.000538s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000032s : 1: opt.transform.loop_unroll_optimizer 2.60% : 0.004950s : 80: opt.transform.opt_a 0.08% : 0.000150s : 1: opt.transform.opt_after_cconv 0.25% : 0.000482s : 27: opt.transform.opt_b 0.08% : 0.000161s : 1: opt.transform.opt_trans_graph 0.04% : 0.000075s : 3: opt.transform.special_op_eliminate 0.07% : 0.000125s : 4: opt.transform.symbol_engine_opt 6.33% : 0.012030s : 1: opt_a 0.17% : 0.000316s : 1: opt_after_cconv 0.34% : 0.000649s : 1: opt_b 8.16% : 0.015512s : 1: optimize 0.01% : 0.000023s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000018s : 1: order_py_execute_after_rewriter 0.02% : 0.000038s : 1: overlap_grad_flash_sp 0.00% : 0.000005s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.03% : 0.000049s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.05% : 0.000097s : 1: pre_auto_parallel 0.04% : 0.000077s : 1: py_interpret_to_execute 0.01% : 0.000024s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000021s : 1: remove_cast_before_assign_add 0.04% : 0.000072s : 1: remove_dup_value 0.62% : 0.001176s : 1: renormalize.infer 0.59% : 0.001128s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.20% : 0.000384s : 1: rewriter_after_opt_a 0.11% : 0.000204s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000022s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000188s : 1: symbol_engine_optimizer 60.42% : 0.114855s : 1: task_emit 0.10% : 0.000186s : 1: tuple_transform 17.73% : 0.033695s : 1: type_inference 0.06% : 0.000120s : 1: validate TotalTime = 0.163653, [21] [bootstrap]: 0.00030018 [type_inference]: 0.0334759 [auto_monad]: 0.00093101 [graph_reusing]: 6.80999e-06 [inline]: 1.68e-06 [parallel-infer-symbol]: 2.17001e-06 [pre_auto_parallel]: 9.26e-05 [insert-virtual-dataset]: 3.56999e-06 [parallel-infer-symbol-second]: 5.40007e-07 [dataset_repeat_opt]: 1.54e-06 [pipeline_split]: 1.63e-06 [optimize]: 0.0154597, [52] [py_interpret_to_execute]: 7.134e-05 [rewriter_before_opt_a]: 0.00019846 [opt_a]: 0.0120535, [2] [Cycle 1]: 0.00694215, [43] [expand_dump_flag]: 6.94e-06 [switch_simplify]: 0.00022276 [loop_unroll]: 8.079e-05 [a_1]: 0.00243522 [recompute_prepare]: 2.81e-05 [updatestate_depend_eliminate]: 0.00010135 [updatestate_assign_eliminate]: 2.04e-05 [updatestate_loads_eliminate]: 1.724e-05 [parameter_eliminate]: 4.05e-06 [a_2]: 0.00032787 [accelerated_algorithm]: 4.398e-05 [shard]: 2.37e-06 [meta_shard_fg_expand]: 8.90999e-06 [shard_inline]: 2.146e-05 [auto_parallel]: 1.931e-05 [parallel]: 9.56999e-06 [flash_sp]: 1.44e-05 [merge_comm]: 1.667e-05 [allreduce_fusion]: 1.366e-05 [matmul_add_comm_reduction]: 2.242e-05 [allreduce_slice_to_reducescatter]: 4.79995e-07 [virtual_shard_identity]: 2.333e-05 [virtual_dataset]: 2.119e-05 [get_grad_eliminate_]: 2.09e-05 [virtual_output]: 2.087e-05 [merge_forward]: 1.372e-05 [cell_reuse_recompute_pass]: 2.17e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.273e-05 [before_grad]: 3.798e-05 [inplace_validation]: 1.277e-05 [meta_fg_expand]: 1.665e-05 [inplace_validation_after_expand]: 1.613e-05 [flash_sp_send_recv_attached]: 3.35e-06 [receive_attached]: 5.28e-06 [after_resolve]: 2.637e-05 [a_after_grad]: 3.492e-05 [special_op_eliminate]: 2.239e-05 [renormalize]: 0.00243833 [add_forward_monad_depend]: 4.52e-06 [auto_monad_grad]: 2.27001e-06 [auto_monad_eliminator]: 6.323e-05 [cse]: 0.0002892 [a_3]: 0.00015552 [Cycle 2]: 0.00198582, [43] [expand_dump_flag]: 1.61999e-06 [switch_simplify]: 2.375e-05 [loop_unroll]: 2.139e-05 [a_1]: 0.00069161 [recompute_prepare]: 2.02e-05 [updatestate_depend_eliminate]: 1.644e-05 [updatestate_assign_eliminate]: 1.679e-05 [updatestate_loads_eliminate]: 1.455e-05 [parameter_eliminate]: 2.21e-06 [a_2]: 0.00030741 [accelerated_algorithm]: 2.397e-05 [shard]: 1.34e-06 [meta_shard_fg_expand]: 6.53e-06 [shard_inline]: 2.122e-05 [auto_parallel]: 2.041e-05 [parallel]: 3.91e-06 [flash_sp]: 3.62001e-06 [merge_comm]: 1.61e-05 [allreduce_fusion]: 1.306e-05 [matmul_add_comm_reduction]: 1.962e-05 [allreduce_slice_to_reducescatter]: 3.00002e-07 [virtual_shard_identity]: 2.228e-05 [virtual_dataset]: 2.086e-05 [get_grad_eliminate_]: 1.978e-05 [virtual_output]: 2.043e-05 [merge_forward]: 1.174e-05 [cell_reuse_recompute_pass]: 2.15e-06 [cell_reuse_handle_not_recompute_node_pass]: 5.584e-05 [before_grad]: 3.723e-05 [inplace_validation]: 1.21e-05 [meta_fg_expand]: 1.313e-05 [inplace_validation_after_expand]: 1.628e-05 [flash_sp_send_recv_attached]: 9.79999e-07 [receive_attached]: 7.40009e-07 [after_resolve]: 2.555e-05 [a_after_grad]: 3.56e-05 [special_op_eliminate]: 2.013e-05 [renormalize]: 8.00064e-08 [add_forward_monad_depend]: 1.65e-06 [auto_monad_grad]: 1.35e-06 [auto_monad_eliminator]: 4.405e-05 [cse]: 5.693e-05 [a_3]: 0.00014194 [py_interpret_to_execute_after_opt_a]: 1.947e-05 [slice_cell_reuse_recomputed_activation]: 2.81e-06 [rewriter_after_opt_a]: 0.00037074 [convert_after_rewriter]: 1.882e-05 [order_py_execute_after_rewriter]: 1.327e-05 [opt_b]: 0.00064298, [1] [Cycle 1]: 0.00063698, [7] [b_1]: 0.00047941 [b_2]: 2.339e-05 [updatestate_depend_eliminate]: 1.382e-05 [updatestate_assign_eliminate]: 1.593e-05 [updatestate_loads_eliminate]: 1.484e-05 [renormalize]: 3.69997e-07 [cse]: 5.582e-05 [optimize_parallel_all_gather_comm]: 1.958e-05 [overlap_param_gather]: 1.09e-06 [cconv]: 2.772e-05 [loop_unroll]: 0.00051807 [opt_after_cconv]: 0.0003054, [1] [Cycle 1]: 0.00029847, [7] [c_1]: 0.00015311 [parameter_eliminate]: 2.34999e-06 [updatestate_depend_eliminate]: 1.776e-05 [updatestate_assign_eliminate]: 1.643e-05 [updatestate_loads_eliminate]: 1.585e-05 [cse]: 5.931e-05 [renormalize]: 4.00003e-07 [remove_dup_value]: 6.758e-05 [tuple_transform]: 0.00017977, [1] [Cycle 1]: 0.00017387, [2] [d_1]: 0.00016419 [renormalize]: 1.90004e-07 [partial_unused_args_eliminate]: 2.63e-06 [add_cache_embedding]: 2.422e-05 [add_recomputation]: 0.00014023 [cse_after_recomputation]: 5.807e-05, [1] [Cycle 1]: 5.277e-05, [1] [cse]: 4.734e-05 [environ_conv]: 1.991e-05 [swap_dp_allreduce_reducescatter]: 1.817e-05 [bias_add_comm_swap]: 2.43001e-06 [label_micro_interleaved_index]: 1.86e-06 [label_fine_grained_interleaved_index]: 2.32e-06 [merge_cast_opt]: 1.16001e-06 [slice_recompute_activation]: 2.02999e-06 [micro_interleaved_order_control]: 2e-06 [assign_add_opt]: 6.139e-05 [ForceFp32Comm]: 8.2e-07 [remove_cast_before_assign_add]: 1.663e-05 [full_micro_interleaved_order_control]: 2.68e-06 [reorder_send_recv_between_fp_bp]: 2.87e-06 [comm_op_add_attrs]: 5.321e-05 [add_comm_op_reuse_tag]: 2.04e-06 [interleave_split_concat_branches]: 1.14e-06 [interleave_parallel_branches]: 1.09001e-06 [overlap_opt_shard_in_pipeline]: 1.00001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.19001e-06 [control_data_broadcast_order]: 1.42e-06 [grouped_pairwise_exchange_alltoall]: 1.061e-05 [offloading_packed_experts]: 2.55e-06 [overlap_recompute_and_grad_model_parallel]: 2.37e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.50006e-07 [overlap_recompute_allgather_and_fa_grad]: 3.326e-05 [overlap_grad_ring_attention]: 2e-06 [overlap_grad_flash_sp]: 3.733e-05 [begin_end_overlap_inline]: 1.13e-06 [split_matmul_comm_elemetwise]: 2.37999e-06 [split_layernorm_comm]: 1.99e-06 [handle_group_info]: 7.09e-06 [symbol_engine_optimizer]: 0.00018124, [1] [Cycle 1]: 0.00017568, [6] [build]: 1.852e-05 [elim_shapecalc]: 2.775e-05 [elim_not_effective]: 4.215e-05 [opt_reshape]: 2.218e-05 [fold_const_symbol]: 3.745e-05 [renormalize]: 3.50003e-07 [pipeline_parallel_scheduler]: 2.11e-06 [auto_monad_reorder]: 0.00010474 [get_jit_bprop_graph]: 7.79997e-07 [rewriter_after_jit_bprop_graph]: 4.29995e-07 [eliminate_special_op_node]: 0.00057897 [distribtued_split]: 1.55e-06 [validate]: 7.746e-05 [task_emit]: 0.112306 [execute]: 1.116e-05 Sums bootstrap : 0.000300s : 0.19% type_inference : 0.033476s : 21.00% auto_monad : 0.000931s : 0.58% graph_reusing : 0.000007s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000093s : 0.06% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000071s : 0.04% optimize.rewriter_before_opt_a : 0.000198s : 0.12% optimize.opt_a.expand_dump_flag : 0.000009s : 0.01% optimize.opt_a.switch_simplify : 0.000247s : 0.15% optimize.opt_a.loop_unroll : 0.000102s : 0.06% optimize.opt_a.a_1 : 0.003127s : 1.96% optimize.opt_a.recompute_prepare : 0.000048s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000118s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000037s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000032s : 0.02% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.000635s : 0.40% optimize.opt_a.accelerated_algorithm : 0.000068s : 0.04% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000015s : 0.01% optimize.opt_a.shard_inline : 0.000043s : 0.03% optimize.opt_a.auto_parallel : 0.000040s : 0.02% optimize.opt_a.parallel : 0.000013s : 0.01% optimize.opt_a.flash_sp : 0.000018s : 0.01% optimize.opt_a.merge_comm : 0.000033s : 0.02% optimize.opt_a.allreduce_fusion : 0.000027s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000042s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000046s : 0.03% optimize.opt_a.virtual_dataset : 0.000042s : 0.03% optimize.opt_a.get_grad_eliminate_ : 0.000041s : 0.03% optimize.opt_a.virtual_output : 0.000041s : 0.03% optimize.opt_a.merge_forward : 0.000025s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000099s : 0.06% optimize.opt_a.before_grad : 0.000075s : 0.05% optimize.opt_a.inplace_validation : 0.000025s : 0.02% optimize.opt_a.meta_fg_expand : 0.000030s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000032s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.000052s : 0.03% optimize.opt_a.a_after_grad : 0.000071s : 0.04% optimize.opt_a.special_op_eliminate : 0.000043s : 0.03% optimize.opt_a.renormalize : 0.002438s : 1.53% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000107s : 0.07% optimize.opt_a.cse : 0.000346s : 0.22% optimize.opt_a.a_3 : 0.000297s : 0.19% optimize.py_interpret_to_execute_after_opt_a : 0.000019s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000371s : 0.23% optimize.convert_after_rewriter : 0.000019s : 0.01% optimize.order_py_execute_after_rewriter : 0.000013s : 0.01% optimize.opt_b.b_1 : 0.000479s : 0.30% optimize.opt_b.b_2 : 0.000023s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000056s : 0.04% optimize.optimize_parallel_all_gather_comm : 0.000020s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000028s : 0.02% optimize.loop_unroll : 0.000518s : 0.32% optimize.opt_after_cconv.c_1 : 0.000153s : 0.10% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000018s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.cse : 0.000059s : 0.04% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000068s : 0.04% optimize.tuple_transform.d_1 : 0.000164s : 0.10% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000024s : 0.02% optimize.add_recomputation : 0.000140s : 0.09% optimize.cse_after_recomputation.cse : 0.000047s : 0.03% optimize.environ_conv : 0.000020s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000018s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000061s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000017s : 0.01% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000053s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.01% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000033s : 0.02% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000037s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000019s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000028s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000042s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000022s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000037s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000105s : 0.07% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000579s : 0.36% distribtued_split : 0.000002s : 0.00% validate : 0.000077s : 0.05% task_emit : 0.112306s : 70.45% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000646 300 1.18% : 0.000008s : 2: substitution.depend_value_elim 1.01% : 0.000007s : 18: substitution.elim_not_effective 0.97% : 0.000006s : 18: substitution.fold_const_symbol 2.47% : 0.000016s : 21: substitution.graph_param_transform 60.76% : 0.000392s : 15: substitution.inline 2.43% : 0.000016s : 36: substitution.j_node_and_user_rematch 3.27% : 0.000021s : 2: substitution.less_batch_normalization 2.56% : 0.000017s : 30: substitution.load_eliminater 0.86% : 0.000006s : 6: substitution.reduce_all_const_elim 3.27% : 0.000021s : 36: substitution.remove_not_recompute_node 0.92% : 0.000006s : 6: substitution.replace_old_param 2.38% : 0.000015s : 4: substitution.switch_simplify 3.83% : 0.000025s : 6: substitution.tuple_list_get_item_eliminator 7.50% : 0.000048s : 44: substitution.updatestate_pure_node_eliminater 6.58% : 0.000043s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.033412 2 92.21% : 0.030809s : 1: type_inference.infer 7.79% : 0.002603s : 1: type_inference.specialize ------[replace.] 0.000219 25 52.25% : 0.000114s : 15: replace.inline 32.07% : 0.000070s : 4: replace.switch_simplify 15.68% : 0.000034s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000418 25 91.75% : 0.000384s : 15: match.inline 3.09% : 0.000013s : 4: match.switch_simplify 5.17% : 0.000022s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000952 6511 1.06% : 0.000010s : 72: predicate.accumulaten_eliminater 0.72% : 0.000007s : 21: predicate.ad_related_special_op_eliminate 0.58% : 0.000006s : 42: predicate.addn_check_dump 1.02% : 0.000010s : 72: predicate.addn_zero_filter 1.00% : 0.000010s : 72: predicate.adjust_all_reduce_mul_add 2.22% : 0.000021s : 114: predicate.arithmetic_simplify 1.01% : 0.000010s : 72: predicate.cast_eliminate 0.60% : 0.000006s : 42: predicate.check_bprop_eliminate 0.58% : 0.000006s : 42: predicate.compare_switch_simplify 0.16% : 0.000002s : 21: predicate.const_output_eliminate 0.33% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.44% : 0.000014s : 78: predicate.convert_tensor_eliminate 0.61% : 0.000006s : 42: predicate.depend_value_elim 1.14% : 0.000011s : 72: predicate.dict_get_item_const_eliminator 1.10% : 0.000010s : 72: predicate.dict_get_item_eliminator 1.10% : 0.000011s : 72: predicate.dict_set_item_eliminator 0.19% : 0.000002s : 21: predicate.elim_not_effective 0.38% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.28% : 0.000012s : 93: predicate.environ_add_const_eliminate 1.28% : 0.000012s : 93: predicate.environ_get_add_eliminate 1.28% : 0.000012s : 93: predicate.environ_get_depend_swap 1.93% : 0.000018s : 135: predicate.environ_get_eliminate 1.28% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.33% : 0.000013s : 93: predicate.exchange_switch_depend_value 1.65% : 0.000016s : 93: predicate.float_depend_g_call 0.60% : 0.000006s : 42: predicate.float_environ_get_switch 0.89% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 21: predicate.fold_const_symbol 0.61% : 0.000006s : 42: predicate.get_grad_eliminate 0.20% : 0.000002s : 21: predicate.graph_param_transform 0.61% : 0.000006s : 42: predicate.incorporate_call 0.58% : 0.000006s : 42: predicate.incorporate_call_switch 5.45% : 0.000052s : 291: predicate.inline 0.78% : 0.000007s : 42: predicate.inline_without_move 0.32% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.73% : 0.000007s : 42: predicate.less_batch_normalization 1.75% : 0.000017s : 120: predicate.list_to_tuple_eliminator_ 2.89% : 0.000028s : 192: predicate.load_eliminater 0.79% : 0.000008s : 21: predicate.loop_unroll_after_grad 1.75% : 0.000017s : 110: predicate.loop_unroll_before_grad 1.75% : 0.000017s : 114: predicate.make_slice_get_slice_eliminator 0.59% : 0.000006s : 42: predicate.merge_addn 0.59% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.60% : 0.000006s : 42: predicate.mini_step_allgather_replace 0.99% : 0.000009s : 72: predicate.minmaximum_grad 0.42% : 0.000004s : 21: predicate.mutable_eliminate 0.33% : 0.000003s : 21: predicate.opt_reshape 0.34% : 0.000003s : 21: predicate.parallel_virtual_node 1.88% : 0.000018s : 93: predicate.partial_defer_inline 1.54% : 0.000015s : 99: predicate.partial_eliminate 1.06% : 0.000010s : 72: predicate.print_const_string_wrapper 0.69% : 0.000007s : 42: predicate.reduce_all_const_elim 1.26% : 0.000012s : 72: predicate.reduce_eliminate 0.34% : 0.000003s : 42: predicate.remove_not_recompute_node 1.14% : 0.000011s : 120: predicate.replace_applicator 0.33% : 0.000003s : 42: predicate.replace_old_param 0.17% : 0.000002s : 21: predicate.reset_defer_inline 1.01% : 0.000010s : 72: predicate.reshape_eliminate 0.61% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.35% : 0.000003s : 21: predicate.row_tensor_eliminate 0.81% : 0.000008s : 42: predicate.same_eliminate 0.36% : 0.000003s : 46: predicate.set_cell_output_no_recompute 0.67% : 0.000006s : 42: predicate.shard_identity_eliminate 1.02% : 0.000010s : 63: predicate.special_op_eliminate 0.69% : 0.000007s : 42: predicate.specialize_transform 0.69% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.72% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.73% : 0.000026s : 192: predicate.stopgrad_eliminater 0.33% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.42% : 0.000014s : 93: predicate.switch_defer_inline 2.00% : 0.000019s : 135: predicate.switch_layer_defer_inline 4.78% : 0.000046s : 253: predicate.switch_simplify 1.07% : 0.000010s : 72: predicate.tile_eliminate 1.04% : 0.000010s : 72: predicate.transpose_eliminate 1.79% : 0.000017s : 114: predicate.tuple_list_convert_item_index_to_positive 1.76% : 0.000017s : 114: predicate.tuple_list_get_item_const_eliminator 1.58% : 0.000015s : 114: predicate.tuple_list_get_item_depend_reorder 2.65% : 0.000025s : 162: predicate.tuple_list_get_item_eliminator 1.73% : 0.000016s : 114: predicate.tuple_list_get_set_item_eliminator 2.43% : 0.000023s : 156: predicate.tuple_list_set_item_eliminator 1.88% : 0.000018s : 120: predicate.tuple_to_list_eliminator_ 2.85% : 0.000027s : 192: predicate.updatestate_pure_node_eliminater 3.66% : 0.000035s : 234: predicate.updatestate_useless_node_eliminater 0.35% : 0.000003s : 21: predicate.value_based_eliminate 0.61% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.61% : 0.000006s : 42: predicate.virtual_output_eliminate 0.34% : 0.000003s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002837 41 60.45% : 0.001715s : 24: func_graph_cloner_run.FuncGraphClonerGraph 39.55% : 0.001122s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.187489 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000028s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000145s : 1: add_recomputation 0.03% : 0.000065s : 1: assign_add_opt 0.52% : 0.000967s : 1: auto_monad 0.06% : 0.000112s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.17% : 0.000324s : 1: bootstrap 0.02% : 0.000032s : 1: cconv 0.03% : 0.000058s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.01% : 0.000023s : 1: convert_after_rewriter 0.03% : 0.000061s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: distribtued_split 0.32% : 0.000592s : 1: eliminate_special_op_node 0.01% : 0.000024s : 1: environ_conv 0.01% : 0.000020s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000014s : 1: graph_reusing 0.01% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000010s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.28% : 0.000527s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000033s : 1: opt.transform.loop_unroll_optimizer 2.67% : 0.004999s : 80: opt.transform.opt_a 0.08% : 0.000152s : 1: opt.transform.opt_after_cconv 0.26% : 0.000482s : 27: opt.transform.opt_b 0.09% : 0.000162s : 1: opt.transform.opt_trans_graph 0.04% : 0.000077s : 3: opt.transform.special_op_eliminate 0.07% : 0.000125s : 4: opt.transform.symbol_engine_opt 6.43% : 0.012058s : 1: opt_a 0.17% : 0.000310s : 1: opt_after_cconv 0.34% : 0.000646s : 1: opt_b 8.25% : 0.015469s : 1: optimize 0.01% : 0.000023s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000017s : 1: order_py_execute_after_rewriter 0.02% : 0.000041s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.02% : 0.000037s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.05% : 0.000100s : 1: pre_auto_parallel 0.04% : 0.000077s : 1: py_interpret_to_execute 0.01% : 0.000023s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000020s : 1: remove_cast_before_assign_add 0.04% : 0.000073s : 1: remove_dup_value 0.67% : 0.001253s : 1: renormalize.infer 0.63% : 0.001175s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.20% : 0.000377s : 1: rewriter_after_opt_a 0.11% : 0.000204s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000022s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000185s : 1: symbol_engine_optimizer 59.92% : 0.112338s : 1: task_emit 0.10% : 0.000183s : 1: tuple_transform 17.86% : 0.033494s : 1: type_inference 0.07% : 0.000127s : 1: validate TotalTime = 0.164112, [21] [bootstrap]: 0.00027394 [type_inference]: 0.033235 [auto_monad]: 0.00090198 [graph_reusing]: 5.38e-06 [inline]: 1.88001e-06 [parallel-infer-symbol]: 1.6e-06 [pre_auto_parallel]: 8.307e-05 [insert-virtual-dataset]: 2.07e-06 [parallel-infer-symbol-second]: 4.79995e-07 [dataset_repeat_opt]: 6.90008e-07 [pipeline_split]: 1.04e-06 [optimize]: 0.0153616, [52] [py_interpret_to_execute]: 7.108e-05 [rewriter_before_opt_a]: 0.00019198 [opt_a]: 0.0120343, [2] [Cycle 1]: 0.00692323, [43] [expand_dump_flag]: 5.71e-06 [switch_simplify]: 0.00020117 [loop_unroll]: 0.00012631 [a_1]: 0.00239679 [recompute_prepare]: 2.714e-05 [updatestate_depend_eliminate]: 9.925e-05 [updatestate_assign_eliminate]: 1.839e-05 [updatestate_loads_eliminate]: 1.524e-05 [parameter_eliminate]: 2.47e-06 [a_2]: 0.00032191 [accelerated_algorithm]: 4.156e-05 [shard]: 1.89e-06 [meta_shard_fg_expand]: 8.52e-06 [shard_inline]: 2.166e-05 [auto_parallel]: 1.797e-05 [parallel]: 5.75e-06 [flash_sp]: 1.094e-05 [merge_comm]: 1.583e-05 [allreduce_fusion]: 1.309e-05 [matmul_add_comm_reduction]: 2.047e-05 [allreduce_slice_to_reducescatter]: 3.69997e-07 [virtual_shard_identity]: 2.27e-05 [virtual_dataset]: 2.063e-05 [get_grad_eliminate_]: 2.114e-05 [virtual_output]: 2.04e-05 [merge_forward]: 1.315e-05 [cell_reuse_recompute_pass]: 1.48e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.129e-05 [before_grad]: 3.831e-05 [inplace_validation]: 1.172e-05 [meta_fg_expand]: 1.677e-05 [inplace_validation_after_expand]: 1.483e-05 [flash_sp_send_recv_attached]: 2.35e-06 [receive_attached]: 3.05001e-06 [after_resolve]: 2.576e-05 [a_after_grad]: 3.417e-05 [special_op_eliminate]: 2.093e-05 [renormalize]: 0.00247445 [add_forward_monad_depend]: 2.92e-06 [auto_monad_grad]: 1.53e-06 [auto_monad_eliminator]: 5.082e-05 [cse]: 0.00029531 [a_3]: 0.00015491 [Cycle 2]: 0.00199425, [43] [expand_dump_flag]: 1.39e-06 [switch_simplify]: 2.291e-05 [loop_unroll]: 2.131e-05 [a_1]: 0.00068836 [recompute_prepare]: 2.065e-05 [updatestate_depend_eliminate]: 1.524e-05 [updatestate_assign_eliminate]: 1.591e-05 [updatestate_loads_eliminate]: 1.43e-05 [parameter_eliminate]: 1.85e-06 [a_2]: 0.00030429 [accelerated_algorithm]: 2.423e-05 [shard]: 9.9001e-07 [meta_shard_fg_expand]: 6.27e-06 [shard_inline]: 2.095e-05 [auto_parallel]: 1.901e-05 [parallel]: 3.41999e-06 [flash_sp]: 2.41e-06 [merge_comm]: 1.589e-05 [allreduce_fusion]: 1.329e-05 [matmul_add_comm_reduction]: 5.901e-05 [allreduce_slice_to_reducescatter]: 2.29993e-07 [virtual_shard_identity]: 2.27e-05 [virtual_dataset]: 2.117e-05 [get_grad_eliminate_]: 2.007e-05 [virtual_output]: 1.993e-05 [merge_forward]: 1.234e-05 [cell_reuse_recompute_pass]: 1.97001e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.082e-05 [before_grad]: 3.685e-05 [inplace_validation]: 1.169e-05 [meta_fg_expand]: 1.358e-05 [inplace_validation_after_expand]: 1.519e-05 [flash_sp_send_recv_attached]: 1.02e-06 [receive_attached]: 1.11001e-06 [after_resolve]: 2.432e-05 [a_after_grad]: 3.448e-05 [special_op_eliminate]: 1.976e-05 [renormalize]: 6.99947e-08 [add_forward_monad_depend]: 1.17e-06 [auto_monad_grad]: 1.04001e-06 [auto_monad_eliminator]: 3.918e-05 [cse]: 5.714e-05 [a_3]: 0.00014012 [py_interpret_to_execute_after_opt_a]: 2.033e-05 [slice_cell_reuse_recomputed_activation]: 1.86001e-06 [rewriter_after_opt_a]: 0.00036252 [convert_after_rewriter]: 1.75e-05 [order_py_execute_after_rewriter]: 1.217e-05 [opt_b]: 0.00064463, [1] [Cycle 1]: 0.00063926, [7] [b_1]: 0.0004815 [b_2]: 2.347e-05 [updatestate_depend_eliminate]: 1.352e-05 [updatestate_assign_eliminate]: 1.57e-05 [updatestate_loads_eliminate]: 1.474e-05 [renormalize]: 3.19997e-07 [cse]: 5.546e-05 [optimize_parallel_all_gather_comm]: 1.885e-05 [overlap_param_gather]: 7.39994e-07 [cconv]: 1.99e-05 [loop_unroll]: 0.00051986 [opt_after_cconv]: 0.00030459, [1] [Cycle 1]: 0.00029771, [7] [c_1]: 0.00015237 [parameter_eliminate]: 1.92999e-06 [updatestate_depend_eliminate]: 1.597e-05 [updatestate_assign_eliminate]: 1.605e-05 [updatestate_loads_eliminate]: 1.529e-05 [cse]: 5.99e-05 [renormalize]: 3.50003e-07 [remove_dup_value]: 5.76e-05 [tuple_transform]: 0.00017981, [1] [Cycle 1]: 0.00017396, [2] [d_1]: 0.00016335 [renormalize]: 4.00003e-07 [partial_unused_args_eliminate]: 1.93999e-06 [add_cache_embedding]: 2.083e-05 [add_recomputation]: 0.00012937 [cse_after_recomputation]: 6.312e-05, [1] [Cycle 1]: 5.828e-05, [1] [cse]: 5.216e-05 [environ_conv]: 1.888e-05 [swap_dp_allreduce_reducescatter]: 1.786e-05 [bias_add_comm_swap]: 1.44e-06 [label_micro_interleaved_index]: 1.28e-06 [label_fine_grained_interleaved_index]: 1.12e-06 [merge_cast_opt]: 8.99992e-07 [slice_recompute_activation]: 9.70002e-07 [micro_interleaved_order_control]: 1.09999e-06 [assign_add_opt]: 5.822e-05 [ForceFp32Comm]: 6.79996e-07 [remove_cast_before_assign_add]: 1.458e-05 [full_micro_interleaved_order_control]: 1.49e-06 [reorder_send_recv_between_fp_bp]: 1.22e-06 [comm_op_add_attrs]: 4.508e-05 [add_comm_op_reuse_tag]: 1.63e-06 [interleave_split_concat_branches]: 5.8001e-07 [interleave_parallel_branches]: 5.3001e-07 [overlap_opt_shard_in_pipeline]: 6.90008e-07 [overlap_opt_shard_grad_in_pipeline]: 1.40999e-06 [control_data_broadcast_order]: 6.69999e-07 [grouped_pairwise_exchange_alltoall]: 8.38e-06 [offloading_packed_experts]: 1.45999e-06 [overlap_recompute_and_grad_model_parallel]: 1.19999e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.19998e-07 [overlap_recompute_allgather_and_fa_grad]: 4.273e-05 [overlap_grad_ring_attention]: 1.21999e-06 [overlap_grad_flash_sp]: 2.957e-05 [begin_end_overlap_inline]: 4.49989e-07 [split_matmul_comm_elemetwise]: 1.29001e-06 [split_layernorm_comm]: 1.06001e-06 [handle_group_info]: 5.78001e-06 [symbol_engine_optimizer]: 0.00018056, [1] [Cycle 1]: 0.00017541, [6] [build]: 1.61e-05 [elim_shapecalc]: 2.776e-05 [elim_not_effective]: 4.144e-05 [opt_reshape]: 2.241e-05 [fold_const_symbol]: 3.741e-05 [renormalize]: 3.09999e-07 [pipeline_parallel_scheduler]: 1.5e-06 [auto_monad_reorder]: 8.789e-05 [get_jit_bprop_graph]: 3.50003e-07 [rewriter_after_jit_bprop_graph]: 3.69997e-07 [eliminate_special_op_node]: 0.00057081 [distribtued_split]: 1.13e-06 [validate]: 6.776e-05 [task_emit]: 0.113241 [execute]: 8.06e-06 Sums bootstrap : 0.000274s : 0.17% type_inference : 0.033235s : 20.79% auto_monad : 0.000902s : 0.56% graph_reusing : 0.000005s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000083s : 0.05% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000071s : 0.04% optimize.rewriter_before_opt_a : 0.000192s : 0.12% optimize.opt_a.expand_dump_flag : 0.000007s : 0.00% optimize.opt_a.switch_simplify : 0.000224s : 0.14% optimize.opt_a.loop_unroll : 0.000148s : 0.09% optimize.opt_a.a_1 : 0.003085s : 1.93% optimize.opt_a.recompute_prepare : 0.000048s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000114s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000034s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000030s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000626s : 0.39% optimize.opt_a.accelerated_algorithm : 0.000066s : 0.04% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000015s : 0.01% optimize.opt_a.shard_inline : 0.000043s : 0.03% optimize.opt_a.auto_parallel : 0.000037s : 0.02% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.01% optimize.opt_a.merge_comm : 0.000032s : 0.02% optimize.opt_a.allreduce_fusion : 0.000026s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000079s : 0.05% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000045s : 0.03% optimize.opt_a.virtual_dataset : 0.000042s : 0.03% optimize.opt_a.get_grad_eliminate_ : 0.000041s : 0.03% optimize.opt_a.virtual_output : 0.000040s : 0.03% optimize.opt_a.merge_forward : 0.000025s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000082s : 0.05% optimize.opt_a.before_grad : 0.000075s : 0.05% optimize.opt_a.inplace_validation : 0.000023s : 0.01% optimize.opt_a.meta_fg_expand : 0.000030s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000030s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000050s : 0.03% optimize.opt_a.a_after_grad : 0.000069s : 0.04% optimize.opt_a.special_op_eliminate : 0.000041s : 0.03% optimize.opt_a.renormalize : 0.002475s : 1.55% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000090s : 0.06% optimize.opt_a.cse : 0.000352s : 0.22% optimize.opt_a.a_3 : 0.000295s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000020s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000363s : 0.23% optimize.convert_after_rewriter : 0.000017s : 0.01% optimize.order_py_execute_after_rewriter : 0.000012s : 0.01% optimize.opt_b.b_1 : 0.000481s : 0.30% optimize.opt_b.b_2 : 0.000023s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000055s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000019s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000020s : 0.01% optimize.loop_unroll : 0.000520s : 0.33% optimize.opt_after_cconv.c_1 : 0.000152s : 0.10% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_after_cconv.cse : 0.000060s : 0.04% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000058s : 0.04% optimize.tuple_transform.d_1 : 0.000163s : 0.10% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000021s : 0.01% optimize.add_recomputation : 0.000129s : 0.08% optimize.cse_after_recomputation.cse : 0.000052s : 0.03% optimize.environ_conv : 0.000019s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000018s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000058s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000015s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000045s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000008s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000043s : 0.03% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000030s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000006s : 0.00% optimize.symbol_engine_optimizer.build : 0.000016s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000028s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000041s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000022s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000037s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000088s : 0.05% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000571s : 0.36% distribtued_split : 0.000001s : 0.00% validate : 0.000068s : 0.04% task_emit : 0.113241s : 70.82% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000578 300 0.86% : 0.000005s : 2: substitution.depend_value_elim 1.02% : 0.000006s : 18: substitution.elim_not_effective 0.97% : 0.000006s : 18: substitution.fold_const_symbol 2.62% : 0.000015s : 21: substitution.graph_param_transform 59.44% : 0.000344s : 15: substitution.inline 2.63% : 0.000015s : 36: substitution.j_node_and_user_rematch 3.21% : 0.000019s : 2: substitution.less_batch_normalization 2.94% : 0.000017s : 30: substitution.load_eliminater 0.83% : 0.000005s : 6: substitution.reduce_all_const_elim 3.45% : 0.000020s : 36: substitution.remove_not_recompute_node 0.86% : 0.000005s : 6: substitution.replace_old_param 2.29% : 0.000013s : 4: substitution.switch_simplify 3.52% : 0.000020s : 6: substitution.tuple_list_get_item_eliminator 8.14% : 0.000047s : 44: substitution.updatestate_pure_node_eliminater 7.23% : 0.000042s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.033174 2 92.50% : 0.030686s : 1: type_inference.infer 7.50% : 0.002488s : 1: type_inference.specialize ------[replace.] 0.000206 25 52.79% : 0.000109s : 15: replace.inline 30.45% : 0.000063s : 4: replace.switch_simplify 16.76% : 0.000034s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000362 25 92.59% : 0.000335s : 15: match.inline 2.76% : 0.000010s : 4: match.switch_simplify 4.65% : 0.000017s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000953 6511 1.05% : 0.000010s : 72: predicate.accumulaten_eliminater 0.71% : 0.000007s : 21: predicate.ad_related_special_op_eliminate 0.58% : 0.000006s : 42: predicate.addn_check_dump 1.01% : 0.000010s : 72: predicate.addn_zero_filter 0.97% : 0.000009s : 72: predicate.adjust_all_reduce_mul_add 2.05% : 0.000020s : 114: predicate.arithmetic_simplify 1.56% : 0.000015s : 72: predicate.cast_eliminate 0.60% : 0.000006s : 42: predicate.check_bprop_eliminate 0.58% : 0.000006s : 42: predicate.compare_switch_simplify 0.16% : 0.000002s : 21: predicate.const_output_eliminate 0.34% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.49% : 0.000014s : 78: predicate.convert_tensor_eliminate 0.60% : 0.000006s : 42: predicate.depend_value_elim 1.07% : 0.000010s : 72: predicate.dict_get_item_const_eliminator 1.14% : 0.000011s : 72: predicate.dict_get_item_eliminator 1.13% : 0.000011s : 72: predicate.dict_set_item_eliminator 0.18% : 0.000002s : 21: predicate.elim_not_effective 0.37% : 0.000003s : 21: predicate.elim_shapecalc_of_broadcastargs 1.31% : 0.000012s : 93: predicate.environ_add_const_eliminate 1.29% : 0.000012s : 93: predicate.environ_get_add_eliminate 1.27% : 0.000012s : 93: predicate.environ_get_depend_swap 1.93% : 0.000018s : 135: predicate.environ_get_eliminate 1.38% : 0.000013s : 93: predicate.environ_get_set_eliminate 1.32% : 0.000013s : 93: predicate.exchange_switch_depend_value 1.70% : 0.000016s : 93: predicate.float_depend_g_call 0.58% : 0.000006s : 42: predicate.float_environ_get_switch 0.88% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 21: predicate.fold_const_symbol 0.65% : 0.000006s : 42: predicate.get_grad_eliminate 0.20% : 0.000002s : 21: predicate.graph_param_transform 0.60% : 0.000006s : 42: predicate.incorporate_call 0.58% : 0.000006s : 42: predicate.incorporate_call_switch 5.52% : 0.000053s : 291: predicate.inline 0.77% : 0.000007s : 42: predicate.inline_without_move 0.31% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.76% : 0.000007s : 42: predicate.less_batch_normalization 1.80% : 0.000017s : 120: predicate.list_to_tuple_eliminator_ 2.91% : 0.000028s : 192: predicate.load_eliminater 0.76% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.80% : 0.000017s : 110: predicate.loop_unroll_before_grad 1.71% : 0.000016s : 114: predicate.make_slice_get_slice_eliminator 0.59% : 0.000006s : 42: predicate.merge_addn 0.59% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.60% : 0.000006s : 42: predicate.mini_step_allgather_replace 0.97% : 0.000009s : 72: predicate.minmaximum_grad 0.39% : 0.000004s : 21: predicate.mutable_eliminate 0.33% : 0.000003s : 21: predicate.opt_reshape 0.34% : 0.000003s : 21: predicate.parallel_virtual_node 1.82% : 0.000017s : 93: predicate.partial_defer_inline 1.57% : 0.000015s : 99: predicate.partial_eliminate 1.07% : 0.000010s : 72: predicate.print_const_string_wrapper 0.67% : 0.000006s : 42: predicate.reduce_all_const_elim 1.24% : 0.000012s : 72: predicate.reduce_eliminate 0.34% : 0.000003s : 42: predicate.remove_not_recompute_node 1.13% : 0.000011s : 120: predicate.replace_applicator 0.34% : 0.000003s : 42: predicate.replace_old_param 0.17% : 0.000002s : 21: predicate.reset_defer_inline 1.02% : 0.000010s : 72: predicate.reshape_eliminate 0.61% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.34% : 0.000003s : 21: predicate.row_tensor_eliminate 0.79% : 0.000007s : 42: predicate.same_eliminate 0.37% : 0.000004s : 46: predicate.set_cell_output_no_recompute 0.67% : 0.000006s : 42: predicate.shard_identity_eliminate 1.02% : 0.000010s : 63: predicate.special_op_eliminate 0.69% : 0.000007s : 42: predicate.specialize_transform 0.69% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.72% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.72% : 0.000026s : 192: predicate.stopgrad_eliminater 0.34% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.45% : 0.000014s : 93: predicate.switch_defer_inline 1.96% : 0.000019s : 135: predicate.switch_layer_defer_inline 4.58% : 0.000044s : 253: predicate.switch_simplify 1.01% : 0.000010s : 72: predicate.tile_eliminate 1.01% : 0.000010s : 72: predicate.transpose_eliminate 1.76% : 0.000017s : 114: predicate.tuple_list_convert_item_index_to_positive 1.73% : 0.000016s : 114: predicate.tuple_list_get_item_const_eliminator 1.63% : 0.000016s : 114: predicate.tuple_list_get_item_depend_reorder 2.62% : 0.000025s : 162: predicate.tuple_list_get_item_eliminator 1.65% : 0.000016s : 114: predicate.tuple_list_get_set_item_eliminator 2.39% : 0.000023s : 156: predicate.tuple_list_set_item_eliminator 1.72% : 0.000016s : 120: predicate.tuple_to_list_eliminator_ 2.83% : 0.000027s : 192: predicate.updatestate_pure_node_eliminater 3.73% : 0.000036s : 234: predicate.updatestate_useless_node_eliminater 0.37% : 0.000004s : 21: predicate.value_based_eliminate 0.64% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.62% : 0.000006s : 42: predicate.virtual_output_eliminate 0.34% : 0.000003s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002647 41 56.72% : 0.001501s : 24: func_graph_cloner_run.FuncGraphClonerGraph 43.28% : 0.001146s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.187836 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000025s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000134s : 1: add_recomputation 0.03% : 0.000062s : 1: assign_add_opt 0.49% : 0.000924s : 1: auto_monad 0.05% : 0.000096s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.16% : 0.000298s : 1: bootstrap 0.01% : 0.000024s : 1: cconv 0.03% : 0.000050s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000022s : 1: convert_after_rewriter 0.04% : 0.000066s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.31% : 0.000584s : 1: eliminate_special_op_node 0.01% : 0.000023s : 1: environ_conv 0.01% : 0.000015s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000011s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000009s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.28% : 0.000529s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000032s : 1: opt.transform.loop_unroll_optimizer 2.63% : 0.004946s : 80: opt.transform.opt_a 0.08% : 0.000151s : 1: opt.transform.opt_after_cconv 0.26% : 0.000484s : 27: opt.transform.opt_b 0.09% : 0.000161s : 1: opt.transform.opt_trans_graph 0.04% : 0.000075s : 3: opt.transform.special_op_eliminate 0.07% : 0.000124s : 4: opt.transform.symbol_engine_opt 6.41% : 0.012038s : 1: opt_a 0.16% : 0.000309s : 1: opt_after_cconv 0.35% : 0.000648s : 1: opt_b 8.18% : 0.015370s : 1: optimize 0.01% : 0.000023s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000016s : 1: order_py_execute_after_rewriter 0.02% : 0.000034s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.03% : 0.000047s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.05% : 0.000092s : 1: pre_auto_parallel 0.04% : 0.000077s : 1: py_interpret_to_execute 0.01% : 0.000025s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000018s : 1: remove_cast_before_assign_add 0.03% : 0.000063s : 1: remove_dup_value 0.68% : 0.001270s : 1: renormalize.infer 0.64% : 0.001197s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.20% : 0.000369s : 1: rewriter_after_opt_a 0.11% : 0.000198s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000022s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000184s : 1: symbol_engine_optimizer 60.30% : 0.113265s : 1: task_emit 0.10% : 0.000184s : 1: tuple_transform 17.71% : 0.033258s : 1: type_inference 0.06% : 0.000112s : 1: validate TotalTime = 0.166608, [21] [bootstrap]: 0.00031155 [type_inference]: 0.0351961 [auto_monad]: 0.00089766 [graph_reusing]: 6.65001e-06 [inline]: 1.65e-06 [parallel-infer-symbol]: 2.48e-06 [pre_auto_parallel]: 9.092e-05 [insert-virtual-dataset]: 3.22e-06 [parallel-infer-symbol-second]: 6.19999e-07 [dataset_repeat_opt]: 1.85e-06 [pipeline_split]: 1.54e-06 [optimize]: 0.0155203, [52] [py_interpret_to_execute]: 7.076e-05 [rewriter_before_opt_a]: 0.00019881 [opt_a]: 0.0121186, [2] [Cycle 1]: 0.00690629, [43] [expand_dump_flag]: 6.38999e-06 [switch_simplify]: 0.00021357 [loop_unroll]: 7.883e-05 [a_1]: 0.0024254 [recompute_prepare]: 2.744e-05 [updatestate_depend_eliminate]: 9.958e-05 [updatestate_assign_eliminate]: 1.999e-05 [updatestate_loads_eliminate]: 1.611e-05 [parameter_eliminate]: 3.94e-06 [a_2]: 0.00032504 [accelerated_algorithm]: 4.635e-05 [shard]: 2.19001e-06 [meta_shard_fg_expand]: 9.57999e-06 [shard_inline]: 2.196e-05 [auto_parallel]: 1.957e-05 [parallel]: 8.37e-06 [flash_sp]: 1.501e-05 [merge_comm]: 1.719e-05 [allreduce_fusion]: 1.36e-05 [matmul_add_comm_reduction]: 2.223e-05 [allreduce_slice_to_reducescatter]: 4.79995e-07 [virtual_shard_identity]: 2.283e-05 [virtual_dataset]: 2.096e-05 [get_grad_eliminate_]: 2.101e-05 [virtual_output]: 2.042e-05 [merge_forward]: 1.389e-05 [cell_reuse_recompute_pass]: 1.91e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.181e-05 [before_grad]: 3.834e-05 [inplace_validation]: 1.25e-05 [meta_fg_expand]: 1.683e-05 [inplace_validation_after_expand]: 1.545e-05 [flash_sp_send_recv_attached]: 3.47001e-06 [receive_attached]: 4.34001e-06 [after_resolve]: 2.648e-05 [a_after_grad]: 3.571e-05 [special_op_eliminate]: 2.176e-05 [renormalize]: 0.00237296 [add_forward_monad_depend]: 4.2e-06 [auto_monad_grad]: 2.53001e-06 [auto_monad_eliminator]: 9.954e-05 [cse]: 0.00029411 [a_3]: 0.00015467 [Cycle 2]: 0.00196092, [43] [expand_dump_flag]: 1.50999e-06 [switch_simplify]: 2.385e-05 [loop_unroll]: 2.109e-05 [a_1]: 0.00068939 [recompute_prepare]: 2.041e-05 [updatestate_depend_eliminate]: 1.639e-05 [updatestate_assign_eliminate]: 1.664e-05 [updatestate_loads_eliminate]: 1.46e-05 [parameter_eliminate]: 2.37e-06 [a_2]: 0.00030439 [accelerated_algorithm]: 2.421e-05 [shard]: 1.16999e-06 [meta_shard_fg_expand]: 6.52e-06 [shard_inline]: 2.171e-05 [auto_parallel]: 2.024e-05 [parallel]: 3.96e-06 [flash_sp]: 3.50001e-06 [merge_comm]: 1.567e-05 [allreduce_fusion]: 1.336e-05 [matmul_add_comm_reduction]: 1.9e-05 [allreduce_slice_to_reducescatter]: 2.69996e-07 [virtual_shard_identity]: 2.225e-05 [virtual_dataset]: 2.109e-05 [get_grad_eliminate_]: 1.998e-05 [virtual_output]: 1.984e-05 [merge_forward]: 1.207e-05 [cell_reuse_recompute_pass]: 2.03001e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.005e-05 [before_grad]: 3.634e-05 [inplace_validation]: 1.122e-05 [meta_fg_expand]: 1.313e-05 [inplace_validation_after_expand]: 1.514e-05 [flash_sp_send_recv_attached]: 1.05999e-06 [receive_attached]: 7.49991e-07 [after_resolve]: 2.425e-05 [a_after_grad]: 3.534e-05 [special_op_eliminate]: 2.027e-05 [renormalize]: 8.00064e-08 [add_forward_monad_depend]: 1.56999e-06 [auto_monad_grad]: 1.45e-06 [auto_monad_eliminator]: 4.318e-05 [cse]: 5.838e-05 [a_3]: 0.00014007 [py_interpret_to_execute_after_opt_a]: 2.057e-05 [slice_cell_reuse_recomputed_activation]: 2.5e-06 [rewriter_after_opt_a]: 0.00035671 [convert_after_rewriter]: 1.876e-05 [order_py_execute_after_rewriter]: 1.375e-05 [opt_b]: 0.00063771, [1] [Cycle 1]: 0.00063141, [7] [b_1]: 0.0004729 [b_2]: 2.44e-05 [updatestate_depend_eliminate]: 1.408e-05 [updatestate_assign_eliminate]: 1.594e-05 [updatestate_loads_eliminate]: 1.487e-05 [renormalize]: 3.69997e-07 [cse]: 5.542e-05 [optimize_parallel_all_gather_comm]: 1.894e-05 [overlap_param_gather]: 1.00001e-06 [cconv]: 2.857e-05 [loop_unroll]: 0.0005365 [opt_after_cconv]: 0.00030918, [1] [Cycle 1]: 0.00030212, [7] [c_1]: 0.00015308 [parameter_eliminate]: 2.53999e-06 [updatestate_depend_eliminate]: 1.73e-05 [updatestate_assign_eliminate]: 1.663e-05 [updatestate_loads_eliminate]: 1.566e-05 [cse]: 6.092e-05 [renormalize]: 4.39992e-07 [remove_dup_value]: 6.409e-05 [tuple_transform]: 0.00017848, [1] [Cycle 1]: 0.00017251, [2] [d_1]: 0.00016268 [renormalize]: 2.09999e-07 [partial_unused_args_eliminate]: 2.84e-06 [add_cache_embedding]: 2.565e-05 [add_recomputation]: 0.00014076 [cse_after_recomputation]: 5.794e-05, [1] [Cycle 1]: 5.312e-05, [1] [cse]: 4.737e-05 [environ_conv]: 1.9e-05 [swap_dp_allreduce_reducescatter]: 1.8e-05 [bias_add_comm_swap]: 2.22999e-06 [label_micro_interleaved_index]: 1.9e-06 [label_fine_grained_interleaved_index]: 2.1e-06 [merge_cast_opt]: 1.06001e-06 [slice_recompute_activation]: 1.78e-06 [micro_interleaved_order_control]: 2.02e-06 [assign_add_opt]: 5.848e-05 [ForceFp32Comm]: 9.70002e-07 [remove_cast_before_assign_add]: 1.549e-05 [full_micro_interleaved_order_control]: 2.26e-06 [reorder_send_recv_between_fp_bp]: 2.06e-06 [comm_op_add_attrs]: 5.3e-05 [add_comm_op_reuse_tag]: 2.34001e-06 [interleave_split_concat_branches]: 8.99992e-07 [interleave_parallel_branches]: 8.10003e-07 [overlap_opt_shard_in_pipeline]: 1.01e-06 [overlap_opt_shard_grad_in_pipeline]: 2.47e-06 [control_data_broadcast_order]: 1.04999e-06 [grouped_pairwise_exchange_alltoall]: 8.57e-06 [offloading_packed_experts]: 2.2e-06 [overlap_recompute_and_grad_model_parallel]: 2.02999e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.2e-07 [overlap_recompute_allgather_and_fa_grad]: 4.417e-05 [overlap_grad_ring_attention]: 2.08999e-06 [overlap_grad_flash_sp]: 2.77e-05 [begin_end_overlap_inline]: 7.89994e-07 [split_matmul_comm_elemetwise]: 2.03999e-06 [split_layernorm_comm]: 2.23001e-06 [handle_group_info]: 7.6e-06 [symbol_engine_optimizer]: 0.00018918, [1] [Cycle 1]: 0.00018422, [6] [build]: 1.817e-05 [elim_shapecalc]: 2.78e-05 [elim_not_effective]: 4.749e-05 [opt_reshape]: 2.283e-05 [fold_const_symbol]: 3.765e-05 [renormalize]: 3.09999e-07 [pipeline_parallel_scheduler]: 1.71999e-06 [auto_monad_reorder]: 0.00010566 [get_jit_bprop_graph]: 5.29995e-07 [rewriter_after_jit_bprop_graph]: 5.10001e-07 [eliminate_special_op_node]: 0.00057205 [distribtued_split]: 1.34e-06 [validate]: 7.438e-05 [task_emit]: 0.113457 [execute]: 1.172e-05 Sums bootstrap : 0.000312s : 0.19% type_inference : 0.035196s : 21.70% auto_monad : 0.000898s : 0.55% graph_reusing : 0.000007s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000091s : 0.06% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000071s : 0.04% optimize.rewriter_before_opt_a : 0.000199s : 0.12% optimize.opt_a.expand_dump_flag : 0.000008s : 0.00% optimize.opt_a.switch_simplify : 0.000237s : 0.15% optimize.opt_a.loop_unroll : 0.000100s : 0.06% optimize.opt_a.a_1 : 0.003115s : 1.92% optimize.opt_a.recompute_prepare : 0.000048s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000116s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000037s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000031s : 0.02% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.000629s : 0.39% optimize.opt_a.accelerated_algorithm : 0.000071s : 0.04% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000016s : 0.01% optimize.opt_a.shard_inline : 0.000044s : 0.03% optimize.opt_a.auto_parallel : 0.000040s : 0.02% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000019s : 0.01% optimize.opt_a.merge_comm : 0.000033s : 0.02% optimize.opt_a.allreduce_fusion : 0.000027s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000041s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000045s : 0.03% optimize.opt_a.virtual_dataset : 0.000042s : 0.03% optimize.opt_a.get_grad_eliminate_ : 0.000041s : 0.03% optimize.opt_a.virtual_output : 0.000040s : 0.02% optimize.opt_a.merge_forward : 0.000026s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000082s : 0.05% optimize.opt_a.before_grad : 0.000075s : 0.05% optimize.opt_a.inplace_validation : 0.000024s : 0.01% optimize.opt_a.meta_fg_expand : 0.000030s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000031s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.00% optimize.opt_a.after_resolve : 0.000051s : 0.03% optimize.opt_a.a_after_grad : 0.000071s : 0.04% optimize.opt_a.special_op_eliminate : 0.000042s : 0.03% optimize.opt_a.renormalize : 0.002373s : 1.46% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000143s : 0.09% optimize.opt_a.cse : 0.000352s : 0.22% optimize.opt_a.a_3 : 0.000295s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000021s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000357s : 0.22% optimize.convert_after_rewriter : 0.000019s : 0.01% optimize.order_py_execute_after_rewriter : 0.000014s : 0.01% optimize.opt_b.b_1 : 0.000473s : 0.29% optimize.opt_b.b_2 : 0.000024s : 0.02% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000055s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000019s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000029s : 0.02% optimize.loop_unroll : 0.000536s : 0.33% optimize.opt_after_cconv.c_1 : 0.000153s : 0.09% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.cse : 0.000061s : 0.04% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000064s : 0.04% optimize.tuple_transform.d_1 : 0.000163s : 0.10% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000026s : 0.02% optimize.add_recomputation : 0.000141s : 0.09% optimize.cse_after_recomputation.cse : 0.000047s : 0.03% optimize.environ_conv : 0.000019s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000018s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000058s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000015s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000053s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000044s : 0.03% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000028s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000008s : 0.00% optimize.symbol_engine_optimizer.build : 0.000018s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000028s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000047s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000023s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000038s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000106s : 0.07% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000572s : 0.35% distribtued_split : 0.000001s : 0.00% validate : 0.000074s : 0.05% task_emit : 0.113457s : 69.96% execute : 0.000012s : 0.01% Time group info: ------[substitution.] 0.000618 300 1.11% : 0.000007s : 2: substitution.depend_value_elim 1.28% : 0.000008s : 18: substitution.elim_not_effective 1.02% : 0.000006s : 18: substitution.fold_const_symbol 2.56% : 0.000016s : 21: substitution.graph_param_transform 58.68% : 0.000363s : 15: substitution.inline 2.55% : 0.000016s : 36: substitution.j_node_and_user_rematch 3.57% : 0.000022s : 2: substitution.less_batch_normalization 2.67% : 0.000017s : 30: substitution.load_eliminater 0.94% : 0.000006s : 6: substitution.reduce_all_const_elim 3.28% : 0.000020s : 36: substitution.remove_not_recompute_node 0.97% : 0.000006s : 6: substitution.replace_old_param 2.53% : 0.000016s : 4: substitution.switch_simplify 3.88% : 0.000024s : 6: substitution.tuple_list_get_item_eliminator 8.06% : 0.000050s : 44: substitution.updatestate_pure_node_eliminater 6.90% : 0.000043s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.035133 2 92.71% : 0.032570s : 1: type_inference.infer 7.29% : 0.002563s : 1: type_inference.specialize ------[replace.] 0.000212 25 52.15% : 0.000111s : 15: replace.inline 30.83% : 0.000065s : 4: replace.switch_simplify 17.02% : 0.000036s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000386 25 91.37% : 0.000353s : 15: match.inline 3.32% : 0.000013s : 4: match.switch_simplify 5.31% : 0.000021s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000981 6511 0.96% : 0.000009s : 72: predicate.accumulaten_eliminater 0.65% : 0.000006s : 21: predicate.ad_related_special_op_eliminate 0.56% : 0.000005s : 42: predicate.addn_check_dump 0.97% : 0.000010s : 72: predicate.addn_zero_filter 0.98% : 0.000010s : 72: predicate.adjust_all_reduce_mul_add 2.10% : 0.000021s : 114: predicate.arithmetic_simplify 1.01% : 0.000010s : 72: predicate.cast_eliminate 0.59% : 0.000006s : 42: predicate.check_bprop_eliminate 0.57% : 0.000006s : 42: predicate.compare_switch_simplify 0.15% : 0.000002s : 21: predicate.const_output_eliminate 0.32% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.45% : 0.000014s : 78: predicate.convert_tensor_eliminate 0.58% : 0.000006s : 42: predicate.depend_value_elim 1.18% : 0.000012s : 72: predicate.dict_get_item_const_eliminator 1.16% : 0.000011s : 72: predicate.dict_get_item_eliminator 1.07% : 0.000010s : 72: predicate.dict_set_item_eliminator 0.18% : 0.000002s : 21: predicate.elim_not_effective 0.36% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.34% : 0.000013s : 93: predicate.environ_add_const_eliminate 1.24% : 0.000012s : 93: predicate.environ_get_add_eliminate 1.25% : 0.000012s : 93: predicate.environ_get_depend_swap 1.88% : 0.000018s : 135: predicate.environ_get_eliminate 1.25% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.28% : 0.000013s : 93: predicate.exchange_switch_depend_value 1.61% : 0.000016s : 93: predicate.float_depend_g_call 0.58% : 0.000006s : 42: predicate.float_environ_get_switch 0.84% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 21: predicate.fold_const_symbol 0.61% : 0.000006s : 42: predicate.get_grad_eliminate 0.21% : 0.000002s : 21: predicate.graph_param_transform 0.59% : 0.000006s : 42: predicate.incorporate_call 0.56% : 0.000005s : 42: predicate.incorporate_call_switch 5.42% : 0.000053s : 291: predicate.inline 0.78% : 0.000008s : 42: predicate.inline_without_move 0.31% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.73% : 0.000007s : 42: predicate.less_batch_normalization 1.73% : 0.000017s : 120: predicate.list_to_tuple_eliminator_ 2.77% : 0.000027s : 192: predicate.load_eliminater 0.74% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.64% : 0.000016s : 110: predicate.loop_unroll_before_grad 1.72% : 0.000017s : 114: predicate.make_slice_get_slice_eliminator 0.58% : 0.000006s : 42: predicate.merge_addn 0.56% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.58% : 0.000006s : 42: predicate.mini_step_allgather_replace 0.95% : 0.000009s : 72: predicate.minmaximum_grad 0.39% : 0.000004s : 21: predicate.mutable_eliminate 0.33% : 0.000003s : 21: predicate.opt_reshape 0.34% : 0.000003s : 21: predicate.parallel_virtual_node 1.78% : 0.000017s : 93: predicate.partial_defer_inline 1.52% : 0.000015s : 99: predicate.partial_eliminate 1.00% : 0.000010s : 72: predicate.print_const_string_wrapper 0.64% : 0.000006s : 42: predicate.reduce_all_const_elim 1.19% : 0.000012s : 72: predicate.reduce_eliminate 0.34% : 0.000003s : 42: predicate.remove_not_recompute_node 1.11% : 0.000011s : 120: predicate.replace_applicator 0.33% : 0.000003s : 42: predicate.replace_old_param 0.17% : 0.000002s : 21: predicate.reset_defer_inline 1.00% : 0.000010s : 72: predicate.reshape_eliminate 0.57% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.37% : 0.000004s : 21: predicate.row_tensor_eliminate 0.82% : 0.000008s : 42: predicate.same_eliminate 0.36% : 0.000004s : 46: predicate.set_cell_output_no_recompute 0.65% : 0.000006s : 42: predicate.shard_identity_eliminate 0.99% : 0.000010s : 63: predicate.special_op_eliminate 0.69% : 0.000007s : 42: predicate.specialize_transform 0.69% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.72% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.64% : 0.000026s : 192: predicate.stopgrad_eliminater 0.33% : 0.000003s : 21: predicate.switch_call_monad_eliminater 1.43% : 0.000014s : 93: predicate.switch_defer_inline 1.98% : 0.000019s : 135: predicate.switch_layer_defer_inline 4.60% : 0.000045s : 253: predicate.switch_simplify 1.00% : 0.000010s : 72: predicate.tile_eliminate 0.98% : 0.000010s : 72: predicate.transpose_eliminate 1.82% : 0.000018s : 114: predicate.tuple_list_convert_item_index_to_positive 1.74% : 0.000017s : 114: predicate.tuple_list_get_item_const_eliminator 1.54% : 0.000015s : 114: predicate.tuple_list_get_item_depend_reorder 2.64% : 0.000026s : 162: predicate.tuple_list_get_item_eliminator 4.06% : 0.000040s : 114: predicate.tuple_list_get_set_item_eliminator 2.29% : 0.000022s : 156: predicate.tuple_list_set_item_eliminator 1.84% : 0.000018s : 120: predicate.tuple_to_list_eliminator_ 2.80% : 0.000027s : 192: predicate.updatestate_pure_node_eliminater 3.65% : 0.000036s : 234: predicate.updatestate_useless_node_eliminater 0.34% : 0.000003s : 21: predicate.value_based_eliminate 0.60% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.61% : 0.000006s : 42: predicate.virtual_output_eliminate 0.35% : 0.000003s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.003004 41 61.96% : 0.001861s : 24: func_graph_cloner_run.FuncGraphClonerGraph 38.04% : 0.001143s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.190390 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000029s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000146s : 1: add_recomputation 0.03% : 0.000062s : 1: assign_add_opt 0.48% : 0.000922s : 1: auto_monad 0.06% : 0.000114s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.18% : 0.000338s : 1: bootstrap 0.02% : 0.000032s : 1: cconv 0.03% : 0.000058s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000023s : 1: convert_after_rewriter 0.03% : 0.000061s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: distribtued_split 0.31% : 0.000586s : 1: eliminate_special_op_node 0.01% : 0.000023s : 1: environ_conv 0.01% : 0.000021s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000013s : 1: graph_reusing 0.01% : 0.000011s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000011s : 1: handle_group_info 0.00% : 0.000006s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.29% : 0.000547s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000033s : 1: opt.transform.loop_unroll_optimizer 2.60% : 0.004948s : 80: opt.transform.opt_a 0.08% : 0.000151s : 1: opt.transform.opt_after_cconv 0.25% : 0.000477s : 27: opt.transform.opt_b 0.08% : 0.000160s : 1: opt.transform.opt_trans_graph 0.04% : 0.000078s : 3: opt.transform.special_op_eliminate 0.07% : 0.000131s : 4: opt.transform.symbol_engine_opt 6.37% : 0.012122s : 1: opt_a 0.16% : 0.000314s : 1: opt_after_cconv 0.34% : 0.000641s : 1: opt_b 8.16% : 0.015528s : 1: optimize 0.01% : 0.000023s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000017s : 1: order_py_execute_after_rewriter 0.02% : 0.000031s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.03% : 0.000049s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.05% : 0.000098s : 1: pre_auto_parallel 0.04% : 0.000077s : 1: py_interpret_to_execute 0.01% : 0.000025s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000018s : 1: remove_cast_before_assign_add 0.04% : 0.000070s : 1: remove_dup_value 0.64% : 0.001215s : 1: renormalize.infer 0.60% : 0.001149s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.19% : 0.000363s : 1: rewriter_after_opt_a 0.11% : 0.000204s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000021s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000193s : 1: symbol_engine_optimizer 59.64% : 0.113551s : 1: task_emit 0.10% : 0.000182s : 1: tuple_transform 18.50% : 0.035219s : 1: type_inference 0.06% : 0.000123s : 1: validate TotalTime = 0.171319, [21] [bootstrap]: 0.00029638 [type_inference]: 0.036298 [auto_monad]: 0.00091674 [graph_reusing]: 6.37e-06 [inline]: 1.44e-06 [parallel-infer-symbol]: 2.77e-06 [pre_auto_parallel]: 9.37e-05 [insert-virtual-dataset]: 2.81e-06 [parallel-infer-symbol-second]: 6.69999e-07 [dataset_repeat_opt]: 1.64e-06 [pipeline_split]: 1.64e-06 [optimize]: 0.0171003, [52] [py_interpret_to_execute]: 7.299e-05 [rewriter_before_opt_a]: 0.00023257 [opt_a]: 0.0133199, [2] [Cycle 1]: 0.0076577, [43] [expand_dump_flag]: 7.12e-06 [switch_simplify]: 0.00023773 [loop_unroll]: 9.757e-05 [a_1]: 0.00287526 [recompute_prepare]: 3.269e-05 [updatestate_depend_eliminate]: 0.00010212 [updatestate_assign_eliminate]: 2.159e-05 [updatestate_loads_eliminate]: 1.827e-05 [parameter_eliminate]: 4.12e-06 [a_2]: 0.00039242 [accelerated_algorithm]: 5.125e-05 [shard]: 2.46e-06 [meta_shard_fg_expand]: 9.69e-06 [shard_inline]: 2.641e-05 [auto_parallel]: 2.056e-05 [parallel]: 9.05999e-06 [flash_sp]: 1.475e-05 [merge_comm]: 1.862e-05 [allreduce_fusion]: 1.55e-05 [matmul_add_comm_reduction]: 2.298e-05 [allreduce_slice_to_reducescatter]: 4.79995e-07 [virtual_shard_identity]: 2.748e-05 [virtual_dataset]: 2.524e-05 [get_grad_eliminate_]: 2.519e-05 [virtual_output]: 2.433e-05 [merge_forward]: 1.513e-05 [cell_reuse_recompute_pass]: 2.1e-06 [cell_reuse_handle_not_recompute_node_pass]: 5.368e-05 [before_grad]: 4.855e-05 [inplace_validation]: 1.319e-05 [meta_fg_expand]: 2.059e-05 [inplace_validation_after_expand]: 1.734e-05 [flash_sp_send_recv_attached]: 3.15999e-06 [receive_attached]: 5.09e-06 [after_resolve]: 3.268e-05 [a_after_grad]: 4.343e-05 [special_op_eliminate]: 2.595e-05 [renormalize]: 0.00247346 [add_forward_monad_depend]: 4.3e-06 [auto_monad_grad]: 2.09e-06 [auto_monad_eliminator]: 6.383e-05 [cse]: 0.00027973 [a_3]: 0.00018501 [Cycle 2]: 0.00236887, [43] [expand_dump_flag]: 1.51999e-06 [switch_simplify]: 2.828e-05 [loop_unroll]: 2.543e-05 [a_1]: 0.0008678 [recompute_prepare]: 2.545e-05 [updatestate_depend_eliminate]: 1.739e-05 [updatestate_assign_eliminate]: 1.735e-05 [updatestate_loads_eliminate]: 1.591e-05 [parameter_eliminate]: 2.41e-06 [a_2]: 0.00038078 [accelerated_algorithm]: 2.984e-05 [shard]: 1.12e-06 [meta_shard_fg_expand]: 7.66e-06 [shard_inline]: 2.727e-05 [auto_parallel]: 2.175e-05 [parallel]: 4.11e-06 [flash_sp]: 3.9e-06 [merge_comm]: 1.845e-05 [allreduce_fusion]: 1.559e-05 [matmul_add_comm_reduction]: 2.11e-05 [allreduce_slice_to_reducescatter]: 2.59999e-07 [virtual_shard_identity]: 2.685e-05 [virtual_dataset]: 2.54e-05 [get_grad_eliminate_]: 2.413e-05 [virtual_output]: 2.415e-05 [merge_forward]: 1.308e-05 [cell_reuse_recompute_pass]: 2.32e-06 [cell_reuse_handle_not_recompute_node_pass]: 5.272e-05 [before_grad]: 4.702e-05 [inplace_validation]: 1.253e-05 [meta_fg_expand]: 1.621e-05 [inplace_validation_after_expand]: 1.724e-05 [flash_sp_send_recv_attached]: 1.03e-06 [receive_attached]: 8.30012e-07 [after_resolve]: 3.017e-05 [a_after_grad]: 4.303e-05 [special_op_eliminate]: 2.406e-05 [renormalize]: 7.99919e-08 [add_forward_monad_depend]: 1.91e-06 [auto_monad_grad]: 1.51001e-06 [auto_monad_eliminator]: 4.607e-05 [cse]: 5.779e-05 [a_3]: 0.00016768 [py_interpret_to_execute_after_opt_a]: 2.123e-05 [slice_cell_reuse_recomputed_activation]: 2.37001e-06 [rewriter_after_opt_a]: 0.00037852 [convert_after_rewriter]: 2.025e-05 [order_py_execute_after_rewriter]: 1.366e-05 [opt_b]: 0.00074491, [1] [Cycle 1]: 0.0007386, [7] [b_1]: 0.00056753 [b_2]: 2.831e-05 [updatestate_depend_eliminate]: 1.532e-05 [updatestate_assign_eliminate]: 1.724e-05 [updatestate_loads_eliminate]: 1.606e-05 [renormalize]: 4.29995e-07 [cse]: 5.665e-05 [optimize_parallel_all_gather_comm]: 1.966e-05 [overlap_param_gather]: 1.46001e-06 [cconv]: 2.817e-05 [loop_unroll]: 0.00055166 [opt_after_cconv]: 0.00034964, [1] [Cycle 1]: 0.00034229, [7] [c_1]: 0.0001875 [parameter_eliminate]: 2.48001e-06 [updatestate_depend_eliminate]: 1.815e-05 [updatestate_assign_eliminate]: 1.77e-05 [updatestate_loads_eliminate]: 1.651e-05 [cse]: 6.108e-05 [renormalize]: 4.60001e-07 [remove_dup_value]: 6.771e-05 [tuple_transform]: 0.00021841, [1] [Cycle 1]: 0.00021192, [2] [d_1]: 0.00020053 [renormalize]: 2.29993e-07 [partial_unused_args_eliminate]: 2.89e-06 [add_cache_embedding]: 2.473e-05 [add_recomputation]: 0.00014985 [cse_after_recomputation]: 6.203e-05, [1] [Cycle 1]: 5.625e-05, [1] [cse]: 4.946e-05 [environ_conv]: 2.016e-05 [swap_dp_allreduce_reducescatter]: 1.982e-05 [bias_add_comm_swap]: 2.45999e-06 [label_micro_interleaved_index]: 1.84e-06 [label_fine_grained_interleaved_index]: 2.23999e-06 [merge_cast_opt]: 1.11999e-06 [slice_recompute_activation]: 1.86999e-06 [micro_interleaved_order_control]: 1.86999e-06 [assign_add_opt]: 9.01e-05 [ForceFp32Comm]: 1.21999e-06 [remove_cast_before_assign_add]: 1.792e-05 [full_micro_interleaved_order_control]: 2.14e-06 [reorder_send_recv_between_fp_bp]: 2.07001e-06 [comm_op_add_attrs]: 5.594e-05 [add_comm_op_reuse_tag]: 2.16e-06 [interleave_split_concat_branches]: 8.80013e-07 [interleave_parallel_branches]: 8.9001e-07 [overlap_opt_shard_in_pipeline]: 1.26001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.70999e-06 [control_data_broadcast_order]: 1.05001e-06 [grouped_pairwise_exchange_alltoall]: 1.003e-05 [offloading_packed_experts]: 2.44001e-06 [overlap_recompute_and_grad_model_parallel]: 2.32999e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.40009e-07 [overlap_recompute_allgather_and_fa_grad]: 4.424e-05 [overlap_grad_ring_attention]: 2.04e-06 [overlap_grad_flash_sp]: 3.433e-05 [begin_end_overlap_inline]: 9.10004e-07 [split_matmul_comm_elemetwise]: 2.07e-06 [split_layernorm_comm]: 1.89e-06 [handle_group_info]: 7.29e-06 [symbol_engine_optimizer]: 0.00021936, [1] [Cycle 1]: 0.00021301, [6] [build]: 1.81e-05 [elim_shapecalc]: 3.315e-05 [elim_not_effective]: 5.649e-05 [opt_reshape]: 2.642e-05 [fold_const_symbol]: 4.719e-05 [renormalize]: 4.69998e-07 [pipeline_parallel_scheduler]: 1.65e-06 [auto_monad_reorder]: 0.00010751 [get_jit_bprop_graph]: 5.19998e-07 [rewriter_after_jit_bprop_graph]: 4.50003e-07 [eliminate_special_op_node]: 0.00060767 [distribtued_split]: 1.69e-06 [validate]: 7.766e-05 [task_emit]: 0.115482 [execute]: 1.201e-05 Sums bootstrap : 0.000296s : 0.18% type_inference : 0.036298s : 21.76% auto_monad : 0.000917s : 0.55% graph_reusing : 0.000006s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000094s : 0.06% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000073s : 0.04% optimize.rewriter_before_opt_a : 0.000233s : 0.14% optimize.opt_a.expand_dump_flag : 0.000009s : 0.01% optimize.opt_a.switch_simplify : 0.000266s : 0.16% optimize.opt_a.loop_unroll : 0.000123s : 0.07% optimize.opt_a.a_1 : 0.003743s : 2.24% optimize.opt_a.recompute_prepare : 0.000058s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000120s : 0.07% optimize.opt_a.updatestate_assign_eliminate : 0.000039s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000034s : 0.02% optimize.opt_a.parameter_eliminate : 0.000007s : 0.00% optimize.opt_a.a_2 : 0.000773s : 0.46% optimize.opt_a.accelerated_algorithm : 0.000081s : 0.05% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000017s : 0.01% optimize.opt_a.shard_inline : 0.000054s : 0.03% optimize.opt_a.auto_parallel : 0.000042s : 0.03% optimize.opt_a.parallel : 0.000013s : 0.01% optimize.opt_a.flash_sp : 0.000019s : 0.01% optimize.opt_a.merge_comm : 0.000037s : 0.02% optimize.opt_a.allreduce_fusion : 0.000031s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000044s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000054s : 0.03% optimize.opt_a.virtual_dataset : 0.000051s : 0.03% optimize.opt_a.get_grad_eliminate_ : 0.000049s : 0.03% optimize.opt_a.virtual_output : 0.000048s : 0.03% optimize.opt_a.merge_forward : 0.000028s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000106s : 0.06% optimize.opt_a.before_grad : 0.000096s : 0.06% optimize.opt_a.inplace_validation : 0.000026s : 0.02% optimize.opt_a.meta_fg_expand : 0.000037s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000035s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.000063s : 0.04% optimize.opt_a.a_after_grad : 0.000086s : 0.05% optimize.opt_a.special_op_eliminate : 0.000050s : 0.03% optimize.opt_a.renormalize : 0.002474s : 1.48% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000110s : 0.07% optimize.opt_a.cse : 0.000338s : 0.20% optimize.opt_a.a_3 : 0.000353s : 0.21% optimize.py_interpret_to_execute_after_opt_a : 0.000021s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000379s : 0.23% optimize.convert_after_rewriter : 0.000020s : 0.01% optimize.order_py_execute_after_rewriter : 0.000014s : 0.01% optimize.opt_b.b_1 : 0.000568s : 0.34% optimize.opt_b.b_2 : 0.000028s : 0.02% optimize.opt_b.updatestate_depend_eliminate : 0.000015s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000017s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000016s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000057s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000020s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000028s : 0.02% optimize.loop_unroll : 0.000552s : 0.33% optimize.opt_after_cconv.c_1 : 0.000188s : 0.11% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000018s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000018s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.cse : 0.000061s : 0.04% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000068s : 0.04% optimize.tuple_transform.d_1 : 0.000201s : 0.12% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000025s : 0.01% optimize.add_recomputation : 0.000150s : 0.09% optimize.cse_after_recomputation.cse : 0.000049s : 0.03% optimize.environ_conv : 0.000020s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000020s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000090s : 0.05% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000018s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000056s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000044s : 0.03% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000034s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000018s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000033s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000056s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000026s : 0.02% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000047s : 0.03% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000108s : 0.06% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000608s : 0.36% distribtued_split : 0.000002s : 0.00% validate : 0.000078s : 0.05% task_emit : 0.115482s : 69.23% execute : 0.000012s : 0.01% Time group info: ------[substitution.] 0.000678 300 1.13% : 0.000008s : 2: substitution.depend_value_elim 1.50% : 0.000010s : 18: substitution.elim_not_effective 1.20% : 0.000008s : 18: substitution.fold_const_symbol 2.91% : 0.000020s : 21: substitution.graph_param_transform 53.90% : 0.000365s : 15: substitution.inline 3.15% : 0.000021s : 36: substitution.j_node_and_user_rematch 3.39% : 0.000023s : 2: substitution.less_batch_normalization 3.01% : 0.000020s : 30: substitution.load_eliminater 0.98% : 0.000007s : 6: substitution.reduce_all_const_elim 4.33% : 0.000029s : 36: substitution.remove_not_recompute_node 1.05% : 0.000007s : 6: substitution.replace_old_param 2.43% : 0.000016s : 4: substitution.switch_simplify 3.80% : 0.000026s : 6: substitution.tuple_list_get_item_eliminator 8.47% : 0.000057s : 44: substitution.updatestate_pure_node_eliminater 8.77% : 0.000059s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.036232 2 92.51% : 0.033518s : 1: type_inference.infer 7.49% : 0.002713s : 1: type_inference.specialize ------[replace.] 0.000214 25 52.42% : 0.000112s : 15: replace.inline 30.99% : 0.000066s : 4: replace.switch_simplify 16.59% : 0.000035s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000391 25 90.90% : 0.000355s : 15: match.inline 3.47% : 0.000014s : 4: match.switch_simplify 5.64% : 0.000022s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.001125 6511 1.00% : 0.000011s : 72: predicate.accumulaten_eliminater 0.68% : 0.000008s : 21: predicate.ad_related_special_op_eliminate 0.60% : 0.000007s : 42: predicate.addn_check_dump 1.00% : 0.000011s : 72: predicate.addn_zero_filter 0.98% : 0.000011s : 72: predicate.adjust_all_reduce_mul_add 2.13% : 0.000024s : 114: predicate.arithmetic_simplify 1.05% : 0.000012s : 72: predicate.cast_eliminate 0.63% : 0.000007s : 42: predicate.check_bprop_eliminate 0.58% : 0.000007s : 42: predicate.compare_switch_simplify 0.17% : 0.000002s : 21: predicate.const_output_eliminate 0.32% : 0.000004s : 21: predicate.convert_tensor_all_eliminate 1.41% : 0.000016s : 78: predicate.convert_tensor_eliminate 0.61% : 0.000007s : 42: predicate.depend_value_elim 1.05% : 0.000012s : 72: predicate.dict_get_item_const_eliminator 1.08% : 0.000012s : 72: predicate.dict_get_item_eliminator 1.06% : 0.000012s : 72: predicate.dict_set_item_eliminator 0.19% : 0.000002s : 21: predicate.elim_not_effective 0.39% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.28% : 0.000014s : 93: predicate.environ_add_const_eliminate 1.28% : 0.000014s : 93: predicate.environ_get_add_eliminate 1.29% : 0.000015s : 93: predicate.environ_get_depend_swap 1.89% : 0.000021s : 135: predicate.environ_get_eliminate 1.29% : 0.000015s : 93: predicate.environ_get_set_eliminate 1.33% : 0.000015s : 93: predicate.exchange_switch_depend_value 1.72% : 0.000019s : 93: predicate.float_depend_g_call 0.59% : 0.000007s : 42: predicate.float_environ_get_switch 0.89% : 0.000010s : 63: predicate.float_tuple_getitem_switch 0.17% : 0.000002s : 21: predicate.fold_const_symbol 0.61% : 0.000007s : 42: predicate.get_grad_eliminate 0.21% : 0.000002s : 21: predicate.graph_param_transform 0.64% : 0.000007s : 42: predicate.incorporate_call 0.59% : 0.000007s : 42: predicate.incorporate_call_switch 6.09% : 0.000068s : 291: predicate.inline 0.87% : 0.000010s : 42: predicate.inline_without_move 0.34% : 0.000004s : 42: predicate.j_node_and_user_rematch 0.73% : 0.000008s : 42: predicate.less_batch_normalization 1.75% : 0.000020s : 120: predicate.list_to_tuple_eliminator_ 2.87% : 0.000032s : 192: predicate.load_eliminater 0.69% : 0.000008s : 21: predicate.loop_unroll_after_grad 1.72% : 0.000019s : 110: predicate.loop_unroll_before_grad 1.72% : 0.000019s : 114: predicate.make_slice_get_slice_eliminator 0.61% : 0.000007s : 42: predicate.merge_addn 0.60% : 0.000007s : 42: predicate.micro_step_allgather_replace 0.60% : 0.000007s : 42: predicate.mini_step_allgather_replace 1.00% : 0.000011s : 72: predicate.minmaximum_grad 0.39% : 0.000004s : 21: predicate.mutable_eliminate 0.33% : 0.000004s : 21: predicate.opt_reshape 0.36% : 0.000004s : 21: predicate.parallel_virtual_node 1.81% : 0.000020s : 93: predicate.partial_defer_inline 1.64% : 0.000018s : 99: predicate.partial_eliminate 1.01% : 0.000011s : 72: predicate.print_const_string_wrapper 0.66% : 0.000007s : 42: predicate.reduce_all_const_elim 1.18% : 0.000013s : 72: predicate.reduce_eliminate 0.36% : 0.000004s : 42: predicate.remove_not_recompute_node 1.15% : 0.000013s : 120: predicate.replace_applicator 0.35% : 0.000004s : 42: predicate.replace_old_param 0.17% : 0.000002s : 21: predicate.reset_defer_inline 1.08% : 0.000012s : 72: predicate.reshape_eliminate 0.63% : 0.000007s : 42: predicate.row_tensor_add_zeros_like 0.35% : 0.000004s : 21: predicate.row_tensor_eliminate 0.81% : 0.000009s : 42: predicate.same_eliminate 0.39% : 0.000004s : 46: predicate.set_cell_output_no_recompute 0.68% : 0.000008s : 42: predicate.shard_identity_eliminate 0.98% : 0.000011s : 63: predicate.special_op_eliminate 0.78% : 0.000009s : 42: predicate.specialize_transform 0.70% : 0.000008s : 42: predicate.split_environ_get_set_with_tuple_value 0.68% : 0.000008s : 42: predicate.stack_unstack_eliminate 2.71% : 0.000030s : 192: predicate.stopgrad_eliminater 0.35% : 0.000004s : 21: predicate.switch_call_monad_eliminater 1.44% : 0.000016s : 93: predicate.switch_defer_inline 1.98% : 0.000022s : 135: predicate.switch_layer_defer_inline 4.54% : 0.000051s : 253: predicate.switch_simplify 1.07% : 0.000012s : 72: predicate.tile_eliminate 1.05% : 0.000012s : 72: predicate.transpose_eliminate 1.78% : 0.000020s : 114: predicate.tuple_list_convert_item_index_to_positive 1.74% : 0.000020s : 114: predicate.tuple_list_get_item_const_eliminator 1.60% : 0.000018s : 114: predicate.tuple_list_get_item_depend_reorder 2.64% : 0.000030s : 162: predicate.tuple_list_get_item_eliminator 1.64% : 0.000018s : 114: predicate.tuple_list_get_set_item_eliminator 2.38% : 0.000027s : 156: predicate.tuple_list_set_item_eliminator 1.76% : 0.000020s : 120: predicate.tuple_to_list_eliminator_ 2.88% : 0.000032s : 192: predicate.updatestate_pure_node_eliminater 3.68% : 0.000041s : 234: predicate.updatestate_useless_node_eliminater 0.34% : 0.000004s : 21: predicate.value_based_eliminate 0.63% : 0.000007s : 42: predicate.virtual_dataset_eliminate 0.61% : 0.000007s : 42: predicate.virtual_output_eliminate 0.37% : 0.000004s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002828 41 59.24% : 0.001675s : 24: func_graph_cloner_run.FuncGraphClonerGraph 40.76% : 0.001153s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.198007 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.01% : 0.000029s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000155s : 1: add_recomputation 0.05% : 0.000095s : 1: assign_add_opt 0.48% : 0.000941s : 1: auto_monad 0.06% : 0.000116s : 1: auto_monad_reorder 0.00% : 0.000006s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.16% : 0.000323s : 1: bootstrap 0.02% : 0.000032s : 1: cconv 0.03% : 0.000061s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000025s : 1: convert_after_rewriter 0.03% : 0.000066s : 1: cse_after_recomputation 0.00% : 0.000007s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: distribtued_split 0.31% : 0.000623s : 1: eliminate_special_op_node 0.01% : 0.000025s : 1: environ_conv 0.01% : 0.000021s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000014s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000011s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.28% : 0.000562s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000039s : 1: opt.transform.loop_unroll_optimizer 3.01% : 0.005965s : 80: opt.transform.opt_a 0.09% : 0.000186s : 1: opt.transform.opt_after_cconv 0.29% : 0.000571s : 27: opt.transform.opt_b 0.10% : 0.000198s : 1: opt.transform.opt_trans_graph 0.05% : 0.000091s : 3: opt.transform.special_op_eliminate 0.08% : 0.000157s : 4: opt.transform.symbol_engine_opt 6.73% : 0.013324s : 1: opt_a 0.18% : 0.000354s : 1: opt_after_cconv 0.38% : 0.000748s : 1: opt_b 8.64% : 0.017110s : 1: optimize 0.01% : 0.000023s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000017s : 1: order_py_execute_after_rewriter 0.02% : 0.000038s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.02% : 0.000049s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.05% : 0.000103s : 1: pre_auto_parallel 0.04% : 0.000079s : 1: py_interpret_to_execute 0.01% : 0.000026s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000021s : 1: remove_cast_before_assign_add 0.04% : 0.000074s : 1: remove_dup_value 0.65% : 0.001286s : 1: renormalize.infer 0.59% : 0.001178s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.19% : 0.000386s : 1: rewriter_after_opt_a 0.12% : 0.000239s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000023s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000223s : 1: symbol_engine_optimizer 58.34% : 0.115519s : 1: task_emit 0.11% : 0.000222s : 1: tuple_transform 18.34% : 0.036321s : 1: type_inference 0.07% : 0.000130s : 1: validate [WARNING] DEVICE(163846,fffe5cff90f0,python3.7):2025-02-07-13:54:20.575.507 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] DISTRIBUTED(163846,fffea57fa0f0,python3.7):2025-02-07-13:54:20.575.584 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: (0, 1, 2, 3, 4, 5, 6, 7) [WARNING] PARALLEL(163846,ffff97644c10,python3.7):2025-02-07-13:54:20.630.750 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 0.161504, [21] [bootstrap]: 0.0003168 [type_inference]: 0.0342366 [auto_monad]: 0.00089743 [graph_reusing]: 6.36e-06 [inline]: 1.86999e-06 [parallel-infer-symbol]: 2.58e-06 [pre_auto_parallel]: 9.028e-05 [insert-virtual-dataset]: 3.28e-06 [parallel-infer-symbol-second]: 6.19999e-07 [dataset_repeat_opt]: 1.61001e-06 [pipeline_split]: 2.22001e-06 [optimize]: 0.015542, [52] [py_interpret_to_execute]: 7.171e-05 [rewriter_before_opt_a]: 0.00019829 [opt_a]: 0.0121085, [2] [Cycle 1]: 0.00683656, [43] [expand_dump_flag]: 7.32001e-06 [switch_simplify]: 0.00021706 [loop_unroll]: 7.994e-05 [a_1]: 0.00238577 [recompute_prepare]: 2.709e-05 [updatestate_depend_eliminate]: 0.00010134 [updatestate_assign_eliminate]: 2.05e-05 [updatestate_loads_eliminate]: 1.684e-05 [parameter_eliminate]: 4.04e-06 [a_2]: 0.00038366 [accelerated_algorithm]: 4.44e-05 [shard]: 2.39001e-06 [meta_shard_fg_expand]: 9.51e-06 [shard_inline]: 2.152e-05 [auto_parallel]: 1.996e-05 [parallel]: 8.83e-06 [flash_sp]: 1.48e-05 [merge_comm]: 1.682e-05 [allreduce_fusion]: 1.397e-05 [matmul_add_comm_reduction]: 2.363e-05 [allreduce_slice_to_reducescatter]: 5.69999e-07 [virtual_shard_identity]: 2.269e-05 [virtual_dataset]: 2.096e-05 [get_grad_eliminate_]: 2.15e-05 [virtual_output]: 2.05e-05 [merge_forward]: 1.46e-05 [cell_reuse_recompute_pass]: 2.07001e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.216e-05 [before_grad]: 3.833e-05 [inplace_validation]: 1.241e-05 [meta_fg_expand]: 1.724e-05 [inplace_validation_after_expand]: 1.554e-05 [flash_sp_send_recv_attached]: 3.47001e-06 [receive_attached]: 4.89999e-06 [after_resolve]: 2.737e-05 [a_after_grad]: 3.494e-05 [special_op_eliminate]: 2.213e-05 [renormalize]: 0.00230884 [add_forward_monad_depend]: 4.17999e-06 [auto_monad_grad]: 2.06e-06 [auto_monad_eliminator]: 6.257e-05 [cse]: 0.00028913 [a_3]: 0.00015561 [Cycle 2]: 0.0020141, [43] [expand_dump_flag]: 1.71999e-06 [switch_simplify]: 2.42e-05 [loop_unroll]: 2.178e-05 [a_1]: 0.00073971 [recompute_prepare]: 2.113e-05 [updatestate_depend_eliminate]: 1.675e-05 [updatestate_assign_eliminate]: 1.669e-05 [updatestate_loads_eliminate]: 1.516e-05 [parameter_eliminate]: 2.31e-06 [a_2]: 0.00030644 [accelerated_algorithm]: 2.412e-05 [shard]: 1.18e-06 [meta_shard_fg_expand]: 6.80001e-06 [shard_inline]: 2.15e-05 [auto_parallel]: 1.965e-05 [parallel]: 3.97999e-06 [flash_sp]: 3.57999e-06 [merge_comm]: 1.629e-05 [allreduce_fusion]: 1.298e-05 [matmul_add_comm_reduction]: 1.949e-05 [allreduce_slice_to_reducescatter]: 3.00002e-07 [virtual_shard_identity]: 2.211e-05 [virtual_dataset]: 2.102e-05 [get_grad_eliminate_]: 2e-05 [virtual_output]: 1.965e-05 [merge_forward]: 1.182e-05 [cell_reuse_recompute_pass]: 2.27999e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.043e-05 [before_grad]: 3.631e-05 [inplace_validation]: 1.134e-05 [meta_fg_expand]: 1.333e-05 [inplace_validation_after_expand]: 1.549e-05 [flash_sp_send_recv_attached]: 1.08e-06 [receive_attached]: 7.7e-07 [after_resolve]: 2.484e-05 [a_after_grad]: 3.535e-05 [special_op_eliminate]: 1.984e-05 [renormalize]: 7.99919e-08 [add_forward_monad_depend]: 1.50999e-06 [auto_monad_grad]: 1.38e-06 [auto_monad_eliminator]: 4.234e-05 [cse]: 5.799e-05 [a_3]: 0.0001399 [py_interpret_to_execute_after_opt_a]: 2.011e-05 [slice_cell_reuse_recomputed_activation]: 2.45e-06 [rewriter_after_opt_a]: 0.00034938 [convert_after_rewriter]: 1.923e-05 [order_py_execute_after_rewriter]: 1.344e-05 [opt_b]: 0.0006471, [1] [Cycle 1]: 0.00064083, [7] [b_1]: 0.00047619 [b_2]: 2.42e-05 [updatestate_depend_eliminate]: 1.438e-05 [updatestate_assign_eliminate]: 1.638e-05 [updatestate_loads_eliminate]: 1.526e-05 [renormalize]: 3.90006e-07 [cse]: 5.797e-05 [optimize_parallel_all_gather_comm]: 2.037e-05 [overlap_param_gather]: 1.09999e-06 [cconv]: 2.795e-05 [loop_unroll]: 0.00053216 [opt_after_cconv]: 0.00031092, [1] [Cycle 1]: 0.00030346, [7] [c_1]: 0.00015385 [parameter_eliminate]: 2.59001e-06 [updatestate_depend_eliminate]: 1.709e-05 [updatestate_assign_eliminate]: 1.677e-05 [updatestate_loads_eliminate]: 1.575e-05 [cse]: 6.147e-05 [renormalize]: 5.8001e-07 [remove_dup_value]: 6.776e-05 [tuple_transform]: 0.00019076, [1] [Cycle 1]: 0.00018462, [2] [d_1]: 0.00017375 [renormalize]: 2.90005e-07 [partial_unused_args_eliminate]: 2.83e-06 [add_cache_embedding]: 2.473e-05 [add_recomputation]: 0.00014406 [cse_after_recomputation]: 5.999e-05, [1] [Cycle 1]: 5.514e-05, [1] [cse]: 4.917e-05 [environ_conv]: 1.896e-05 [swap_dp_allreduce_reducescatter]: 1.819e-05 [bias_add_comm_swap]: 2.64999e-06 [label_micro_interleaved_index]: 2.09e-06 [label_fine_grained_interleaved_index]: 2.34001e-06 [merge_cast_opt]: 1.16999e-06 [slice_recompute_activation]: 1.79e-06 [micro_interleaved_order_control]: 2.29001e-06 [assign_add_opt]: 6.133e-05 [ForceFp32Comm]: 1.04e-06 [remove_cast_before_assign_add]: 1.698e-05 [full_micro_interleaved_order_control]: 2.11e-06 [reorder_send_recv_between_fp_bp]: 2.4e-06 [comm_op_add_attrs]: 5.235e-05 [add_comm_op_reuse_tag]: 2.1e-06 [interleave_split_concat_branches]: 1.04e-06 [interleave_parallel_branches]: 1.13e-06 [overlap_opt_shard_in_pipeline]: 1.31999e-06 [overlap_opt_shard_grad_in_pipeline]: 2.27e-06 [control_data_broadcast_order]: 1.10001e-06 [grouped_pairwise_exchange_alltoall]: 9.72999e-06 [offloading_packed_experts]: 2.39001e-06 [overlap_recompute_and_grad_model_parallel]: 2.22e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.49992e-07 [overlap_recompute_allgather_and_fa_grad]: 4.27e-05 [overlap_grad_ring_attention]: 2.45999e-06 [overlap_grad_flash_sp]: 2.808e-05 [begin_end_overlap_inline]: 7.90009e-07 [split_matmul_comm_elemetwise]: 1.96001e-06 [split_layernorm_comm]: 1.95e-06 [handle_group_info]: 7.29e-06 [symbol_engine_optimizer]: 0.00018767, [1] [Cycle 1]: 0.00018255, [6] [build]: 1.784e-05 [elim_shapecalc]: 2.859e-05 [elim_not_effective]: 4.163e-05 [opt_reshape]: 2.611e-05 [fold_const_symbol]: 4.033e-05 [renormalize]: 3.50003e-07 [pipeline_parallel_scheduler]: 2.11e-06 [auto_monad_reorder]: 0.00010189 [get_jit_bprop_graph]: 4.79995e-07 [rewriter_after_jit_bprop_graph]: 4.50003e-07 [eliminate_special_op_node]: 0.00057886 [distribtued_split]: 1.50999e-06 [validate]: 7.474e-05 [task_emit]: 0.109341 [execute]: 1.223e-05 Sums bootstrap : 0.000317s : 0.20% type_inference : 0.034237s : 21.79% auto_monad : 0.000897s : 0.57% graph_reusing : 0.000006s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000090s : 0.06% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000072s : 0.05% optimize.rewriter_before_opt_a : 0.000198s : 0.13% optimize.opt_a.expand_dump_flag : 0.000009s : 0.01% optimize.opt_a.switch_simplify : 0.000241s : 0.15% optimize.opt_a.loop_unroll : 0.000102s : 0.06% optimize.opt_a.a_1 : 0.003125s : 1.99% optimize.opt_a.recompute_prepare : 0.000048s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000118s : 0.08% optimize.opt_a.updatestate_assign_eliminate : 0.000037s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000032s : 0.02% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.000690s : 0.44% optimize.opt_a.accelerated_algorithm : 0.000069s : 0.04% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000016s : 0.01% optimize.opt_a.shard_inline : 0.000043s : 0.03% optimize.opt_a.auto_parallel : 0.000040s : 0.03% optimize.opt_a.parallel : 0.000013s : 0.01% optimize.opt_a.flash_sp : 0.000018s : 0.01% optimize.opt_a.merge_comm : 0.000033s : 0.02% optimize.opt_a.allreduce_fusion : 0.000027s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000043s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000045s : 0.03% optimize.opt_a.virtual_dataset : 0.000042s : 0.03% optimize.opt_a.get_grad_eliminate_ : 0.000042s : 0.03% optimize.opt_a.virtual_output : 0.000040s : 0.03% optimize.opt_a.merge_forward : 0.000026s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000083s : 0.05% optimize.opt_a.before_grad : 0.000075s : 0.05% optimize.opt_a.inplace_validation : 0.000024s : 0.02% optimize.opt_a.meta_fg_expand : 0.000031s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000031s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.000052s : 0.03% optimize.opt_a.a_after_grad : 0.000070s : 0.04% optimize.opt_a.special_op_eliminate : 0.000042s : 0.03% optimize.opt_a.renormalize : 0.002309s : 1.47% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000105s : 0.07% optimize.opt_a.cse : 0.000347s : 0.22% optimize.opt_a.a_3 : 0.000296s : 0.19% optimize.py_interpret_to_execute_after_opt_a : 0.000020s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000349s : 0.22% optimize.convert_after_rewriter : 0.000019s : 0.01% optimize.order_py_execute_after_rewriter : 0.000013s : 0.01% optimize.opt_b.b_1 : 0.000476s : 0.30% optimize.opt_b.b_2 : 0.000024s : 0.02% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000016s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000015s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000058s : 0.04% optimize.optimize_parallel_all_gather_comm : 0.000020s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000028s : 0.02% optimize.loop_unroll : 0.000532s : 0.34% optimize.opt_after_cconv.c_1 : 0.000154s : 0.10% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000017s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000016s : 0.01% optimize.opt_after_cconv.cse : 0.000061s : 0.04% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000068s : 0.04% optimize.tuple_transform.d_1 : 0.000174s : 0.11% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000025s : 0.02% optimize.add_recomputation : 0.000144s : 0.09% optimize.cse_after_recomputation.cse : 0.000049s : 0.03% optimize.environ_conv : 0.000019s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000018s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000061s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000017s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000052s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000043s : 0.03% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000028s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000018s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000029s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000042s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000026s : 0.02% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000040s : 0.03% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000102s : 0.06% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000579s : 0.37% distribtued_split : 0.000002s : 0.00% validate : 0.000075s : 0.05% task_emit : 0.109341s : 69.59% execute : 0.000012s : 0.01% Time group info: ------[substitution.] 0.000611 300 1.19% : 0.000007s : 2: substitution.depend_value_elim 1.04% : 0.000006s : 18: substitution.elim_not_effective 1.14% : 0.000007s : 18: substitution.fold_const_symbol 2.75% : 0.000017s : 21: substitution.graph_param_transform 58.01% : 0.000354s : 15: substitution.inline 2.54% : 0.000015s : 36: substitution.j_node_and_user_rematch 3.52% : 0.000022s : 2: substitution.less_batch_normalization 2.84% : 0.000017s : 30: substitution.load_eliminater 0.91% : 0.000006s : 6: substitution.reduce_all_const_elim 3.35% : 0.000020s : 36: substitution.remove_not_recompute_node 1.00% : 0.000006s : 6: substitution.replace_old_param 2.47% : 0.000015s : 4: substitution.switch_simplify 4.04% : 0.000025s : 6: substitution.tuple_list_get_item_eliminator 8.03% : 0.000049s : 44: substitution.updatestate_pure_node_eliminater 7.17% : 0.000044s : 56: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.034172 2 92.59% : 0.031640s : 1: type_inference.infer 7.41% : 0.002532s : 1: type_inference.specialize ------[replace.] 0.000209 25 52.43% : 0.000110s : 15: replace.inline 32.00% : 0.000067s : 4: replace.switch_simplify 15.58% : 0.000033s : 6: replace.tuple_list_get_item_eliminator ------[match.] 0.000379 25 91.04% : 0.000345s : 15: match.inline 3.35% : 0.000013s : 4: match.switch_simplify 5.60% : 0.000021s : 6: match.tuple_list_get_item_eliminator ------[predicate.] 0.000998 6511 0.94% : 0.000009s : 72: predicate.accumulaten_eliminater 0.66% : 0.000007s : 21: predicate.ad_related_special_op_eliminate 0.55% : 0.000005s : 42: predicate.addn_check_dump 0.92% : 0.000009s : 72: predicate.addn_zero_filter 0.95% : 0.000009s : 72: predicate.adjust_all_reduce_mul_add 2.13% : 0.000021s : 114: predicate.arithmetic_simplify 0.96% : 0.000010s : 72: predicate.cast_eliminate 0.58% : 0.000006s : 42: predicate.check_bprop_eliminate 0.56% : 0.000006s : 42: predicate.compare_switch_simplify 0.16% : 0.000002s : 21: predicate.const_output_eliminate 0.30% : 0.000003s : 21: predicate.convert_tensor_all_eliminate 1.33% : 0.000013s : 78: predicate.convert_tensor_eliminate 0.60% : 0.000006s : 42: predicate.depend_value_elim 1.03% : 0.000010s : 72: predicate.dict_get_item_const_eliminator 1.07% : 0.000011s : 72: predicate.dict_get_item_eliminator 1.14% : 0.000011s : 72: predicate.dict_set_item_eliminator 0.17% : 0.000002s : 21: predicate.elim_not_effective 0.37% : 0.000004s : 21: predicate.elim_shapecalc_of_broadcastargs 1.23% : 0.000012s : 93: predicate.environ_add_const_eliminate 1.32% : 0.000013s : 93: predicate.environ_get_add_eliminate 1.22% : 0.000012s : 93: predicate.environ_get_depend_swap 1.88% : 0.000019s : 135: predicate.environ_get_eliminate 1.24% : 0.000012s : 93: predicate.environ_get_set_eliminate 1.26% : 0.000013s : 93: predicate.exchange_switch_depend_value 1.65% : 0.000016s : 93: predicate.float_depend_g_call 0.55% : 0.000006s : 42: predicate.float_environ_get_switch 0.82% : 0.000008s : 63: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 21: predicate.fold_const_symbol 0.59% : 0.000006s : 42: predicate.get_grad_eliminate 0.21% : 0.000002s : 21: predicate.graph_param_transform 0.58% : 0.000006s : 42: predicate.incorporate_call 0.55% : 0.000005s : 42: predicate.incorporate_call_switch 9.80% : 0.000098s : 291: predicate.inline 0.75% : 0.000007s : 42: predicate.inline_without_move 0.30% : 0.000003s : 42: predicate.j_node_and_user_rematch 0.70% : 0.000007s : 42: predicate.less_batch_normalization 1.81% : 0.000018s : 120: predicate.list_to_tuple_eliminator_ 2.73% : 0.000027s : 192: predicate.load_eliminater 0.71% : 0.000007s : 21: predicate.loop_unroll_after_grad 1.66% : 0.000017s : 110: predicate.loop_unroll_before_grad 1.62% : 0.000016s : 114: predicate.make_slice_get_slice_eliminator 0.57% : 0.000006s : 42: predicate.merge_addn 0.58% : 0.000006s : 42: predicate.micro_step_allgather_replace 0.57% : 0.000006s : 42: predicate.mini_step_allgather_replace 0.93% : 0.000009s : 72: predicate.minmaximum_grad 0.41% : 0.000004s : 21: predicate.mutable_eliminate 0.33% : 0.000003s : 21: predicate.opt_reshape 0.32% : 0.000003s : 21: predicate.parallel_virtual_node 1.81% : 0.000018s : 93: predicate.partial_defer_inline 1.51% : 0.000015s : 99: predicate.partial_eliminate 0.99% : 0.000010s : 72: predicate.print_const_string_wrapper 0.63% : 0.000006s : 42: predicate.reduce_all_const_elim 1.22% : 0.000012s : 72: predicate.reduce_eliminate 0.33% : 0.000003s : 42: predicate.remove_not_recompute_node 1.08% : 0.000011s : 120: predicate.replace_applicator 0.32% : 0.000003s : 42: predicate.replace_old_param 0.17% : 0.000002s : 21: predicate.reset_defer_inline 0.97% : 0.000010s : 72: predicate.reshape_eliminate 0.60% : 0.000006s : 42: predicate.row_tensor_add_zeros_like 0.34% : 0.000003s : 21: predicate.row_tensor_eliminate 0.78% : 0.000008s : 42: predicate.same_eliminate 0.34% : 0.000003s : 46: predicate.set_cell_output_no_recompute 0.65% : 0.000007s : 42: predicate.shard_identity_eliminate 0.97% : 0.000010s : 63: predicate.special_op_eliminate 0.66% : 0.000007s : 42: predicate.specialize_transform 0.66% : 0.000007s : 42: predicate.split_environ_get_set_with_tuple_value 0.67% : 0.000007s : 42: predicate.stack_unstack_eliminate 2.64% : 0.000026s : 192: predicate.stopgrad_eliminater 0.35% : 0.000004s : 21: predicate.switch_call_monad_eliminater 1.37% : 0.000014s : 93: predicate.switch_defer_inline 1.93% : 0.000019s : 135: predicate.switch_layer_defer_inline 4.61% : 0.000046s : 253: predicate.switch_simplify 0.98% : 0.000010s : 72: predicate.tile_eliminate 0.94% : 0.000009s : 72: predicate.transpose_eliminate 1.72% : 0.000017s : 114: predicate.tuple_list_convert_item_index_to_positive 1.65% : 0.000016s : 114: predicate.tuple_list_get_item_const_eliminator 1.52% : 0.000015s : 114: predicate.tuple_list_get_item_depend_reorder 2.49% : 0.000025s : 162: predicate.tuple_list_get_item_eliminator 1.56% : 0.000016s : 114: predicate.tuple_list_get_set_item_eliminator 2.30% : 0.000023s : 156: predicate.tuple_list_set_item_eliminator 1.67% : 0.000017s : 120: predicate.tuple_to_list_eliminator_ 2.68% : 0.000027s : 192: predicate.updatestate_pure_node_eliminater 3.55% : 0.000035s : 234: predicate.updatestate_useless_node_eliminater 0.34% : 0.000003s : 21: predicate.value_based_eliminate 0.60% : 0.000006s : 42: predicate.virtual_dataset_eliminate 0.57% : 0.000006s : 42: predicate.virtual_output_eliminate 0.37% : 0.000004s : 21: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.002899 41 61.36% : 0.001779s : 24: func_graph_cloner_run.FuncGraphClonerGraph 38.64% : 0.001120s : 17: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.185330 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000029s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000149s : 1: add_recomputation 0.04% : 0.000065s : 1: assign_add_opt 0.50% : 0.000920s : 1: auto_monad 0.06% : 0.000110s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.19% : 0.000344s : 1: bootstrap 0.02% : 0.000032s : 1: cconv 0.03% : 0.000057s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000024s : 1: convert_after_rewriter 0.03% : 0.000063s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.32% : 0.000594s : 1: eliminate_special_op_node 0.01% : 0.000023s : 1: environ_conv 0.01% : 0.000020s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000013s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000011s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.29% : 0.000543s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000034s : 1: opt.transform.loop_unroll_optimizer 2.71% : 0.005025s : 80: opt.transform.opt_a 0.08% : 0.000152s : 1: opt.transform.opt_after_cconv 0.26% : 0.000479s : 27: opt.transform.opt_b 0.09% : 0.000171s : 1: opt.transform.opt_trans_graph 0.04% : 0.000076s : 3: opt.transform.special_op_eliminate 0.07% : 0.000131s : 4: opt.transform.symbol_engine_opt 6.54% : 0.012113s : 1: opt_a 0.17% : 0.000315s : 1: opt_after_cconv 0.35% : 0.000650s : 1: opt_b 8.39% : 0.015550s : 1: optimize 0.01% : 0.000024s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000017s : 1: order_py_execute_after_rewriter 0.02% : 0.000031s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.03% : 0.000047s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.05% : 0.000100s : 1: pre_auto_parallel 0.04% : 0.000078s : 1: py_interpret_to_execute 0.01% : 0.000024s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000020s : 1: remove_cast_before_assign_add 0.04% : 0.000074s : 1: remove_dup_value 0.63% : 0.001162s : 1: renormalize.infer 0.61% : 0.001137s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.19% : 0.000356s : 1: rewriter_after_opt_a 0.11% : 0.000204s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000021s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000191s : 1: symbol_engine_optimizer 59.02% : 0.109373s : 1: task_emit 0.10% : 0.000194s : 1: tuple_transform 18.49% : 0.034259s : 1: type_inference 0.07% : 0.000124s : 1: validate PASSED =============================== warnings summary =============================== PASSED/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") PASSED/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") PASSED /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") PASSED/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") PASSED/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") PASSED/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") PASSED/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 =============================== warnings summary ============================================================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") ======================= 1 passed, 18 warnings in 43.27s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") ======================= 1 passed, 18 warnings in 43.30s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 ======================= 1 passed, 18 warnings in 43.30s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 ======================= 1 passed, 18 warnings in 43.28s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54-- Docs: https://docs.pytest.org/en/latest/warnings.html-- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") ======================= 1 passed, 18 warnings in 43.28s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 ======================= 1 passed, 18 warnings in 43.28s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") ======================= 1 passed, 18 warnings in 43.27s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 1 passed, 18 warnings in 43.28s ======================== [WARNING] DEVICE(163842,ffff8ac54c10,python3.7):2025-02-07-13:54:31.208.768 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x19d6aa80 is not exist. [WARNING] DEVICE(163840,ffff92fd9c10,python3.7):2025-02-07-13:54:31.232.733 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x52eeb020 is not exist. [WARNING] DEVICE(163843,ffffaa156c10,python3.7):2025-02-07-13:54:33.372.006 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x4378af90 is not exist. [WARNING] DEVICE(163844,ffffae145c10,python3.7):2025-02-07-13:54:33.378.444 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x31fd3ac0 is not exist. [WARNING] DEVICE(163841,ffff99d3ac10,python3.7):2025-02-07-13:54:33.401.748 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x187441e0 is not exist. [WARNING] DEVICE(163846,ffff97644c10,python3.7):2025-02-07-13:54:33.416.437 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x516920f0 is not exist. [WARNING] DEVICE(163839,ffff88d43c10,python3.7):2025-02-07-13:54:33.423.092 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3a4fb7f0 is not exist. [WARNING] DEVICE(163845,ffff83f78c10,python3.7):2025-02-07-13:54:33.437.512 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x51f13810 is not exist. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 62.48s (0:01:02) =================== ././@LongLink0000644000000000000000000000016000000000000011600 Lustar rootrootff8c39f2e51611efac92c4447d93fe45/pass/test_compile_cache_test_compile_cache_pipeline_parallel_and_recompute.logff8c39f2e51611efac92c4447d93fe45/pass/test_compile_cache_test_compile_cache_pipeline_parallel_and_re0000644000175400017540000003630214751343157033117 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/compiler/compile_cache, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collected 1 item test_compile_cache.py [WARNING] ME(137380:281473448516624,MainProcess):2025-02-07-13:50:23.112.941 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:0, log file:worker_0.log. Environment variable [RANK_ID] is exported. [WARNING] ME(137380:281473448516624,MainProcess):2025-02-07-13:50:23.198.411 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:1, log file:worker_1.log. Environment variable [RANK_ID] is exported. [WARNING] ME(137380:281473448516624,MainProcess):2025-02-07-13:50:23.289.171 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:2, log file:worker_2.log. Environment variable [RANK_ID] is exported. [WARNING] ME(137380:281473448516624,MainProcess):2025-02-07-13:50:23.387.239 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:3, log file:worker_3.log. Environment variable [RANK_ID] is exported. [WARNING] ME(137380:281473448516624,MainProcess):2025-02-07-13:50:23.498.561 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:4, log file:worker_4.log. Environment variable [RANK_ID] is exported. [WARNING] ME(137380:281473448516624,MainProcess):2025-02-07-13:50:23.628.425 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:5, log file:worker_5.log. Environment variable [RANK_ID] is exported. [WARNING] ME(137380:281473448516624,MainProcess):2025-02-07-13:50:23.759.408 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:6, log file:worker_6.log. Environment variable [RANK_ID] is exported. [WARNING] ME(137380:281473448516624,MainProcess):2025-02-07-13:50:23.891.501 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:7, log file:worker_7.log. Environment variable [RANK_ID] is exported. [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:50:31.106.511 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 0 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:50:36.282.547 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 1 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:50:41.431.621 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 2 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:50:46.581.357 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 3 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:50:51.732.842 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 4 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:50:56.893.970 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 5 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:51:02.534.83 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 6 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:51:07.196.334 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 7 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:51:12.333.351 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 8 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:51:17.476.563 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 9 check first train. check cache file. check first log. check first compile result. [WARNING] ME(157269:281473438325776,MainProcess):2025-02-07-13:51:27.878.236 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:0, log file:worker_0.log. Environment variable [RANK_ID] is exported. [WARNING] ME(157269:281473438325776,MainProcess):2025-02-07-13:51:27.949.286 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:1, log file:worker_1.log. Environment variable [RANK_ID] is exported. [WARNING] ME(157269:281473438325776,MainProcess):2025-02-07-13:51:28.646.31 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:2, log file:worker_2.log. Environment variable [RANK_ID] is exported. [WARNING] ME(157269:281473438325776,MainProcess):2025-02-07-13:51:28.141.122 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:3, log file:worker_3.log. Environment variable [RANK_ID] is exported. [WARNING] ME(157269:281473438325776,MainProcess):2025-02-07-13:51:28.219.700 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:4, log file:worker_4.log. Environment variable [RANK_ID] is exported. [WARNING] ME(157269:281473438325776,MainProcess):2025-02-07-13:51:28.302.401 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:5, log file:worker_5.log. Environment variable [RANK_ID] is exported. [WARNING] ME(157269:281473438325776,MainProcess):2025-02-07-13:51:28.390.707 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:6, log file:worker_6.log. Environment variable [RANK_ID] is exported. [WARNING] ME(157269:281473438325776,MainProcess):2025-02-07-13:51:28.482.327 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:7, log file:worker_7.log. Environment variable [RANK_ID] is exported. [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:51:35.159.891 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 0 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:51:40.344.785 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 1 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:51:45.499.753 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 2 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:51:50.664.615 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 3 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:51:55.810.272 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 4 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:52:00.950.877 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 5 [WARNING] ME(137232:281473362807824,MainProcess):2025-02-07-13:52:06.100.935 [mindspore/testcases/testcases/tests/st/networks/utils.py:56] process is running, please wait 6 check second train. check second log. check second train result. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 119.29s (0:01:59) ================== ff8c39f2e51611efac92c4447d93fe45/pass/test_all_test_hccl_gather_into_tensor.log0000644000175400017540000007750414751343157026747 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collected 1 item test_all.py ============================= test session starts ============================== ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sinkrootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collecting ... collecting ... [WARNING] ME(52563:281473758260240,MainProcess):2025-02-07-15:52:51.293.054 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] ME(52562:281473885305872,MainProcess):2025-02-07-15:52:51.298.530 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] DISTRIBUTED(52563,ffffb75fec10,python3.7):2025-02-07-15:53:32.352.029 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(52562,ffffbef27c10,python3.7):2025-02-07-15:53:32.615.816 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(52563,fffeebfef0f0,python3.7):2025-02-07-15:53:33.475.720 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(52562,fffef37f60f0,python3.7):2025-02-07-15:53:33.497.422 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(52563,fffeeb7ee0f0,python3.7):2025-02-07-15:53:33.544.854 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(52562,fffef17f20f0,python3.7):2025-02-07-15:53:33.575.701 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(52562,fffef17f20f0,python3.7):2025-02-07-15:53:33.893.275 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(52562,fffef37f60f0,python3.7):2025-02-07-15:53:33.893.639 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(52563,fffeeb7ee0f0,python3.7):2025-02-07-15:53:34.059.689 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(52563,fffeebfef0f0,python3.7):2025-02-07-15:53:34.060.020 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] PROFILER(52562,fffe9bfff0f0,python3.7):2025-02-07-15:53:34.147.540 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(52563,fffeeafed0f0,python3.7):2025-02-07-15:53:34.297.474 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [0.]  collecting 2 items  collected 2 items  test_gather_into_tensor.py [[ 0. 1. 2. 3. 4. 5. 6. 7.] [ 8. 9. 10. 11. 12. 13. 14. 15.] [16. 17. 18. 19. 20. 21. 22. 23.] [24. 25. 26. 27. 28. 29. 30. 31.] [32. 33. 34. 35. 36. 37. 38. 39.] [40. 41. 42. 43. 44. 45. 46. 47.] [48. 49. 50. 51. 52. 53. 54. 55.] [56. 57. 58. 59. 60. 61. 62. 63.] [ 0. 1. 2. 3. 4. 5. 6. 7.] [ 8. 9. 10. 11. 12. 13. 14. 15.] [16. 17. 18. 19. 20. 21. 22. 23.] [24. 25. 26. 27. 28. 29. 30. 31.] [32. 33. 34. 35. 36. 37. 38. 39.] [40. 41. 42. 43. 44. 45. 46. 47.] [48. 49. 50. 51. 52. 53. 54. 55.] [56. 57. 58. 59. 60. 61. 62. 63.]]  collecting 2 items  collected 2 items  test_gather_into_tensor.py [0.] .[[ 0. 1. 2. 3. 4. 5. 6. 7.] [ 8. 9. 10. 11. 12. 13. 14. 15.] [16. 17. 18. 19. 20. 21. 22. 23.] [24. 25. 26. 27. 28. 29. 30. 31.] [32. 33. 34. 35. 36. 37. 38. 39.] [40. 41. 42. 43. 44. 45. 46. 47.] [48. 49. 50. 51. 52. 53. 54. 55.] [56. 57. 58. 59. 60. 61. 62. 63.] [ 0. 1. 2. 3. 4. 5. 6. 7.] [ 8. 9. 10. 11. 12. 13. 14. 15.] [16. 17. 18. 19. 20. 21. 22. 23.] [24. 25. 26. 27. 28. 29. 30. 31.] [32. 33. 34. 35. 36. 37. 38. 39.] [40. 41. 42. 43. 44. 45. 46. 47.] [48. 49. 50. 51. 52. 53. 54. 55.] [56. 57. 58. 59. 60. 61. 62. 63.]] .[0.] . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 2 passed, 18 warnings in 49.23s ======================== [[ 0. 1. 2. 3. 4. 5. 6. 7.] [ 8. 9. 10. 11. 12. 13. 14. 15.] [16. 17. 18. 19. 20. 21. 22. 23.] [24. 25. 26. 27. 28. 29. 30. 31.] [32. 33. 34. 35. 36. 37. 38. 39.] [40. 41. 42. 43. 44. 45. 46. 47.] [48. 49. 50. 51. 52. 53. 54. 55.] [56. 57. 58. 59. 60. 61. 62. 63.] [ 0. 1. 2. 3. 4. 5. 6. 7.] [ 8. 9. 10. 11. 12. 13. 14. 15.] [16. 17. 18. 19. 20. 21. 22. 23.] [24. 25. 26. 27. 28. 29. 30. 31.] [32. 33. 34. 35. 36. 37. 38. 39.] [40. 41. 42. 43. 44. 45. 46. 47.] [48. 49. 50. 51. 52. 53. 54. 55.] [56. 57. 58. 59. 60. 61. 62. 63.]] . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 2 passed, 18 warnings in 49.27s ======================== [WARNING] DEVICE(52563,ffffb75fec10,python3.7):2025-02-07-15:53:40.202.075 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2a5ae8d0 is not exist. [WARNING] DEVICE(52562,ffffbef27c10,python3.7):2025-02-07-15:53:42.035.449 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x225b6060 is not exist. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 70.55s (0:01:10) =================== ././@LongLink0000644000000000000000000000015400000000000011603 Lustar rootrootff8c39f2e51611efac92c4447d93fe45/pass/test_mc2_all2all_test_mc2_alltoall_allgather_batchmatmul_withsilu.logff8c39f2e51611efac92c4447d93fe45/pass/test_mc2_all2all_test_mc2_alltoall_allgather_batchmatmul_withs0000644000175400017540000000101414751343160032755 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/auto_parallel, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collected 1 item test_mc2_all2all.py s ============================== 1 skipped in 0.14s ============================== ././@LongLink0000644000000000000000000000015700000000000011606 Lustar rootrootff8c39f2e51611efac92c4447d93fe45/pass/test_mc2_all2all_test_mc2_alltoall_allgather_batchmatmul_withoutsilu.logff8c39f2e51611efac92c4447d93fe45/pass/test_mc2_all2all_test_mc2_alltoall_allgather_batchmatmul_witho0000644000175400017540000000101414751343157032757 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/auto_parallel, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collected 1 item test_mc2_all2all.py s ============================== 1 skipped in 0.15s ============================== ff8c39f2e51611efac92c4447d93fe45/pass/test_remove_redundancy_test_load_remove_redundancy_error.log0000644000175400017540000002131714751343157032732 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collected 1 item test_remove_redundancy.py . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================== 1 passed, 18 warnings in 9.43s ======================== [WARNING] DEVICE(20465,ffff85a2ec10,python3.7):2025-02-07-13:56:45.769.545 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x4d710600 is not exist. ././@LongLink0000644000000000000000000000015500000000000011604 Lustar rootrootff8c39f2e51611efac92c4447d93fe45/pass/test_parallel_complex_input_test_graph_mode_parallel_complex_input.logff8c39f2e51611efac92c4447d93fe45/pass/test_parallel_complex_input_test_graph_mode_parallel_complex_i0000644000175400017540001747757714751343157033341 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/auto_parallel, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collected 1 item test_parallel_complex_input.py (Not all processes could be identified, non-owned process info will not be shown, you would have to be root to see it all.) Active Internet connections (only servers) Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name tcp 0 0 8.92.9.85:15564 0.0.0.0:* LISTEN 52658/python3 tcp 0 0 8.92.9.85:15565 0.0.0.0:* LISTEN 52973/python3 tcp 0 0 8.92.9.85:15566 0.0.0.0:* LISTEN 52707/python3 tcp 0 0 8.92.9.85:15567 0.0.0.0:* LISTEN 52608/python3 tcp 0 0 8.92.9.85:15568 0.0.0.0:* LISTEN 52976/python3 tcp 0 0 8.92.9.85:15569 0.0.0.0:* LISTEN 52953/python3 tcp 0 0 8.92.9.85:15570 0.0.0.0:* LISTEN 52868/python3 tcp 0 0 8.92.9.85:15571 0.0.0.0:* LISTEN 52900/python3 tcp 0 0 8.92.9.85:15572 0.0.0.0:* LISTEN 52898/python3 tcp 0 0 8.92.9.85:15573 0.0.0.0:* LISTEN 52951/python3 tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN - tcp 0 0 8.92.9.85:19000 0.0.0.0:* LISTEN 53699/python3 tcp 0 0 8.92.9.85:15161 0.0.0.0:* LISTEN 52944/python3 tcp 0 0 8.92.9.85:15162 0.0.0.0:* LISTEN 52784/python3 tcp 0 0 8.92.9.85:15163 0.0.0.0:* LISTEN 52554/python3 tcp 0 0 8.92.9.85:15164 0.0.0.0:* LISTEN 52658/python3 tcp 0 0 8.92.9.85:15165 0.0.0.0:* LISTEN 52973/python3 tcp 0 0 8.92.9.85:15166 0.0.0.0:* LISTEN 52707/python3 tcp 0 0 8.92.9.85:15167 0.0.0.0:* LISTEN 52608/python3 tcp 0 0 8.92.9.85:18080 0.0.0.0:* LISTEN 53699/python3 tcp 0 0 8.92.9.85:15168 0.0.0.0:* LISTEN 52976/python3 tcp 0 0 8.92.9.85:15169 0.0.0.0:* LISTEN 52953/python3 tcp 0 0 8.92.9.85:15170 0.0.0.0:* LISTEN 52868/python3 tcp 0 0 8.92.9.85:15171 0.0.0.0:* LISTEN 52900/python3 tcp 0 0 8.92.9.85:15172 0.0.0.0:* LISTEN 52898/python3 tcp 0 0 8.92.9.85:15173 0.0.0.0:* LISTEN 52951/python3 tcp 0 0 8.92.9.85:15561 0.0.0.0:* LISTEN 52944/python3 tcp 0 0 8.92.9.85:15562 0.0.0.0:* LISTEN 52784/python3 tcp 0 0 8.92.9.85:15563 0.0.0.0:* LISTEN 52554/python3 tcp6 0 0 :::22 :::* LISTEN - (Not all processes could be identified, non-owned process info will not be shown, you would have to be root to see it all.) [INFO] DEBUG(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.010.511 [mindspore/ccsrc/debug/data_dump/data_dumper.cc:43] CpuDumpRegister] Register DataDumper for cpu backend. [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.529.184 [mindspore/ccsrc/transform/acl_ir/acl_adapter_info.cc:129] RegGetGraphInfoFunc] Reg get graph info in ascend. [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.529.493 [mindspore/ccsrc/transform/acl_ir/acl_data_dumper.cc:100] AclDumpRegister] Register AclDataDumper for ascend backend [INFO] CORE(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.643.721 [mindspore/core/utils/ms_context.cc:161] set_backend_policy] ms set context backend policy:ge [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.644.345 [mindspore/ccsrc/transform/symbol/acl_base_symbol.cc:62] LoadAclBaseApiSymbol] Load acl base api success! [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.645.544 [mindspore/ccsrc/transform/symbol/acl_compiler_symbol.cc:40] LoadAclOpCompilerApiSymbol] Load acl op compiler api success! [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.645.619 [mindspore/ccsrc/transform/symbol/acl_mdl_symbol.cc:116] LoadAclMdlApiSymbol] Load acl mdl api success! [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.645.653 [mindspore/ccsrc/transform/symbol/acl_op_symbol.cc:56] LoadAclOpApiSymbol] Load ascend op api success! [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.645.764 [mindspore/ccsrc/transform/symbol/acl_prof_symbol.cc:48] LoadProfApiSymbol] Load acl prof api success! [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.645.792 [mindspore/ccsrc/transform/symbol/acl_rt_allocator_symbol.cc:49] LoadAclAllocatorApiSymbol] Load acl allocator api success! [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.645.855 [mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc:144] LoadAclRtApiSymbol] Load acl rt api success! [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.645.876 [mindspore/ccsrc/transform/symbol/acl_symbol.cc:37] LoadAclApiSymbol] Load acl base api success! [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.647.100 [mindspore/ccsrc/transform/symbol/acl_tdt_symbol.cc:83] LoadAcltdtApiSymbol] Load acl tdt api success! [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.647.122 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:80] LoadAscendApiSymbols] Load ascend api success! [INFO] DEVICE(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.648.100 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.721.527 [mindspore/ccsrc/pybind_api/ir/log_adapter_py.h:33] PyExceptionInitializer] Set exception handler [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.759.377 [mindspore/ccsrc/pipeline/jit/ps/init.cc:236] pybind11_init__c_expression] Start GraphExecutorPy... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.760.155 [mindspore/ccsrc/pipeline/jit/ps/init.cc:341] pybind11_init__c_expression] Start ParallelContext... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.760.903 [mindspore/ccsrc/pipeline/jit/ps/init.cc:468] pybind11_init__c_expression] Start CostModelContext... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.761.322 [mindspore/ccsrc/pipeline/jit/ps/init.cc:570] pybind11_init__c_expression] Start OffloadContext... [INFO] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:46.764.719 [mindspore/run_check/_check_version.py:483] Setting the env `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python` to prevent memory overflow during save or load checkpoint file. [INFO] GE_ADPT(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.765.103 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:66] LoadAscendApiSymbols] Ascend api is already loaded. [INFO] DEVICE(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:46.765.176 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] CORE(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:48.988.372 [mindspore/core/utils/ms_context.cc:302] SetDeviceTargetFromInner] ms set context device target:Ascend [INFO] PARALLEL(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:48.988.497 [mindspore/ccsrc/frontend/parallel/costmodel_context.cc:30] GetInstance] Create costmodel_context [INFO] CORE(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:48.988.518 [mindspore/core/utils/ms_context.cc:306] SetDeviceTargetFromInner] Set memory_optimize_level to O0 as default on other device [DEBUG] RUNTIME_FRAMEWORK(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:49.053.984 [mindspore/ccsrc/runtime/runtime_conf/runtime_conf.cc:44] operator()] Create new mindspore RuntimeConf [INFO] DEVICE(187602,ffff9fb56c10,python3.7):2025-02-07-15:57:49.054.122 [mindspore/ccsrc/runtime/pynative/lazy_fusion_flags.cc:172] LazyFusionFlags] lazy_fusion_flags :{"disable_ops":[],"dump_as_text":false,"flush_threshold":100,"online_tuning":false,"opt_level":0,"synchronize":false} [INFO] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:50.482.607 [mindspore/profiler/common/registry.py:36] registered module: CpuProfiler with name: CPU [INFO] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:50.483.171 [mindspore/profiler/common/registry.py:36] registered module: GpuProfiler with name: GPU [INFO] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:50.492.826 [mindspore/profiler/common/registry.py:36] registered module: NpuProfiler with name: Ascend [INFO] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:50.799.580 [mindspore/parallel/cluster/process_entity/_utils.py:107] IP address found on this node. Address info:{'family': 'inet', 'local': '127.0.0.1', 'prefixlen': 8, 'scope': 'host', 'label': 'lo', 'valid_life_time': 4294967295, 'preferred_life_time': 4294967295}. Found address:127.0.0.1 [WARNING] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:50.846.807 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:0, log file:log_output/worker_0.log. Environment variable [RANK_ID] is exported. [WARNING] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:51.199.79 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:1, log file:log_output/worker_1.log. Environment variable [RANK_ID] is exported. [WARNING] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:51.189.250 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:2, log file:log_output/worker_2.log. Environment variable [RANK_ID] is exported. [WARNING] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:51.393.152 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:3, log file:log_output/worker_3.log. Environment variable [RANK_ID] is exported. [INFO] DEBUG(187742,ffffa187dc10,python):2025-02-07-15:57:51.497.026 [mindspore/ccsrc/debug/data_dump/data_dumper.cc:43] CpuDumpRegister] Register DataDumper for cpu backend. [INFO] CORE(187742,ffffa187dc10,python):2025-02-07-15:57:51.521.725 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 0 [WARNING] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:51.602.412 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:4, log file:log_output/worker_4.log. Environment variable [RANK_ID] is exported. [INFO] DEBUG(187753,ffff8292dc10,python):2025-02-07-15:57:51.659.659 [mindspore/ccsrc/debug/data_dump/data_dumper.cc:43] CpuDumpRegister] Register DataDumper for cpu backend. [INFO] CORE(187753,ffff8292dc10,python):2025-02-07-15:57:51.683.940 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 1 [WARNING] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:51.806.133 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:5, log file:log_output/worker_5.log. Environment variable [RANK_ID] is exported. [INFO] DEBUG(187764,ffff97badc10,python):2025-02-07-15:57:51.830.208 [mindspore/ccsrc/debug/data_dump/data_dumper.cc:43] CpuDumpRegister] Register DataDumper for cpu backend. [INFO] CORE(187764,ffff97badc10,python):2025-02-07-15:57:51.854.773 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 2 [INFO] DEBUG(187775,ffffba4dbc10,python):2025-02-07-15:57:51.994.639 [mindspore/ccsrc/debug/data_dump/data_dumper.cc:43] CpuDumpRegister] Register DataDumper for cpu backend. [WARNING] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:52.993.2 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:6, log file:log_output/worker_6.log. Environment variable [RANK_ID] is exported. [INFO] CORE(187775,ffffba4dbc10,python):2025-02-07-15:57:52.013.297 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 3 [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.086.334 [mindspore/ccsrc/transform/acl_ir/acl_adapter_info.cc:129] RegGetGraphInfoFunc] Reg get graph info in ascend. [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.086.748 [mindspore/ccsrc/transform/acl_ir/acl_data_dumper.cc:100] AclDumpRegister] Register AclDataDumper for ascend backend [WARNING] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:52.216.502 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:7, log file:log_output/worker_7.log. Environment variable [RANK_ID] is exported. [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.249.472 [mindspore/ccsrc/transform/acl_ir/acl_adapter_info.cc:129] RegGetGraphInfoFunc] Reg get graph info in ascend. [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.249.880 [mindspore/ccsrc/transform/acl_ir/acl_data_dumper.cc:100] AclDumpRegister] Register AclDataDumper for ascend backend [INFO] CORE(187742,ffffa187dc10,python):2025-02-07-15:57:52.259.147 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 0 [INFO] CORE(187742,ffffa187dc10,python):2025-02-07-15:57:52.259.219 [mindspore/core/utils/ms_context.cc:161] set_backend_policy] ms set context backend policy:ge [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.259.776 [mindspore/ccsrc/transform/symbol/acl_base_symbol.cc:62] LoadAclBaseApiSymbol] Load acl base api success! [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.261.013 [mindspore/ccsrc/transform/symbol/acl_compiler_symbol.cc:40] LoadAclOpCompilerApiSymbol] Load acl op compiler api success! [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.261.102 [mindspore/ccsrc/transform/symbol/acl_mdl_symbol.cc:116] LoadAclMdlApiSymbol] Load acl mdl api success! [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.261.168 [mindspore/ccsrc/transform/symbol/acl_op_symbol.cc:56] LoadAclOpApiSymbol] Load ascend op api success! [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.261.286 [mindspore/ccsrc/transform/symbol/acl_prof_symbol.cc:48] LoadProfApiSymbol] Load acl prof api success! [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.261.317 [mindspore/ccsrc/transform/symbol/acl_rt_allocator_symbol.cc:49] LoadAclAllocatorApiSymbol] Load acl allocator api success! [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.261.394 [mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc:144] LoadAclRtApiSymbol] Load acl rt api success! [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.261.419 [mindspore/ccsrc/transform/symbol/acl_symbol.cc:37] LoadAclApiSymbol] Load acl base api success! [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.262.701 [mindspore/ccsrc/transform/symbol/acl_tdt_symbol.cc:83] LoadAcltdtApiSymbol] Load acl tdt api success! [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.262.740 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:80] LoadAscendApiSymbols] Load ascend api success! [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:52.263.767 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] DEBUG(187789,ffffaa419c10,python):2025-02-07-15:57:52.281.997 [mindspore/ccsrc/debug/data_dump/data_dumper.cc:43] CpuDumpRegister] Register DataDumper for cpu backend. [INFO] CORE(187789,ffffaa419c10,python):2025-02-07-15:57:52.306.206 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 4 [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.339.837 [mindspore/ccsrc/transform/acl_ir/acl_adapter_info.cc:129] RegGetGraphInfoFunc] Reg get graph info in ascend. [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.340.193 [mindspore/ccsrc/transform/acl_ir/acl_data_dumper.cc:100] AclDumpRegister] Register AclDataDumper for ascend backend [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:57:52.355.640 [mindspore/ccsrc/pybind_api/ir/log_adapter_py.h:33] PyExceptionInitializer] Set exception handler [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:57:52.397.491 [mindspore/ccsrc/pipeline/jit/ps/init.cc:236] pybind11_init__c_expression] Start GraphExecutorPy... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:57:52.398.269 [mindspore/ccsrc/pipeline/jit/ps/init.cc:341] pybind11_init__c_expression] Start ParallelContext... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:57:52.398.972 [mindspore/ccsrc/pipeline/jit/ps/init.cc:468] pybind11_init__c_expression] Start CostModelContext... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:57:52.399.395 [mindspore/ccsrc/pipeline/jit/ps/init.cc:570] pybind11_init__c_expression] Start OffloadContext... [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:57:52.402.748 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:66] LoadAscendApiSymbols] Ascend api is already loaded. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:52.402.872 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [WARNING] ME(187602:281473361210384,MainProcess):2025-02-07-15:57:52.417.058 [mindspore/parallel/cluster/process_entity/_api.py:223] Distributed job is spawned. Waiting all processes to exit... [INFO] CORE(187753,ffff8292dc10,python):2025-02-07-15:57:52.423.575 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 1 [INFO] CORE(187753,ffff8292dc10,python):2025-02-07-15:57:52.423.665 [mindspore/core/utils/ms_context.cc:161] set_backend_policy] ms set context backend policy:ge [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.424.228 [mindspore/ccsrc/transform/symbol/acl_base_symbol.cc:62] LoadAclBaseApiSymbol] Load acl base api success! [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.425.518 [mindspore/ccsrc/transform/symbol/acl_compiler_symbol.cc:40] LoadAclOpCompilerApiSymbol] Load acl op compiler api success! [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.425.613 [mindspore/ccsrc/transform/symbol/acl_mdl_symbol.cc:116] LoadAclMdlApiSymbol] Load acl mdl api success! [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.425.651 [mindspore/ccsrc/transform/symbol/acl_op_symbol.cc:56] LoadAclOpApiSymbol] Load ascend op api success! [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.425.768 [mindspore/ccsrc/transform/symbol/acl_prof_symbol.cc:48] LoadProfApiSymbol] Load acl prof api success! [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.425.801 [mindspore/ccsrc/transform/symbol/acl_rt_allocator_symbol.cc:49] LoadAclAllocatorApiSymbol] Load acl allocator api success! [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.425.879 [mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc:144] LoadAclRtApiSymbol] Load acl rt api success! [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.425.904 [mindspore/ccsrc/transform/symbol/acl_symbol.cc:37] LoadAclApiSymbol] Load acl base api success! [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.427.178 [mindspore/ccsrc/transform/symbol/acl_tdt_symbol.cc:83] LoadAcltdtApiSymbol] Load acl tdt api success! [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.427.216 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:80] LoadAscendApiSymbols] Load ascend api success! [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:52.428.201 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] DEBUG(187803,ffff93d7bc10,python):2025-02-07-15:57:52.468.313 [mindspore/ccsrc/debug/data_dump/data_dumper.cc:43] CpuDumpRegister] Register DataDumper for cpu backend. [INFO] CORE(187803,ffff93d7bc10,python):2025-02-07-15:57:52.497.010 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 5 [INFO] CORE(187764,ffff97badc10,python):2025-02-07-15:57:52.498.986 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 2 [INFO] CORE(187764,ffff97badc10,python):2025-02-07-15:57:52.499.072 [mindspore/core/utils/ms_context.cc:161] set_backend_policy] ms set context backend policy:ge [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.499.624 [mindspore/ccsrc/transform/symbol/acl_base_symbol.cc:62] LoadAclBaseApiSymbol] Load acl base api success! [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.500.823 [mindspore/ccsrc/transform/symbol/acl_compiler_symbol.cc:40] LoadAclOpCompilerApiSymbol] Load acl op compiler api success! [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.500.907 [mindspore/ccsrc/transform/symbol/acl_mdl_symbol.cc:116] LoadAclMdlApiSymbol] Load acl mdl api success! [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.500.944 [mindspore/ccsrc/transform/symbol/acl_op_symbol.cc:56] LoadAclOpApiSymbol] Load ascend op api success! [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.501.062 [mindspore/ccsrc/transform/symbol/acl_prof_symbol.cc:48] LoadProfApiSymbol] Load acl prof api success! [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.501.096 [mindspore/ccsrc/transform/symbol/acl_rt_allocator_symbol.cc:49] LoadAclAllocatorApiSymbol] Load acl allocator api success! [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.501.169 [mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc:144] LoadAclRtApiSymbol] Load acl rt api success! [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.501.192 [mindspore/ccsrc/transform/symbol/acl_symbol.cc:37] LoadAclApiSymbol] Load acl base api success! [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.502.422 [mindspore/ccsrc/transform/symbol/acl_tdt_symbol.cc:83] LoadAcltdtApiSymbol] Load acl tdt api success! [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.502.451 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:80] LoadAscendApiSymbols] Load ascend api success! [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:52.503.476 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.509.890 [mindspore/ccsrc/transform/acl_ir/acl_adapter_info.cc:129] RegGetGraphInfoFunc] Reg get graph info in ascend. [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.510.188 [mindspore/ccsrc/transform/acl_ir/acl_data_dumper.cc:100] AclDumpRegister] Register AclDataDumper for ascend backend [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:57:52.519.248 [mindspore/ccsrc/pybind_api/ir/log_adapter_py.h:33] PyExceptionInitializer] Set exception handler [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:57:52.560.741 [mindspore/ccsrc/pipeline/jit/ps/init.cc:236] pybind11_init__c_expression] Start GraphExecutorPy... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:57:52.561.516 [mindspore/ccsrc/pipeline/jit/ps/init.cc:341] pybind11_init__c_expression] Start ParallelContext... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:57:52.562.220 [mindspore/ccsrc/pipeline/jit/ps/init.cc:468] pybind11_init__c_expression] Start CostModelContext... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:57:52.562.639 [mindspore/ccsrc/pipeline/jit/ps/init.cc:570] pybind11_init__c_expression] Start OffloadContext... [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:57:52.566.077 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:66] LoadAscendApiSymbols] Ascend api is already loaded. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:52.566.198 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:57:52.597.007 [mindspore/ccsrc/pybind_api/ir/log_adapter_py.h:33] PyExceptionInitializer] Set exception handler [INFO] CORE(187775,ffffba4dbc10,python):2025-02-07-15:57:52.639.690 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 3 [INFO] CORE(187775,ffffba4dbc10,python):2025-02-07-15:57:52.639.783 [mindspore/core/utils/ms_context.cc:161] set_backend_policy] ms set context backend policy:ge [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:57:52.639.932 [mindspore/ccsrc/pipeline/jit/ps/init.cc:236] pybind11_init__c_expression] Start GraphExecutorPy... [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.640.426 [mindspore/ccsrc/transform/symbol/acl_base_symbol.cc:62] LoadAclBaseApiSymbol] Load acl base api success! [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:57:52.640.794 [mindspore/ccsrc/pipeline/jit/ps/init.cc:341] pybind11_init__c_expression] Start ParallelContext... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:57:52.641.492 [mindspore/ccsrc/pipeline/jit/ps/init.cc:468] pybind11_init__c_expression] Start CostModelContext... [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.641.827 [mindspore/ccsrc/transform/symbol/acl_compiler_symbol.cc:40] LoadAclOpCompilerApiSymbol] Load acl op compiler api success! [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:57:52.641.909 [mindspore/ccsrc/pipeline/jit/ps/init.cc:570] pybind11_init__c_expression] Start OffloadContext... [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.641.919 [mindspore/ccsrc/transform/symbol/acl_mdl_symbol.cc:116] LoadAclMdlApiSymbol] Load acl mdl api success! [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.641.998 [mindspore/ccsrc/transform/symbol/acl_op_symbol.cc:56] LoadAclOpApiSymbol] Load ascend op api success! [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.642.157 [mindspore/ccsrc/transform/symbol/acl_prof_symbol.cc:48] LoadProfApiSymbol] Load acl prof api success! [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.642.193 [mindspore/ccsrc/transform/symbol/acl_rt_allocator_symbol.cc:49] LoadAclAllocatorApiSymbol] Load acl allocator api success! [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.642.309 [mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc:144] LoadAclRtApiSymbol] Load acl rt api success! [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.642.337 [mindspore/ccsrc/transform/symbol/acl_symbol.cc:37] LoadAclApiSymbol] Load acl base api success! [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.643.666 [mindspore/ccsrc/transform/symbol/acl_tdt_symbol.cc:83] LoadAcltdtApiSymbol] Load acl tdt api success! [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.643.701 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:80] LoadAscendApiSymbols] Load ascend api success! [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:52.644.865 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:57:52.645.360 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:66] LoadAscendApiSymbols] Ascend api is already loaded. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:52.645.468 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] DEBUG(187818,ffffbe0b2c10,python):2025-02-07-15:57:52.671.613 [mindspore/ccsrc/debug/data_dump/data_dumper.cc:43] CpuDumpRegister] Register DataDumper for cpu backend. [INFO] CORE(187818,ffffbe0b2c10,python):2025-02-07-15:57:52.692.464 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 6 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:57:52.742.546 [mindspore/ccsrc/pybind_api/ir/log_adapter_py.h:33] PyExceptionInitializer] Set exception handler [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:57:52.785.988 [mindspore/ccsrc/pipeline/jit/ps/init.cc:236] pybind11_init__c_expression] Start GraphExecutorPy... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:57:52.786.732 [mindspore/ccsrc/pipeline/jit/ps/init.cc:341] pybind11_init__c_expression] Start ParallelContext... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:57:52.787.430 [mindspore/ccsrc/pipeline/jit/ps/init.cc:468] pybind11_init__c_expression] Start CostModelContext... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:57:52.787.850 [mindspore/ccsrc/pipeline/jit/ps/init.cc:570] pybind11_init__c_expression] Start OffloadContext... [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:57:52.791.216 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:66] LoadAscendApiSymbols] Ascend api is already loaded. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:52.791.338 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:52.833.063 [mindspore/ccsrc/transform/acl_ir/acl_adapter_info.cc:129] RegGetGraphInfoFunc] Reg get graph info in ascend. [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:52.833.425 [mindspore/ccsrc/transform/acl_ir/acl_data_dumper.cc:100] AclDumpRegister] Register AclDataDumper for ascend backend [INFO] DEBUG(187834,ffffb35e0c10,python):2025-02-07-15:57:52.878.178 [mindspore/ccsrc/debug/data_dump/data_dumper.cc:43] CpuDumpRegister] Register DataDumper for cpu backend. [INFO] CORE(187834,ffffb35e0c10,python):2025-02-07-15:57:52.900.220 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 7 [INFO] CORE(187789,ffffaa419c10,python):2025-02-07-15:57:52.989.249 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 4 [INFO] CORE(187789,ffffaa419c10,python):2025-02-07-15:57:52.989.344 [mindspore/core/utils/ms_context.cc:161] set_backend_policy] ms set context backend policy:ge [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:52.989.993 [mindspore/ccsrc/transform/symbol/acl_base_symbol.cc:62] LoadAclBaseApiSymbol] Load acl base api success! [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:52.991.274 [mindspore/ccsrc/transform/symbol/acl_compiler_symbol.cc:40] LoadAclOpCompilerApiSymbol] Load acl op compiler api success! [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:52.991.364 [mindspore/ccsrc/transform/symbol/acl_mdl_symbol.cc:116] LoadAclMdlApiSymbol] Load acl mdl api success! [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:52.991.405 [mindspore/ccsrc/transform/symbol/acl_op_symbol.cc:56] LoadAclOpApiSymbol] Load ascend op api success! [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:52.991.559 [mindspore/ccsrc/transform/symbol/acl_prof_symbol.cc:48] LoadProfApiSymbol] Load acl prof api success! [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:52.991.597 [mindspore/ccsrc/transform/symbol/acl_rt_allocator_symbol.cc:49] LoadAclAllocatorApiSymbol] Load acl allocator api success! [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:52.991.709 [mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc:144] LoadAclRtApiSymbol] Load acl rt api success! [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:52.991.736 [mindspore/ccsrc/transform/symbol/acl_symbol.cc:37] LoadAclApiSymbol] Load acl base api success! [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:52.993.098 [mindspore/ccsrc/transform/symbol/acl_tdt_symbol.cc:83] LoadAcltdtApiSymbol] Load acl tdt api success! [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:52.993.139 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:80] LoadAscendApiSymbols] Load ascend api success! [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:52.994.191 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.023.476 [mindspore/ccsrc/transform/acl_ir/acl_adapter_info.cc:129] RegGetGraphInfoFunc] Reg get graph info in ascend. [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.023.871 [mindspore/ccsrc/transform/acl_ir/acl_data_dumper.cc:100] AclDumpRegister] Register AclDataDumper for ascend backend [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:57:53.085.952 [mindspore/ccsrc/pybind_api/ir/log_adapter_py.h:33] PyExceptionInitializer] Set exception handler [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:57:53.127.481 [mindspore/ccsrc/pipeline/jit/ps/init.cc:236] pybind11_init__c_expression] Start GraphExecutorPy... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:57:53.128.208 [mindspore/ccsrc/pipeline/jit/ps/init.cc:341] pybind11_init__c_expression] Start ParallelContext... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:57:53.128.926 [mindspore/ccsrc/pipeline/jit/ps/init.cc:468] pybind11_init__c_expression] Start CostModelContext... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:57:53.129.345 [mindspore/ccsrc/pipeline/jit/ps/init.cc:570] pybind11_init__c_expression] Start OffloadContext... [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:57:53.132.552 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:66] LoadAscendApiSymbols] Ascend api is already loaded. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:53.132.734 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.197.397 [mindspore/ccsrc/transform/acl_ir/acl_adapter_info.cc:129] RegGetGraphInfoFunc] Reg get graph info in ascend. [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.197.676 [mindspore/ccsrc/transform/acl_ir/acl_data_dumper.cc:100] AclDumpRegister] Register AclDataDumper for ascend backend [INFO] CORE(187803,ffff93d7bc10,python):2025-02-07-15:57:53.203.866 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 5 [INFO] CORE(187803,ffff93d7bc10,python):2025-02-07-15:57:53.203.968 [mindspore/core/utils/ms_context.cc:161] set_backend_policy] ms set context backend policy:ge [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.204.552 [mindspore/ccsrc/transform/symbol/acl_base_symbol.cc:62] LoadAclBaseApiSymbol] Load acl base api success! [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.205.857 [mindspore/ccsrc/transform/symbol/acl_compiler_symbol.cc:40] LoadAclOpCompilerApiSymbol] Load acl op compiler api success! [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.205.949 [mindspore/ccsrc/transform/symbol/acl_mdl_symbol.cc:116] LoadAclMdlApiSymbol] Load acl mdl api success! [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.206.021 [mindspore/ccsrc/transform/symbol/acl_op_symbol.cc:56] LoadAclOpApiSymbol] Load ascend op api success! [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.206.184 [mindspore/ccsrc/transform/symbol/acl_prof_symbol.cc:48] LoadProfApiSymbol] Load acl prof api success! [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.206.223 [mindspore/ccsrc/transform/symbol/acl_rt_allocator_symbol.cc:49] LoadAclAllocatorApiSymbol] Load acl allocator api success! [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.206.343 [mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc:144] LoadAclRtApiSymbol] Load acl rt api success! [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.206.372 [mindspore/ccsrc/transform/symbol/acl_symbol.cc:37] LoadAclApiSymbol] Load acl base api success! [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.207.702 [mindspore/ccsrc/transform/symbol/acl_tdt_symbol.cc:83] LoadAcltdtApiSymbol] Load acl tdt api success! [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.207.739 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:80] LoadAscendApiSymbols] Load ascend api success! [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:53.208.863 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:57:53.312.458 [mindspore/ccsrc/pybind_api/ir/log_adapter_py.h:33] PyExceptionInitializer] Set exception handler [INFO] CORE(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.312.592 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 6 [INFO] CORE(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.312.672 [mindspore/core/utils/ms_context.cc:161] set_backend_policy] ms set context backend policy:ge [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.313.249 [mindspore/ccsrc/transform/symbol/acl_base_symbol.cc:62] LoadAclBaseApiSymbol] Load acl base api success! [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.314.429 [mindspore/ccsrc/transform/symbol/acl_compiler_symbol.cc:40] LoadAclOpCompilerApiSymbol] Load acl op compiler api success! [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.314.504 [mindspore/ccsrc/transform/symbol/acl_mdl_symbol.cc:116] LoadAclMdlApiSymbol] Load acl mdl api success! [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.314.541 [mindspore/ccsrc/transform/symbol/acl_op_symbol.cc:56] LoadAclOpApiSymbol] Load ascend op api success! [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.314.656 [mindspore/ccsrc/transform/symbol/acl_prof_symbol.cc:48] LoadProfApiSymbol] Load acl prof api success! [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.314.690 [mindspore/ccsrc/transform/symbol/acl_rt_allocator_symbol.cc:49] LoadAclAllocatorApiSymbol] Load acl allocator api success! [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.314.759 [mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc:144] LoadAclRtApiSymbol] Load acl rt api success! [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.314.785 [mindspore/ccsrc/transform/symbol/acl_symbol.cc:37] LoadAclApiSymbol] Load acl base api success! [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.316.041 [mindspore/ccsrc/transform/symbol/acl_tdt_symbol.cc:83] LoadAcltdtApiSymbol] Load acl tdt api success! [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.316.070 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:80] LoadAscendApiSymbols] Load ascend api success! [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.317.111 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:57:53.357.987 [mindspore/ccsrc/pipeline/jit/ps/init.cc:236] pybind11_init__c_expression] Start GraphExecutorPy... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:57:53.358.797 [mindspore/ccsrc/pipeline/jit/ps/init.cc:341] pybind11_init__c_expression] Start ParallelContext... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:57:53.359.543 [mindspore/ccsrc/pipeline/jit/ps/init.cc:468] pybind11_init__c_expression] Start CostModelContext... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:57:53.360.007 [mindspore/ccsrc/pipeline/jit/ps/init.cc:570] pybind11_init__c_expression] Start OffloadContext... [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:57:53.363.440 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:66] LoadAscendApiSymbols] Ascend api is already loaded. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:53.363.554 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.384.571 [mindspore/ccsrc/pybind_api/ir/log_adapter_py.h:33] PyExceptionInitializer] Set exception handler [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.420.968 [mindspore/ccsrc/pipeline/jit/ps/init.cc:236] pybind11_init__c_expression] Start GraphExecutorPy... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.421.662 [mindspore/ccsrc/pipeline/jit/ps/init.cc:341] pybind11_init__c_expression] Start ParallelContext... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.422.324 [mindspore/ccsrc/pipeline/jit/ps/init.cc:468] pybind11_init__c_expression] Start CostModelContext... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.422.712 [mindspore/ccsrc/pipeline/jit/ps/init.cc:570] pybind11_init__c_expression] Start OffloadContext... [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.425.873 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:66] LoadAscendApiSymbols] Ascend api is already loaded. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:53.425.967 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.433.333 [mindspore/ccsrc/transform/acl_ir/acl_adapter_info.cc:129] RegGetGraphInfoFunc] Reg get graph info in ascend. [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.433.654 [mindspore/ccsrc/transform/acl_ir/acl_data_dumper.cc:100] AclDumpRegister] Register AclDataDumper for ascend backend [INFO] CORE(187834,ffffb35e0c10,python):2025-02-07-15:57:53.569.282 [mindspore/core/utils/ms_context.cc:101] SetDeviceId] Set MS_CTX_DEVICE_ID by env DEVICE_ID to: 7 [INFO] CORE(187834,ffffb35e0c10,python):2025-02-07-15:57:53.569.346 [mindspore/core/utils/ms_context.cc:161] set_backend_policy] ms set context backend policy:ge [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.569.941 [mindspore/ccsrc/transform/symbol/acl_base_symbol.cc:62] LoadAclBaseApiSymbol] Load acl base api success! [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.571.158 [mindspore/ccsrc/transform/symbol/acl_compiler_symbol.cc:40] LoadAclOpCompilerApiSymbol] Load acl op compiler api success! [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.571.235 [mindspore/ccsrc/transform/symbol/acl_mdl_symbol.cc:116] LoadAclMdlApiSymbol] Load acl mdl api success! [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.571.274 [mindspore/ccsrc/transform/symbol/acl_op_symbol.cc:56] LoadAclOpApiSymbol] Load ascend op api success! [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.571.390 [mindspore/ccsrc/transform/symbol/acl_prof_symbol.cc:48] LoadProfApiSymbol] Load acl prof api success! [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.571.425 [mindspore/ccsrc/transform/symbol/acl_rt_allocator_symbol.cc:49] LoadAclAllocatorApiSymbol] Load acl allocator api success! [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.571.497 [mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc:144] LoadAclRtApiSymbol] Load acl rt api success! [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.571.524 [mindspore/ccsrc/transform/symbol/acl_symbol.cc:37] LoadAclApiSymbol] Load acl base api success! [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.572.812 [mindspore/ccsrc/transform/symbol/acl_tdt_symbol.cc:83] LoadAcltdtApiSymbol] Load acl tdt api success! [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.572.844 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:80] LoadAscendApiSymbols] Load ascend api success! [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:53.573.880 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:57:53.651.981 [mindspore/ccsrc/pybind_api/ir/log_adapter_py.h:33] PyExceptionInitializer] Set exception handler [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:57:53.691.120 [mindspore/ccsrc/pipeline/jit/ps/init.cc:236] pybind11_init__c_expression] Start GraphExecutorPy... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:57:53.691.892 [mindspore/ccsrc/pipeline/jit/ps/init.cc:341] pybind11_init__c_expression] Start ParallelContext... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:57:53.692.613 [mindspore/ccsrc/pipeline/jit/ps/init.cc:468] pybind11_init__c_expression] Start CostModelContext... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:57:53.693.055 [mindspore/ccsrc/pipeline/jit/ps/init.cc:570] pybind11_init__c_expression] Start OffloadContext... [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:57:53.696.406 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:66] LoadAscendApiSymbols] Ascend api is already loaded. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:53.696.495 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:411] SetContextSocVersion] The soc version :Ascend910A [INFO] CORE(187742,ffffa187dc10,python):2025-02-07-15:57:54.694.981 [mindspore/core/utils/ms_context.cc:302] SetDeviceTargetFromInner] ms set context device target:Ascend [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:57:54.695.109 [mindspore/ccsrc/frontend/parallel/costmodel_context.cc:30] GetInstance] Create costmodel_context [INFO] CORE(187742,ffffa187dc10,python):2025-02-07-15:57:54.695.134 [mindspore/core/utils/ms_context.cc:306] SetDeviceTargetFromInner] Set memory_optimize_level to O0 as default on other device [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:57:54.769.226 [mindspore/ccsrc/runtime/runtime_conf/runtime_conf.cc:44] operator()] Create new mindspore RuntimeConf [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:54.769.455 [mindspore/ccsrc/runtime/pynative/lazy_fusion_flags.cc:172] LazyFusionFlags] lazy_fusion_flags :{"disable_ops":[],"dump_as_text":false,"flush_threshold":100,"online_tuning":false,"opt_level":0,"synchronize":false} [INFO] CORE(187753,ffff8292dc10,python):2025-02-07-15:57:54.868.235 [mindspore/core/utils/ms_context.cc:302] SetDeviceTargetFromInner] ms set context device target:Ascend [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:57:54.868.304 [mindspore/ccsrc/frontend/parallel/costmodel_context.cc:30] GetInstance] Create costmodel_context [INFO] CORE(187753,ffff8292dc10,python):2025-02-07-15:57:54.868.326 [mindspore/core/utils/ms_context.cc:306] SetDeviceTargetFromInner] Set memory_optimize_level to O0 as default on other device [INFO] CORE(187764,ffff97badc10,python):2025-02-07-15:57:54.877.501 [mindspore/core/utils/ms_context.cc:302] SetDeviceTargetFromInner] ms set context device target:Ascend [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:57:54.877.599 [mindspore/ccsrc/frontend/parallel/costmodel_context.cc:30] GetInstance] Create costmodel_context [INFO] CORE(187764,ffff97badc10,python):2025-02-07-15:57:54.877.621 [mindspore/core/utils/ms_context.cc:306] SetDeviceTargetFromInner] Set memory_optimize_level to O0 as default on other device [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:57:54.939.324 [mindspore/ccsrc/runtime/runtime_conf/runtime_conf.cc:44] operator()] Create new mindspore RuntimeConf [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:54.939.456 [mindspore/ccsrc/runtime/pynative/lazy_fusion_flags.cc:172] LazyFusionFlags] lazy_fusion_flags :{"disable_ops":[],"dump_as_text":false,"flush_threshold":100,"online_tuning":false,"opt_level":0,"synchronize":false} [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:57:54.949.711 [mindspore/ccsrc/runtime/runtime_conf/runtime_conf.cc:44] operator()] Create new mindspore RuntimeConf [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:54.949.890 [mindspore/ccsrc/runtime/pynative/lazy_fusion_flags.cc:172] LazyFusionFlags] lazy_fusion_flags :{"disable_ops":[],"dump_as_text":false,"flush_threshold":100,"online_tuning":false,"opt_level":0,"synchronize":false} [INFO] CORE(187775,ffffba4dbc10,python):2025-02-07-15:57:55.084.888 [mindspore/core/utils/ms_context.cc:302] SetDeviceTargetFromInner] ms set context device target:Ascend [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:57:55.085.030 [mindspore/ccsrc/frontend/parallel/costmodel_context.cc:30] GetInstance] Create costmodel_context [INFO] CORE(187775,ffffba4dbc10,python):2025-02-07-15:57:55.085.056 [mindspore/core/utils/ms_context.cc:306] SetDeviceTargetFromInner] Set memory_optimize_level to O0 as default on other device [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:57:55.152.755 [mindspore/ccsrc/runtime/runtime_conf/runtime_conf.cc:44] operator()] Create new mindspore RuntimeConf [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:55.152.925 [mindspore/ccsrc/runtime/pynative/lazy_fusion_flags.cc:172] LazyFusionFlags] lazy_fusion_flags :{"disable_ops":[],"dump_as_text":false,"flush_threshold":100,"online_tuning":false,"opt_level":0,"synchronize":false} [INFO] CORE(187789,ffffaa419c10,python):2025-02-07-15:57:55.312.012 [mindspore/core/utils/ms_context.cc:302] SetDeviceTargetFromInner] ms set context device target:Ascend [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:57:55.312.103 [mindspore/ccsrc/frontend/parallel/costmodel_context.cc:30] GetInstance] Create costmodel_context [INFO] CORE(187789,ffffaa419c10,python):2025-02-07-15:57:55.312.123 [mindspore/core/utils/ms_context.cc:306] SetDeviceTargetFromInner] Set memory_optimize_level to O0 as default on other device [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:57:55.378.643 [mindspore/ccsrc/runtime/runtime_conf/runtime_conf.cc:44] operator()] Create new mindspore RuntimeConf [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:55.378.791 [mindspore/ccsrc/runtime/pynative/lazy_fusion_flags.cc:172] LazyFusionFlags] lazy_fusion_flags :{"disable_ops":[],"dump_as_text":false,"flush_threshold":100,"online_tuning":false,"opt_level":0,"synchronize":false} [INFO] CORE(187803,ffff93d7bc10,python):2025-02-07-15:57:55.604.468 [mindspore/core/utils/ms_context.cc:302] SetDeviceTargetFromInner] ms set context device target:Ascend [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:57:55.604.594 [mindspore/ccsrc/frontend/parallel/costmodel_context.cc:30] GetInstance] Create costmodel_context [INFO] CORE(187803,ffff93d7bc10,python):2025-02-07-15:57:55.604.620 [mindspore/core/utils/ms_context.cc:306] SetDeviceTargetFromInner] Set memory_optimize_level to O0 as default on other device [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:57:55.675.873 [mindspore/ccsrc/runtime/runtime_conf/runtime_conf.cc:44] operator()] Create new mindspore RuntimeConf [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:55.676.059 [mindspore/ccsrc/runtime/pynative/lazy_fusion_flags.cc:172] LazyFusionFlags] lazy_fusion_flags :{"disable_ops":[],"dump_as_text":false,"flush_threshold":100,"online_tuning":false,"opt_level":0,"synchronize":false} [INFO] CORE(187818,ffffbe0b2c10,python):2025-02-07-15:57:55.762.053 [mindspore/core/utils/ms_context.cc:302] SetDeviceTargetFromInner] ms set context device target:Ascend [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:57:55.762.144 [mindspore/ccsrc/frontend/parallel/costmodel_context.cc:30] GetInstance] Create costmodel_context [INFO] CORE(187818,ffffbe0b2c10,python):2025-02-07-15:57:55.762.168 [mindspore/core/utils/ms_context.cc:306] SetDeviceTargetFromInner] Set memory_optimize_level to O0 as default on other device [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:57:55.833.067 [mindspore/ccsrc/runtime/runtime_conf/runtime_conf.cc:44] operator()] Create new mindspore RuntimeConf [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:55.833.222 [mindspore/ccsrc/runtime/pynative/lazy_fusion_flags.cc:172] LazyFusionFlags] lazy_fusion_flags :{"disable_ops":[],"dump_as_text":false,"flush_threshold":100,"online_tuning":false,"opt_level":0,"synchronize":false} [INFO] CORE(187834,ffffb35e0c10,python):2025-02-07-15:57:56.023.581 [mindspore/core/utils/ms_context.cc:302] SetDeviceTargetFromInner] ms set context device target:Ascend [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:57:56.023.706 [mindspore/ccsrc/frontend/parallel/costmodel_context.cc:30] GetInstance] Create costmodel_context [INFO] CORE(187834,ffffb35e0c10,python):2025-02-07-15:57:56.023.731 [mindspore/core/utils/ms_context.cc:306] SetDeviceTargetFromInner] Set memory_optimize_level to O0 as default on other device [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:57:56.095.095 [mindspore/ccsrc/runtime/runtime_conf/runtime_conf.cc:44] operator()] Create new mindspore RuntimeConf [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:56.095.296 [mindspore/ccsrc/runtime/pynative/lazy_fusion_flags.cc:172] LazyFusionFlags] lazy_fusion_flags :{"disable_ops":[],"dump_as_text":false,"flush_threshold":100,"online_tuning":false,"opt_level":0,"synchronize":false} [INFO] ME(187742:281473391778832,MainProcess):2025-02-07-15:57:56.275.423 [mindspore/profiler/common/registry.py:36] registered module: CpuProfiler with name: CPU [INFO] ME(187742:281473391778832,MainProcess):2025-02-07-15:57:56.276.004 [mindspore/profiler/common/registry.py:36] registered module: GpuProfiler with name: GPU [INFO] ME(187742:281473391778832,MainProcess):2025-02-07-15:57:56.285.923 [mindspore/profiler/common/registry.py:36] registered module: NpuProfiler with name: Ascend [INFO] ME(187764:281473227349008,MainProcess):2025-02-07-15:57:56.387.886 [mindspore/profiler/common/registry.py:36] registered module: CpuProfiler with name: CPU [INFO] ME(187764:281473227349008,MainProcess):2025-02-07-15:57:56.388.472 [mindspore/profiler/common/registry.py:36] registered module: GpuProfiler with name: GPU [INFO] ME(187764:281473227349008,MainProcess):2025-02-07-15:57:56.398.059 [mindspore/profiler/common/registry.py:36] registered module: NpuProfiler with name: Ascend [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:57:56.461.615 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187742:281473391778832,MainProcess):2025-02-07-15:57:56.461.718 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:57:56.462.127 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187742:281473391778832,MainProcess):2025-02-07-15:57:56.462.175 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:57:56.462.414 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187742:281473391778832,MainProcess):2025-02-07-15:57:56.462.458 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] PS(187742,ffffa187dc10,python):2025-02-07-15:57:56.462.660 [mindspore/ccsrc/ps/ps_context.cc:256] set_ms_role] MS_ROLE of this node is MS_WORKER [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.462.706 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:46] NodeBase] Cluster topo timeout is 600 seconds. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.462.758 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:51] NodeBase] Node timeout after exception is 30 seconds. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.462.789 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.463.019 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471260008688 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.463.137 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281471251615984 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.463.163 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.463.267 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471243223280 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.463.398 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281471234830576 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.463.446 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:170] Register] Start connecting heartbeat client. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.463.467 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.463.700 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:55546, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.463.755 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(187742,ffff217780f0,python):2025-02-07-15:57:56.463.755 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55546 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] ME(187753:281472872406032,MainProcess):2025-02-07-15:57:56.549.637 [mindspore/profiler/common/registry.py:36] registered module: CpuProfiler with name: CPU [INFO] ME(187753:281472872406032,MainProcess):2025-02-07-15:57:56.550.267 [mindspore/profiler/common/registry.py:36] registered module: GpuProfiler with name: GPU [INFO] ME(187753:281472872406032,MainProcess):2025-02-07-15:57:56.560.059 [mindspore/profiler/common/registry.py:36] registered module: NpuProfiler with name: Ascend [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:57:56.566.401 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187764:281473227349008,MainProcess):2025-02-07-15:57:56.566.491 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:57:56.566.876 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187764:281473227349008,MainProcess):2025-02-07-15:57:56.566.922 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:57:56.567.154 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187764:281473227349008,MainProcess):2025-02-07-15:57:56.567.197 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] PS(187764,ffff97badc10,python):2025-02-07-15:57:56.567.385 [mindspore/ccsrc/ps/ps_context.cc:256] set_ms_role] MS_ROLE of this node is MS_WORKER [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:56.567.421 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:46] NodeBase] Cluster topo timeout is 600 seconds. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:56.567.442 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:51] NodeBase] Node timeout after exception is 30 seconds. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:56.567.469 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:56.567.634 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471095595248 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:56.567.732 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281471017283824 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:56.567.757 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:56.567.851 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471008891120 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:56.567.963 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281471000498416 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:56.568.005 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:170] Register] Start connecting heartbeat client. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:56.568.027 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:56.568.201 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:55548, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:56.568.243 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(187764,ffff137fe0f0,python):2025-02-07-15:57:56.568.261 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55548 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] ME(187775:281473807399952,MainProcess):2025-02-07-15:57:56.737.918 [mindspore/profiler/common/registry.py:36] registered module: CpuProfiler with name: CPU [INFO] ME(187775:281473807399952,MainProcess):2025-02-07-15:57:56.738.546 [mindspore/profiler/common/registry.py:36] registered module: GpuProfiler with name: GPU [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:57:56.746.684 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187753:281472872406032,MainProcess):2025-02-07-15:57:56.746.854 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:57:56.747.387 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187753:281472872406032,MainProcess):2025-02-07-15:57:56.747.439 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:57:56.747.705 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187753:281472872406032,MainProcess):2025-02-07-15:57:56.747.752 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] PS(187753,ffff8292dc10,python):2025-02-07-15:57:56.747.988 [mindspore/ccsrc/ps/ps_context.cc:256] set_ms_role] MS_ROLE of this node is MS_WORKER [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:56.748.034 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:46] NodeBase] Cluster topo timeout is 600 seconds. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:56.748.056 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:51] NodeBase] Node timeout after exception is 30 seconds. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:56.748.089 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:56.748.329 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281470740639984 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:56.748.443 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281470732247280 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:56.748.471 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] ME(187775:281473807399952,MainProcess):2025-02-07-15:57:56.748.360 [mindspore/profiler/common/registry.py:36] registered module: NpuProfiler with name: Ascend [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:56.748.571 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281470723854576 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:56.748.726 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281470715461872 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:56.748.777 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:170] Register] Start connecting heartbeat client. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:56.748.800 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:56.749.060 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:55550, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187753,ffff028290f0,python):2025-02-07-15:57:56.749.096 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55550 to 127.0.0.1:10001 is successfully created. System errno: Success [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:56.749.105 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 1 [INFO] ME(187789:281473538169872,MainProcess):2025-02-07-15:57:56.930.319 [mindspore/profiler/common/registry.py:36] registered module: CpuProfiler with name: CPU [INFO] ME(187789:281473538169872,MainProcess):2025-02-07-15:57:56.930.898 [mindspore/profiler/common/registry.py:36] registered module: GpuProfiler with name: GPU [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:57:56.932.307 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187775:281473807399952,MainProcess):2025-02-07-15:57:56.932.429 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:57:56.932.902 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187775:281473807399952,MainProcess):2025-02-07-15:57:56.932.953 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:57:56.933.190 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187775:281473807399952,MainProcess):2025-02-07-15:57:56.933.234 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] PS(187775,ffffba4dbc10,python):2025-02-07-15:57:56.933.429 [mindspore/ccsrc/ps/ps_context.cc:256] set_ms_role] MS_ROLE of this node is MS_WORKER [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:56.933.474 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:46] NodeBase] Cluster topo timeout is 600 seconds. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:56.933.527 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:51] NodeBase] Node timeout after exception is 30 seconds. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:56.933.559 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:56.933.894 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471675523312 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:56.934.060 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281471667130608 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:56.934.092 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:56.934.249 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471658737904 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:56.934.420 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281471650345200 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:56.934.455 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:170] Register] Start connecting heartbeat client. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:56.934.476 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:56.934.736 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:55552, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:56.934.776 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(187775,ffff3a3bc0f0,python):2025-02-07-15:57:56.934.775 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55552 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] ME(187789:281473538169872,MainProcess):2025-02-07-15:57:56.940.506 [mindspore/profiler/common/registry.py:36] registered module: NpuProfiler with name: Ascend [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.963.895 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.963.918 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.963.931 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:178] Register] Start connecting business client. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.963.948 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.964.048 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:55554, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:56.964.069 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(187742,ffff2277a0f0,python):2025-02-07-15:57:56.964.097 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55554 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:57.068.328 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:57.068.353 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:57.068.366 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:178] Register] Start connecting business client. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:57.068.384 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:57.068.477 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:55556, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:57.068.498 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(187764,ffff18aae0f0,python):2025-02-07-15:57:57.068.521 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55556 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:57:57.108.469 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187789:281473538169872,MainProcess):2025-02-07-15:57:57.108.596 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:57:57.109.035 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187789:281473538169872,MainProcess):2025-02-07-15:57:57.109.086 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:57:57.109.332 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187789:281473538169872,MainProcess):2025-02-07-15:57:57.109.375 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] PS(187789,ffffaa419c10,python):2025-02-07-15:57:57.109.542 [mindspore/ccsrc/ps/ps_context.cc:256] set_ms_role] MS_ROLE of this node is MS_WORKER [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.109.585 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:46] NodeBase] Cluster topo timeout is 600 seconds. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.109.605 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:51] NodeBase] Node timeout after exception is 30 seconds. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.109.637 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.109.851 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471406301424 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.109.966 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281471397908720 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.109.993 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.110.099 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471389516016 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.110.223 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281471381123312 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.110.254 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:170] Register] Start connecting heartbeat client. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.110.275 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.110.506 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:55558, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.110.557 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(187789,ffff2a2fc0f0,python):2025-02-07-15:57:57.110.558 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55558 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] ME(187803:281473162132496,MainProcess):2025-02-07-15:57:57.137.637 [mindspore/profiler/common/registry.py:36] registered module: CpuProfiler with name: CPU [INFO] ME(187803:281473162132496,MainProcess):2025-02-07-15:57:57.138.223 [mindspore/profiler/common/registry.py:36] registered module: GpuProfiler with name: GPU [INFO] ME(187803:281473162132496,MainProcess):2025-02-07-15:57:57.147.711 [mindspore/profiler/common/registry.py:36] registered module: NpuProfiler with name: Ascend [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:57.249.211 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:57.249.237 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:57.249.251 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:178] Register] Start connecting business client. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:57.249.266 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:57.249.369 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:55560, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:57.249.390 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(187753,ffff0382b0f0,python):2025-02-07-15:57:57.249.401 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55560 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] ME(187818:281473870146576,MainProcess):2025-02-07-15:57:57.284.787 [mindspore/profiler/common/registry.py:36] registered module: CpuProfiler with name: CPU [INFO] ME(187818:281473870146576,MainProcess):2025-02-07-15:57:57.285.359 [mindspore/profiler/common/registry.py:36] registered module: GpuProfiler with name: GPU [INFO] ME(187818:281473870146576,MainProcess):2025-02-07-15:57:57.294.961 [mindspore/profiler/common/registry.py:36] registered module: NpuProfiler with name: Ascend [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:57:57.321.258 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187803:281473162132496,MainProcess):2025-02-07-15:57:57.321.372 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:57:57.321.754 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187803:281473162132496,MainProcess):2025-02-07-15:57:57.321.806 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:57:57.322.041 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187803:281473162132496,MainProcess):2025-02-07-15:57:57.322.086 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] PS(187803,ffff93d7bc10,python):2025-02-07-15:57:57.322.269 [mindspore/ccsrc/ps/ps_context.cc:256] set_ms_role] MS_ROLE of this node is MS_WORKER [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.322.313 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:46] NodeBase] Cluster topo timeout is 600 seconds. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.322.335 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:51] NodeBase] Node timeout after exception is 30 seconds. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.322.368 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.322.606 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471030325488 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.322.816 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281470950174960 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.322.846 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.322.961 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281470941782256 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.323.092 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281470933389552 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.323.126 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:170] Register] Start connecting heartbeat client. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.323.148 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.323.387 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:55562, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.323.432 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(187803,ffff0f7fe0f0,python):2025-02-07-15:57:57.323.435 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55562 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:57.434.919 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:57.434.945 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:57.434.958 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:178] Register] Start connecting business client. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:57.434.974 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:57.435.081 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:55564, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:57.435.102 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(187775,ffff3b3be0f0,python):2025-02-07-15:57:57.435.119 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55564 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:57.464.146 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:57.464.168 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:57.464.481 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:222] Register] The compute graph node: 0 has been registered successfully. [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:57.464.598 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [INFO] DISTRIBUTED(187742,ffff13fff0f0,python):2025-02-07-15:57:57.464.688 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:270] Heartbeat] Interval of heartbeat lower and upper are 3 and 5 [INFO] DISTRIBUTED(187742,ffff13fff0f0,python):2025-02-07-15:57:57.464.720 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:274] Heartbeat] The heartbeat thread is started. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.467.728 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187818:281473870146576,MainProcess):2025-02-07-15:57:57.467.811 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.468.179 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187818:281473870146576,MainProcess):2025-02-07-15:57:57.468.225 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.468.455 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187818:281473870146576,MainProcess):2025-02-07-15:57:57.468.493 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] PS(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.468.661 [mindspore/ccsrc/ps/ps_context.cc:256] set_ms_role] MS_ROLE of this node is MS_WORKER [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.468.699 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:46] NodeBase] Cluster topo timeout is 600 seconds. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.468.719 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:51] NodeBase] Node timeout after exception is 30 seconds. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.468.747 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.468.926 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471738298608 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.469.029 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281471729905904 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.469.055 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.469.158 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471721513200 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.469.267 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281471713120496 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.469.307 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:170] Register] Start connecting heartbeat client. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.469.327 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.469.514 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:55566, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.469.540 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(187818,ffff3df9a0f0,python):2025-02-07-15:57:57.469.571 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55566 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:57.568.571 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:57.568.594 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:57.568.888 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:222] Register] The compute graph node: 2 has been registered successfully. [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:57.569.003 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [INFO] DISTRIBUTED(187764,ffff127fc0f0,python):2025-02-07-15:57:57.569.072 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:270] Heartbeat] Interval of heartbeat lower and upper are 3 and 5 [INFO] DISTRIBUTED(187764,ffff127fc0f0,python):2025-02-07-15:57:57.569.101 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:274] Heartbeat] The heartbeat thread is started. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.610.653 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.610.677 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.610.691 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:178] Register] Start connecting business client. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.610.708 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.610.807 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:55568, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:57.610.829 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(187789,ffff2b2fe0f0,python):2025-02-07-15:57:57.610.837 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55568 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] ME(187834:281473691028496,MainProcess):2025-02-07-15:57:57.614.686 [mindspore/profiler/common/registry.py:36] registered module: CpuProfiler with name: CPU [INFO] ME(187834:281473691028496,MainProcess):2025-02-07-15:57:57.615.260 [mindspore/profiler/common/registry.py:36] registered module: GpuProfiler with name: GPU [INFO] ME(187834:281473691028496,MainProcess):2025-02-07-15:57:57.625.000 [mindspore/profiler/common/registry.py:36] registered module: NpuProfiler with name: Ascend [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:57.749.467 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:57.749.491 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:57.749.715 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:222] Register] The compute graph node: 1 has been registered successfully. [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:57.749.837 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [INFO] DISTRIBUTED(187753,ffff018270f0,python):2025-02-07-15:57:57.749.879 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:270] Heartbeat] Interval of heartbeat lower and upper are 3 and 5 [INFO] DISTRIBUTED(187753,ffff018270f0,python):2025-02-07-15:57:57.749.919 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:274] Heartbeat] The heartbeat thread is started. [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:57:57.797.097 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187834:281473691028496,MainProcess):2025-02-07-15:57:57.797.204 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:57:57.797.617 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187834:281473691028496,MainProcess):2025-02-07-15:57:57.797.668 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:57:57.797.909 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:466] GetDeviceContext] Device context of device Ascend is not created yet. [INFO] ME(187834:281473691028496,MainProcess):2025-02-07-15:57:57.797.952 [mindspore/hal/device.py:158] Backend Ascend is not created yet. [INFO] PS(187834,ffffb35e0c10,python):2025-02-07-15:57:57.798.133 [mindspore/ccsrc/ps/ps_context.cc:256] set_ms_role] MS_ROLE of this node is MS_WORKER [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:57.798.177 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:46] NodeBase] Cluster topo timeout is 600 seconds. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:57.798.196 [mindspore/ccsrc/include/backend/distributed/cluster/topology/node_base.h:51] NodeBase] Node timeout after exception is 30 seconds. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:57.798.225 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:57.798.455 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471559221488 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:57.798.569 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281471550828784 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:57.798.593 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:29] TCPClient] Tcp client receiving message timeout is 15 seconds. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:57.798.689 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:RECV_EVENT_LOOP,loop_thread_:281471542436080 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:57.798.804 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:189] Initialize] Set pthread name success name:SEND_EVENT_LOOP,loop_thread_:281471534043376 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:57.798.854 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:170] Register] Start connecting heartbeat client. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:57.798.873 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:57.799.111 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 18 source: 127.0.0.1:55570, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:57.799.149 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(187834,ffff334d20f0,python):2025-02-07-15:57:57.799.165 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55570 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.823.540 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.823.578 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.823.597 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:178] Register] Start connecting business client. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.823.615 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.823.736 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:55572, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:57.823.759 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(187803,ffff14c6f0f0,python):2025-02-07-15:57:57.823.773 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55572 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:57.935.180 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:57.935.207 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:57.935.465 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:222] Register] The compute graph node: 3 has been registered successfully. [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:57.935.591 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [INFO] DISTRIBUTED(187775,ffff393ba0f0,python):2025-02-07-15:57:57.935.618 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:270] Heartbeat] Interval of heartbeat lower and upper are 3 and 5 [INFO] DISTRIBUTED(187775,ffff393ba0f0,python):2025-02-07-15:57:57.935.643 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:274] Heartbeat] The heartbeat thread is started. [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:57.964.687 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.969.623 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.969.647 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.969.663 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:178] Register] Start connecting business client. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.969.680 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.969.770 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:55574, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:57.969.791 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(187818,ffff3ef9c0f0,python):2025-02-07-15:57:57.969.807 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55574 to 127.0.0.1:10001 is successfully created. System errno: Success [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:58.069.085 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:58.110.908 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:58.110.932 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:58.111.158 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:222] Register] The compute graph node: 4 has been registered successfully. [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:58.111.255 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [INFO] DISTRIBUTED(187789,ffff292fa0f0,python):2025-02-07-15:57:58.111.312 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:270] Heartbeat] Interval of heartbeat lower and upper are 3 and 5 [INFO] DISTRIBUTED(187789,ffff292fa0f0,python):2025-02-07-15:57:58.111.338 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:274] Heartbeat] The heartbeat thread is started. [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:58.249.916 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:58.299.237 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:58.299.265 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:58.299.280 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:178] Register] Start connecting business client. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:58.299.295 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:441] Connect] Can not found link destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:58.299.403 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:55576, destination: 127.0.0.1:10001 [WARNING] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:58.299.423 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:10001 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(187834,ffff344d40f0,python):2025-02-07-15:57:58.299.440 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:55576 to 127.0.0.1:10001 is successfully created. System errno: Success [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:58.323.851 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:58.323.884 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:58.324.152 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:222] Register] The compute graph node: 5 has been registered successfully. [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:58.324.315 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [INFO] DISTRIBUTED(187803,ffff0e7fc0f0,python):2025-02-07-15:57:58.324.373 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:270] Heartbeat] Interval of heartbeat lower and upper are 3 and 5 [INFO] DISTRIBUTED(187803,ffff0e7fc0f0,python):2025-02-07-15:57:58.324.403 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:274] Heartbeat] The heartbeat thread is started. [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:58.435.669 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:58.464.765 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:58.469.867 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:58.469.891 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:58.470.098 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:222] Register] The compute graph node: 6 has been registered successfully. [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:58.470.187 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [INFO] DISTRIBUTED(187818,ffff3cf980f0,python):2025-02-07-15:57:58.470.252 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:270] Heartbeat] Interval of heartbeat lower and upper are 3 and 5 [INFO] DISTRIBUTED(187818,ffff3cf980f0,python):2025-02-07-15:57:58.470.281 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:274] Heartbeat] The heartbeat thread is started. [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:58.569.164 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:58.611.335 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:58.749.998 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:58.799.518 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:505] Connect] Connected to destination: 127.0.0.1:10001 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:58.799.551 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:73] Connect] Connected to the tcp server 127.0.0.1:10001 successfully. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:58.799.867 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:222] Register] The compute graph node: 7 has been registered successfully. [WARNING] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:58.799.983 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [INFO] DISTRIBUTED(187834,ffff324d00f0,python):2025-02-07-15:57:58.800.047 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:270] Heartbeat] Interval of heartbeat lower and upper are 3 and 5 [INFO] DISTRIBUTED(187834,ffff324d00f0,python):2025-02-07-15:57:58.800.075 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:274] Heartbeat] The heartbeat thread is started. [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:58.824.400 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:58.935.754 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:58.964.848 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(4/1200). [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:58.970.275 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.069.287 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(4/1200). [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.111.420 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.250.085 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(4/1200). [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.089 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:246] BuildCluster] [PROF]BuildCluster costs 1501.89 msec. [WARNING] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.113 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.130 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:341] PostProcess] Start post processing for computing graph nodes. [WARNING] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.147 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 7 rank id: 7 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.162 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:359] PostProcess] Client ip address in this cluster of this compute graph node is 127.0.0.1 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.180 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:370] PostProcess] Assigned for this worker port range is 9910 to 10165 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.202 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:252] BuildCluster] [PROF]PostBuildCluster costs 0.067 msec. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.242 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:183] node_num] Number of role MS_WORKER is 8 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.264 [mindspore/ccsrc/distributed/init.cc:46] Initialize] [PROF]distributed_cluster_init costs 1502.17 msec. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.285 [mindspore/ccsrc/distributed/collective/collective_manager.cc:174] Initialize] Start initializing collective communication for backend: Ascend... [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.348 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:48] MsCollectiveCommLib] Global group name of MindSpore collective communication library is mccl_world_group [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.364 [mindspore/ccsrc/distributed/collective/collective_manager.cc:537] InitHostCommlib] Start initializing communication library on host side... [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.383 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:81] Initialize] Query retry count is 400 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.398 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:89] Initialize] Interval of retry allgather hostname lower and upper are 1 and 2 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.425 [mindspore/ccsrc/distributed/collective/collective_manager.cc:557] InitHostCommlib] Communication library on host side is successfully initialized. Global rank id: 7, global rank size: 8 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.444 [mindspore/ccsrc/distributed/collective/collective_manager.cc:197] Initialize] [PROF]InitHostCommlib costs 0.138 msec. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.462 [mindspore/ccsrc/distributed/collective/collective_manager.cc:600] AssignLocalRank] Host name for rank 7 is ascend85 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.648 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:564] GetHostNames] Worker gets host names {"hostnames":["ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85"]} [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.733 [mindspore/ccsrc/distributed/collective/collective_manager.cc:621] AssignLocalRank] Successfully get all nodes' hostname. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.755 [mindspore/ccsrc/distributed/collective/collective_manager.cc:638] AssignLocalRank] The local rank id assigned for this process is 7 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.781 [mindspore/ccsrc/distributed/collective/collective_manager.cc:639] AssignLocalRank] The env 'DEVICE_ID' assigned for this process is: 7 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.802 [mindspore/ccsrc/distributed/collective/collective_manager.cc:657] AssignLocalRank] The device_id of ms_context is set to local rank id [7]. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.824 [mindspore/ccsrc/distributed/collective/collective_manager.cc:205] Initialize] [PROF]AssignLocalRank costs 0.358 msec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.904 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:221] Initialize] Start initializing device context. [INFO] CORE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.300.935 [mindspore/core/utils/ms_context.cc:452] PrintJitLevelAndExecMode] The jit_level is: O0, and enable kernelbykernel executor in the GRAPH mode. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.301.033 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:307] Init] Start inititializing kernel runtime(SetDevice & CreateDefaultStream & MemoryManagerInitialization etc). [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.301.051 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:646] SetRtDevice] Enter SetRtDevice, current initialize device number:0 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.515 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:246] BuildCluster] [PROF]BuildCluster costs 2002.18 msec. [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.540 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.557 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:341] PostProcess] Start post processing for computing graph nodes. [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.578 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 5 rank id: 5 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.596 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:359] PostProcess] Client ip address in this cluster of this compute graph node is 127.0.0.1 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.618 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:370] PostProcess] Assigned for this worker port range is 9398 to 9653 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.674 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:252] BuildCluster] [PROF]PostBuildCluster costs 0.113 msec. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.708 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:183] node_num] Number of role MS_WORKER is 8 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.734 [mindspore/ccsrc/distributed/init.cc:46] Initialize] [PROF]distributed_cluster_init costs 2002.51 msec. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.760 [mindspore/ccsrc/distributed/collective/collective_manager.cc:174] Initialize] Start initializing collective communication for backend: Ascend... [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.844 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:48] MsCollectiveCommLib] Global group name of MindSpore collective communication library is mccl_world_group [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.866 [mindspore/ccsrc/distributed/collective/collective_manager.cc:537] InitHostCommlib] Start initializing communication library on host side... [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.890 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:81] Initialize] Query retry count is 400 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.908 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:89] Initialize] Interval of retry allgather hostname lower and upper are 1 and 2 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.941 [mindspore/ccsrc/distributed/collective/collective_manager.cc:557] InitHostCommlib] Communication library on host side is successfully initialized. Global rank id: 5, global rank size: 8 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.964 [mindspore/ccsrc/distributed/collective/collective_manager.cc:197] Initialize] [PROF]InitHostCommlib costs 0.178 msec. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.324.986 [mindspore/ccsrc/distributed/collective/collective_manager.cc:600] AssignLocalRank] Host name for rank 5 is ascend85 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.325.116 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:564] GetHostNames] Worker gets host names {"hostnames":["ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85"]} [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.325.190 [mindspore/ccsrc/distributed/collective/collective_manager.cc:621] AssignLocalRank] Successfully get all nodes' hostname. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.325.226 [mindspore/ccsrc/distributed/collective/collective_manager.cc:638] AssignLocalRank] The local rank id assigned for this process is 5 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.325.243 [mindspore/ccsrc/distributed/collective/collective_manager.cc:639] AssignLocalRank] The env 'DEVICE_ID' assigned for this process is: 5 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.325.286 [mindspore/ccsrc/distributed/collective/collective_manager.cc:657] AssignLocalRank] The device_id of ms_context is set to local rank id [5]. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:57:59.325.310 [mindspore/ccsrc/distributed/collective/collective_manager.cc:205] Initialize] [PROF]AssignLocalRank costs 0.322 msec. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.325.398 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:221] Initialize] Start initializing device context. [INFO] CORE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.325.434 [mindspore/core/utils/ms_context.cc:452] PrintJitLevelAndExecMode] The jit_level is: O0, and enable kernelbykernel executor in the GRAPH mode. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.325.544 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:307] Init] Start inititializing kernel runtime(SetDevice & CreateDefaultStream & MemoryManagerInitialization etc). [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.325.567 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:646] SetRtDevice] Enter SetRtDevice, current initialize device number:0 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.435.868 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:246] BuildCluster] [PROF]BuildCluster costs 2502.37 msec. [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.435.892 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.435.910 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:341] PostProcess] Start post processing for computing graph nodes. [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.435.931 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 3 rank id: 3 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.435.948 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:359] PostProcess] Client ip address in this cluster of this compute graph node is 127.0.0.1 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.435.968 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:370] PostProcess] Assigned for this worker port range is 8886 to 9141 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.435.992 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:252] BuildCluster] [PROF]PostBuildCluster costs 0.078 msec. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.020 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:183] node_num] Number of role MS_WORKER is 8 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.044 [mindspore/ccsrc/distributed/init.cc:46] Initialize] [PROF]distributed_cluster_init costs 2502.66 msec. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.067 [mindspore/ccsrc/distributed/collective/collective_manager.cc:174] Initialize] Start initializing collective communication for backend: Ascend... [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.194 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:48] MsCollectiveCommLib] Global group name of MindSpore collective communication library is mccl_world_group [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.213 [mindspore/ccsrc/distributed/collective/collective_manager.cc:537] InitHostCommlib] Start initializing communication library on host side... [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.235 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:81] Initialize] Query retry count is 400 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.253 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:89] Initialize] Interval of retry allgather hostname lower and upper are 1 and 2 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.282 [mindspore/ccsrc/distributed/collective/collective_manager.cc:557] InitHostCommlib] Communication library on host side is successfully initialized. Global rank id: 3, global rank size: 8 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.304 [mindspore/ccsrc/distributed/collective/collective_manager.cc:197] Initialize] [PROF]InitHostCommlib costs 0.213 msec. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.324 [mindspore/ccsrc/distributed/collective/collective_manager.cc:600] AssignLocalRank] Host name for rank 3 is ascend85 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.446 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:564] GetHostNames] Worker gets host names {"hostnames":["ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85"]} [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.524 [mindspore/ccsrc/distributed/collective/collective_manager.cc:621] AssignLocalRank] Successfully get all nodes' hostname. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.548 [mindspore/ccsrc/distributed/collective/collective_manager.cc:638] AssignLocalRank] The local rank id assigned for this process is 3 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.564 [mindspore/ccsrc/distributed/collective/collective_manager.cc:639] AssignLocalRank] The env 'DEVICE_ID' assigned for this process is: 3 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.608 [mindspore/ccsrc/distributed/collective/collective_manager.cc:657] AssignLocalRank] The device_id of ms_context is set to local rank id [3]. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.669 [mindspore/ccsrc/distributed/collective/collective_manager.cc:205] Initialize] [PROF]AssignLocalRank costs 0.304 msec. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.754 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:221] Initialize] Start initializing device context. [INFO] CORE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.788 [mindspore/core/utils/ms_context.cc:452] PrintJitLevelAndExecMode] The jit_level is: O0, and enable kernelbykernel executor in the GRAPH mode. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.888 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:307] Init] Start inititializing kernel runtime(SetDevice & CreateDefaultStream & MemoryManagerInitialization etc). [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.436.908 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:646] SetRtDevice] Enter SetRtDevice, current initialize device number:0 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.464.945 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:246] BuildCluster] [PROF]BuildCluster costs 3002.23 msec. [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.464.971 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.464.988 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:341] PostProcess] Start post processing for computing graph nodes. [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.008 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 0 rank id: 0 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.026 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:359] PostProcess] Client ip address in this cluster of this compute graph node is 127.0.0.1 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.048 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:370] PostProcess] Assigned for this worker port range is 8118 to 8373 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.071 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:252] BuildCluster] [PROF]PostBuildCluster costs 0.081 msec. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.098 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:183] node_num] Number of role MS_WORKER is 8 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.122 [mindspore/ccsrc/distributed/init.cc:46] Initialize] [PROF]distributed_cluster_init costs 3002.51 msec. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.147 [mindspore/ccsrc/distributed/collective/collective_manager.cc:174] Initialize] Start initializing collective communication for backend: Ascend... [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.246 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:48] MsCollectiveCommLib] Global group name of MindSpore collective communication library is mccl_world_group [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.266 [mindspore/ccsrc/distributed/collective/collective_manager.cc:537] InitHostCommlib] Start initializing communication library on host side... [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.290 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:81] Initialize] Query retry count is 400 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.309 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:89] Initialize] Interval of retry allgather hostname lower and upper are 1 and 2 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.339 [mindspore/ccsrc/distributed/collective/collective_manager.cc:557] InitHostCommlib] Communication library on host side is successfully initialized. Global rank id: 0, global rank size: 8 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.361 [mindspore/ccsrc/distributed/collective/collective_manager.cc:197] Initialize] [PROF]InitHostCommlib costs 0.189 msec. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.382 [mindspore/ccsrc/distributed/collective/collective_manager.cc:600] AssignLocalRank] Host name for rank 0 is ascend85 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.493 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:564] GetHostNames] Worker gets host names {"hostnames":["ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85"]} [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.565 [mindspore/ccsrc/distributed/collective/collective_manager.cc:621] AssignLocalRank] Successfully get all nodes' hostname. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.589 [mindspore/ccsrc/distributed/collective/collective_manager.cc:638] AssignLocalRank] The local rank id assigned for this process is 0 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.607 [mindspore/ccsrc/distributed/collective/collective_manager.cc:639] AssignLocalRank] The env 'DEVICE_ID' assigned for this process is: 0 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.630 [mindspore/ccsrc/distributed/collective/collective_manager.cc:657] AssignLocalRank] The device_id of ms_context is set to local rank id [0]. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.653 [mindspore/ccsrc/distributed/collective/collective_manager.cc:205] Initialize] [PROF]AssignLocalRank costs 0.268 msec. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.733 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:221] Initialize] Start initializing device context. [INFO] CORE(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.768 [mindspore/core/utils/ms_context.cc:452] PrintJitLevelAndExecMode] The jit_level is: O0, and enable kernelbykernel executor in the GRAPH mode. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.856 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:307] Init] Start inititializing kernel runtime(SetDevice & CreateDefaultStream & MemoryManagerInitialization etc). [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.465.878 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:646] SetRtDevice] Enter SetRtDevice, current initialize device number:0 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.389 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:246] BuildCluster] [PROF]BuildCluster costs 2001.67 msec. [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.419 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.439 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:341] PostProcess] Start post processing for computing graph nodes. [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.459 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 6 rank id: 6 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.477 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:359] PostProcess] Client ip address in this cluster of this compute graph node is 127.0.0.1 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.499 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:370] PostProcess] Assigned for this worker port range is 9654 to 9909 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.523 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:252] BuildCluster] [PROF]PostBuildCluster costs 0.08 msec. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.565 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:183] node_num] Number of role MS_WORKER is 8 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.589 [mindspore/ccsrc/distributed/init.cc:46] Initialize] [PROF]distributed_cluster_init costs 2001.96 msec. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.614 [mindspore/ccsrc/distributed/collective/collective_manager.cc:174] Initialize] Start initializing collective communication for backend: Ascend... [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.656 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:48] MsCollectiveCommLib] Global group name of MindSpore collective communication library is mccl_world_group [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.674 [mindspore/ccsrc/distributed/collective/collective_manager.cc:537] InitHostCommlib] Start initializing communication library on host side... [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.695 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:81] Initialize] Query retry count is 400 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.714 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:89] Initialize] Interval of retry allgather hostname lower and upper are 1 and 2 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.741 [mindspore/ccsrc/distributed/collective/collective_manager.cc:557] InitHostCommlib] Communication library on host side is successfully initialized. Global rank id: 6, global rank size: 8 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.763 [mindspore/ccsrc/distributed/collective/collective_manager.cc:197] Initialize] [PROF]InitHostCommlib costs 0.125 msec. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.784 [mindspore/ccsrc/distributed/collective/collective_manager.cc:600] AssignLocalRank] Host name for rank 6 is ascend85 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.926 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:564] GetHostNames] Worker gets host names {"hostnames":["ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85"]} [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.470.984 [mindspore/ccsrc/distributed/collective/collective_manager.cc:621] AssignLocalRank] Successfully get all nodes' hostname. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.471.017 [mindspore/ccsrc/distributed/collective/collective_manager.cc:638] AssignLocalRank] The local rank id assigned for this process is 6 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.471.036 [mindspore/ccsrc/distributed/collective/collective_manager.cc:639] AssignLocalRank] The env 'DEVICE_ID' assigned for this process is: 6 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.471.059 [mindspore/ccsrc/distributed/collective/collective_manager.cc:657] AssignLocalRank] The device_id of ms_context is set to local rank id [6]. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.471.082 [mindspore/ccsrc/distributed/collective/collective_manager.cc:205] Initialize] [PROF]AssignLocalRank costs 0.296 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.471.157 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:221] Initialize] Start initializing device context. [INFO] CORE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.471.188 [mindspore/core/utils/ms_context.cc:452] PrintJitLevelAndExecMode] The jit_level is: O0, and enable kernelbykernel executor in the GRAPH mode. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.471.269 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:307] Init] Start inititializing kernel runtime(SetDevice & CreateDefaultStream & MemoryManagerInitialization etc). [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.471.290 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:646] SetRtDevice] Enter SetRtDevice, current initialize device number:0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.504.911 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:112] RegCallback] Register callback thread, stream : 0x4a429b30. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.505.346 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:129] RegCallback] Register callback thread success, stream : 0x4a429b30. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.505.376 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:670] CreateDefaultStream] Create ascend default stream, stream id: 0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.506.813 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:675] CreateDefaultStream] Create ascend communication stream, stream id: 1 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.507.135 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:57] AscendVmmAdapter] VMM align size is 2097152 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.507.204 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:102] IsVmmEnabled] Soc is neither ascend910b nor ascend910_93, vmm is disabled by default. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.507.233 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:175] Initialize] Device MOC Size:32768M, Device free MOC Size:32737M, Reserved MOC size for Other Components(HCCL/rts/etc.):2057M, Recommend Reserved MOC size for Other Components:2046M, User define MindSpore MOC Size:0G, MindSpore Used MOC Size:30680M. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.531.525 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:112] RegCallback] Register callback thread, stream : 0x367ede00. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.531.937 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:129] RegCallback] Register callback thread success, stream : 0x367ede00. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.531.974 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:670] CreateDefaultStream] Create ascend default stream, stream id: 0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.533.399 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:675] CreateDefaultStream] Create ascend communication stream, stream id: 1 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.533.703 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:57] AscendVmmAdapter] VMM align size is 2097152 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.533.764 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:102] IsVmmEnabled] Soc is neither ascend910b nor ascend910_93, vmm is disabled by default. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.533.794 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:175] Initialize] Device MOC Size:32768M, Device free MOC Size:32737M, Reserved MOC size for Other Components(HCCL/rts/etc.):2057M, Recommend Reserved MOC size for Other Components:2046M, User define MindSpore MOC Size:0G, MindSpore Used MOC Size:30680M. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.467 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:246] BuildCluster] [PROF]BuildCluster costs 3001.99 msec. [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.516 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.537 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:341] PostProcess] Start post processing for computing graph nodes. [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.558 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 2 rank id: 2 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.577 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:359] PostProcess] Client ip address in this cluster of this compute graph node is 127.0.0.1 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.598 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:370] PostProcess] Assigned for this worker port range is 8630 to 8885 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.623 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:252] BuildCluster] [PROF]PostBuildCluster costs 0.082 msec. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.656 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:183] node_num] Number of role MS_WORKER is 8 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.680 [mindspore/ccsrc/distributed/init.cc:46] Initialize] [PROF]distributed_cluster_init costs 3002.33 msec. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.705 [mindspore/ccsrc/distributed/collective/collective_manager.cc:174] Initialize] Start initializing collective communication for backend: Ascend... [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.569.800 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:48] MsCollectiveCommLib] Global group name of MindSpore collective communication library is mccl_world_group [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.821 [mindspore/ccsrc/distributed/collective/collective_manager.cc:537] InitHostCommlib] Start initializing communication library on host side... [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.569.842 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:81] Initialize] Query retry count is 400 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.569.859 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:89] Initialize] Interval of retry allgather hostname lower and upper are 1 and 2 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.886 [mindspore/ccsrc/distributed/collective/collective_manager.cc:557] InitHostCommlib] Communication library on host side is successfully initialized. Global rank id: 2, global rank size: 8 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.910 [mindspore/ccsrc/distributed/collective/collective_manager.cc:197] Initialize] [PROF]InitHostCommlib costs 0.181 msec. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.569.930 [mindspore/ccsrc/distributed/collective/collective_manager.cc:600] AssignLocalRank] Host name for rank 2 is ascend85 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.570.080 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:564] GetHostNames] Worker gets host names {"hostnames":["ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85"]} [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.570.154 [mindspore/ccsrc/distributed/collective/collective_manager.cc:621] AssignLocalRank] Successfully get all nodes' hostname. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.570.176 [mindspore/ccsrc/distributed/collective/collective_manager.cc:638] AssignLocalRank] The local rank id assigned for this process is 2 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.570.194 [mindspore/ccsrc/distributed/collective/collective_manager.cc:639] AssignLocalRank] The env 'DEVICE_ID' assigned for this process is: 2 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.570.217 [mindspore/ccsrc/distributed/collective/collective_manager.cc:657] AssignLocalRank] The device_id of ms_context is set to local rank id [2]. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:57:59.570.240 [mindspore/ccsrc/distributed/collective/collective_manager.cc:205] Initialize] [PROF]AssignLocalRank costs 0.308 msec. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.570.310 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:221] Initialize] Start initializing device context. [INFO] CORE(187764,ffff97badc10,python):2025-02-07-15:57:59.570.340 [mindspore/core/utils/ms_context.cc:452] PrintJitLevelAndExecMode] The jit_level is: O0, and enable kernelbykernel executor in the GRAPH mode. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.570.423 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:307] Init] Start inititializing kernel runtime(SetDevice & CreateDefaultStream & MemoryManagerInitialization etc). [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.570.443 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:646] SetRtDevice] Enter SetRtDevice, current initialize device number:0 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.534 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:246] BuildCluster] [PROF]BuildCluster costs 2501.93 msec. [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.560 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.575 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:341] PostProcess] Start post processing for computing graph nodes. [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.594 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 4 rank id: 4 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.612 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:359] PostProcess] Client ip address in this cluster of this compute graph node is 127.0.0.1 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.632 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:370] PostProcess] Assigned for this worker port range is 9142 to 9397 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.654 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:252] BuildCluster] [PROF]PostBuildCluster costs 0.075 msec. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.701 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:183] node_num] Number of role MS_WORKER is 8 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.725 [mindspore/ccsrc/distributed/init.cc:46] Initialize] [PROF]distributed_cluster_init costs 2502.23 msec. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.749 [mindspore/ccsrc/distributed/collective/collective_manager.cc:174] Initialize] Start initializing collective communication for backend: Ascend... [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.832 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:48] MsCollectiveCommLib] Global group name of MindSpore collective communication library is mccl_world_group [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.852 [mindspore/ccsrc/distributed/collective/collective_manager.cc:537] InitHostCommlib] Start initializing communication library on host side... [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.874 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:81] Initialize] Query retry count is 400 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.892 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:89] Initialize] Interval of retry allgather hostname lower and upper are 1 and 2 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.921 [mindspore/ccsrc/distributed/collective/collective_manager.cc:557] InitHostCommlib] Communication library on host side is successfully initialized. Global rank id: 4, global rank size: 8 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.943 [mindspore/ccsrc/distributed/collective/collective_manager.cc:197] Initialize] [PROF]InitHostCommlib costs 0.169 msec. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.611.962 [mindspore/ccsrc/distributed/collective/collective_manager.cc:600] AssignLocalRank] Host name for rank 4 is ascend85 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.612.081 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:564] GetHostNames] Worker gets host names {"hostnames":["ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85"]} [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.612.162 [mindspore/ccsrc/distributed/collective/collective_manager.cc:621] AssignLocalRank] Successfully get all nodes' hostname. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.612.185 [mindspore/ccsrc/distributed/collective/collective_manager.cc:638] AssignLocalRank] The local rank id assigned for this process is 4 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.612.202 [mindspore/ccsrc/distributed/collective/collective_manager.cc:639] AssignLocalRank] The env 'DEVICE_ID' assigned for this process is: 4 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.612.245 [mindspore/ccsrc/distributed/collective/collective_manager.cc:657] AssignLocalRank] The device_id of ms_context is set to local rank id [4]. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:57:59.612.268 [mindspore/ccsrc/distributed/collective/collective_manager.cc:205] Initialize] [PROF]AssignLocalRank costs 0.302 msec. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.612.324 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:221] Initialize] Start initializing device context. [INFO] CORE(187789,ffffaa419c10,python):2025-02-07-15:57:59.612.355 [mindspore/core/utils/ms_context.cc:452] PrintJitLevelAndExecMode] The jit_level is: O0, and enable kernelbykernel executor in the GRAPH mode. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.612.437 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:307] Init] Start inititializing kernel runtime(SetDevice & CreateDefaultStream & MemoryManagerInitialization etc). [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.612.457 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:646] SetRtDevice] Enter SetRtDevice, current initialize device number:0 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.648.528 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:112] RegCallback] Register callback thread, stream : 0x2615e5e0. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.648.850 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:129] RegCallback] Register callback thread success, stream : 0x2615e5e0. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.648.884 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:670] CreateDefaultStream] Create ascend default stream, stream id: 0 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.650.235 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:675] CreateDefaultStream] Create ascend communication stream, stream id: 1 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.650.504 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:57] AscendVmmAdapter] VMM align size is 2097152 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.650.561 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:102] IsVmmEnabled] Soc is neither ascend910b nor ascend910_93, vmm is disabled by default. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.650.589 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:175] Initialize] Device MOC Size:32768M, Device free MOC Size:32738M, Reserved MOC size for Other Components(HCCL/rts/etc.):2056M, Recommend Reserved MOC size for Other Components:2046M, User define MindSpore MOC Size:0G, MindSpore Used MOC Size:30682M. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.668.567 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:258] MallocFromRts] Call rtMalloc to allocate device memory Success, size: 32170311680 bytes, address start: 0x12c100000000 end: 0x12c87d800000 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.668.665 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:66] Initialize] Ascend Memory Adapter initialize success, Memory Statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30680M MindSpore memory base address: 0x12c100000000 Actual peak memory usage: 0M [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.670.173 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:367] Init] End inititializing kernel runtime. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.670.274 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:245] SetHcclOptions] Values for hccl options: env_table_file[], simulation_level[], env_rank_id[7], env_device_id[7], enable_hccl[1], UseDynamicCluster[1]. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.670.302 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:271] SetHcclOptions] No hccl mode. If use hccl, make sure [RANK_TABLE_FILE,RANK_ID,DEVICE_ID] all be set in ENV. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.670.322 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:495] GetGeOptions] Set ge.exec.jobId to default value 0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.670.338 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:505] GetGeOptions] Got empty proto lib path, cannot set ge.opsProtoLibPath. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.670.355 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:204] SetAscendConfig] Set GE topo mode to memory-priority. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.670.371 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:207] SetAscendConfig] Set staticMemoryPolicy to default mode 2. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.670.387 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:214] SetAscendConfig] The default value of jit_compile is set to 2. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.670.403 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:179] SetAscendHF32Config] The default value of allow_matmul_hf32 and allow_conv_hf32 are set by CANN. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.670.416 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:188] SetAscendHF32Config] allow_matmul_hf32: , allow_conv_hf32: [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:57:59.670.469 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:528] GetGeOptions] Set ge::DETERMINISTIC to 0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.681.829 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:112] RegCallback] Register callback thread, stream : 0x2d36d880. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.682.212 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:129] RegCallback] Register callback thread success, stream : 0x2d36d880. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.682.244 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:670] CreateDefaultStream] Create ascend default stream, stream id: 0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.683.630 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:675] CreateDefaultStream] Create ascend communication stream, stream id: 1 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.683.939 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:57] AscendVmmAdapter] VMM align size is 2097152 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.684.009 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:102] IsVmmEnabled] Soc is neither ascend910b nor ascend910_93, vmm is disabled by default. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.684.040 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:175] Initialize] Device MOC Size:32768M, Device free MOC Size:32733M, Reserved MOC size for Other Components(HCCL/rts/etc.):2057M, Recommend Reserved MOC size for Other Components:2045M, User define MindSpore MOC Size:0G, MindSpore Used MOC Size:30676M. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.690.185 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:112] RegCallback] Register callback thread, stream : 0x2a27bc70. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.690.645 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:129] RegCallback] Register callback thread success, stream : 0x2a27bc70. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.690.676 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:670] CreateDefaultStream] Create ascend default stream, stream id: 0 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.692.058 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:675] CreateDefaultStream] Create ascend communication stream, stream id: 1 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.692.406 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:57] AscendVmmAdapter] VMM align size is 2097152 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.692.458 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:102] IsVmmEnabled] Soc is neither ascend910b nor ascend910_93, vmm is disabled by default. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.692.484 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:175] Initialize] Device MOC Size:32768M, Device free MOC Size:32737M, Reserved MOC size for Other Components(HCCL/rts/etc.):2057M, Recommend Reserved MOC size for Other Components:2046M, User define MindSpore MOC Size:0G, MindSpore Used MOC Size:30680M. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.697.569 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:258] MallocFromRts] Call rtMalloc to allocate device memory Success, size: 32170311680 bytes, address start: 0x12c100000000 end: 0x12c87d800000 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.697.634 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:66] Initialize] Ascend Memory Adapter initialize success, Memory Statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30680M MindSpore memory base address: 0x12c100000000 Actual peak memory usage: 0M [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.699.220 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:367] Init] End inititializing kernel runtime. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.699.324 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:245] SetHcclOptions] Values for hccl options: env_table_file[], simulation_level[], env_rank_id[5], env_device_id[5], enable_hccl[1], UseDynamicCluster[1]. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.699.354 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:271] SetHcclOptions] No hccl mode. If use hccl, make sure [RANK_TABLE_FILE,RANK_ID,DEVICE_ID] all be set in ENV. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.699.427 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:495] GetGeOptions] Set ge.exec.jobId to default value 0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.699.498 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:505] GetGeOptions] Got empty proto lib path, cannot set ge.opsProtoLibPath. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.699.575 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:204] SetAscendConfig] Set GE topo mode to memory-priority. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.699.646 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:207] SetAscendConfig] Set staticMemoryPolicy to default mode 2. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.699.721 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:214] SetAscendConfig] The default value of jit_compile is set to 2. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.699.740 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:179] SetAscendHF32Config] The default value of allow_matmul_hf32 and allow_conv_hf32 are set by CANN. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.699.757 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:188] SetAscendHF32Config] allow_matmul_hf32: , allow_conv_hf32: [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:57:59.699.805 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:528] GetGeOptions] Set ge::DETERMINISTIC to 0 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.199 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:246] BuildCluster] [PROF]BuildCluster costs 3002.14 msec. [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.227 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.242 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:341] PostProcess] Start post processing for computing graph nodes. [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.261 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 1 rank id: 1 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.278 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:359] PostProcess] Client ip address in this cluster of this compute graph node is 127.0.0.1 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.296 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:370] PostProcess] Assigned for this worker port range is 8374 to 8629 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.322 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:252] BuildCluster] [PROF]PostBuildCluster costs 0.076 msec. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.354 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:183] node_num] Number of role MS_WORKER is 8 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.385 [mindspore/ccsrc/distributed/init.cc:46] Initialize] [PROF]distributed_cluster_init costs 3002.45 msec. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.411 [mindspore/ccsrc/distributed/collective/collective_manager.cc:174] Initialize] Start initializing collective communication for backend: Ascend... [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.535 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:48] MsCollectiveCommLib] Global group name of MindSpore collective communication library is mccl_world_group [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.556 [mindspore/ccsrc/distributed/collective/collective_manager.cc:537] InitHostCommlib] Start initializing communication library on host side... [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.579 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:81] Initialize] Query retry count is 400 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.597 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:89] Initialize] Interval of retry allgather hostname lower and upper are 1 and 2 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.631 [mindspore/ccsrc/distributed/collective/collective_manager.cc:557] InitHostCommlib] Communication library on host side is successfully initialized. Global rank id: 1, global rank size: 8 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.654 [mindspore/ccsrc/distributed/collective/collective_manager.cc:197] Initialize] [PROF]InitHostCommlib costs 0.219 msec. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.675 [mindspore/ccsrc/distributed/collective/collective_manager.cc:600] AssignLocalRank] Host name for rank 1 is ascend85 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.814 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:564] GetHostNames] Worker gets host names {"hostnames":["ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85","ascend85"]} [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.905 [mindspore/ccsrc/distributed/collective/collective_manager.cc:621] AssignLocalRank] Successfully get all nodes' hostname. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.930 [mindspore/ccsrc/distributed/collective/collective_manager.cc:638] AssignLocalRank] The local rank id assigned for this process is 1 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.948 [mindspore/ccsrc/distributed/collective/collective_manager.cc:639] AssignLocalRank] The env 'DEVICE_ID' assigned for this process is: 1 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.973 [mindspore/ccsrc/distributed/collective/collective_manager.cc:657] AssignLocalRank] The device_id of ms_context is set to local rank id [1]. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:57:59.750.997 [mindspore/ccsrc/distributed/collective/collective_manager.cc:205] Initialize] [PROF]AssignLocalRank costs 0.318 msec. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.751.101 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:221] Initialize] Start initializing device context. [INFO] CORE(187753,ffff8292dc10,python):2025-02-07-15:57:59.751.137 [mindspore/core/utils/ms_context.cc:452] PrintJitLevelAndExecMode] The jit_level is: O0, and enable kernelbykernel executor in the GRAPH mode. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.751.321 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:307] Init] Start inititializing kernel runtime(SetDevice & CreateDefaultStream & MemoryManagerInitialization etc). [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.751.344 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:646] SetRtDevice] Enter SetRtDevice, current initialize device number:0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.769.103 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:258] MallocFromRts] Call rtMalloc to allocate device memory Success, size: 32166117376 bytes, address start: 0x12c100000000 end: 0x12c87d400000 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.769.172 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:66] Initialize] Ascend Memory Adapter initialize success, Memory Statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30676M MindSpore memory base address: 0x12c100000000 Actual peak memory usage: 0M [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.770.659 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:367] Init] End inititializing kernel runtime. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.770.743 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:245] SetHcclOptions] Values for hccl options: env_table_file[], simulation_level[], env_rank_id[0], env_device_id[0], enable_hccl[1], UseDynamicCluster[1]. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.770.772 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:271] SetHcclOptions] No hccl mode. If use hccl, make sure [RANK_TABLE_FILE,RANK_ID,DEVICE_ID] all be set in ENV. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.770.795 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:495] GetGeOptions] Set ge.exec.jobId to default value 0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.770.814 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:505] GetGeOptions] Got empty proto lib path, cannot set ge.opsProtoLibPath. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.770.834 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:204] SetAscendConfig] Set GE topo mode to memory-priority. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.770.852 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:207] SetAscendConfig] Set staticMemoryPolicy to default mode 2. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.770.872 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:214] SetAscendConfig] The default value of jit_compile is set to 2. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.770.891 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:179] SetAscendHF32Config] The default value of allow_matmul_hf32 and allow_conv_hf32 are set by CANN. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.770.908 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:188] SetAscendHF32Config] allow_matmul_hf32: , allow_conv_hf32: [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:57:59.770.963 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:528] GetGeOptions] Set ge::DETERMINISTIC to 0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.773.697 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:112] RegCallback] Register callback thread, stream : 0x20a1d080. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.773.955 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:129] RegCallback] Register callback thread success, stream : 0x20a1d080. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.773.984 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:670] CreateDefaultStream] Create ascend default stream, stream id: 0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.775.366 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:675] CreateDefaultStream] Create ascend communication stream, stream id: 1 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.775.642 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:57] AscendVmmAdapter] VMM align size is 2097152 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.775.695 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:102] IsVmmEnabled] Soc is neither ascend910b nor ascend910_93, vmm is disabled by default. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.775.720 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:175] Initialize] Device MOC Size:32768M, Device free MOC Size:32737M, Reserved MOC size for Other Components(HCCL/rts/etc.):2057M, Recommend Reserved MOC size for Other Components:2046M, User define MindSpore MOC Size:0G, MindSpore Used MOC Size:30680M. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.804.831 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:258] MallocFromRts] Call rtMalloc to allocate device memory Success, size: 32172408832 bytes, address start: 0x12c100000000 end: 0x12c87da00000 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.804.885 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:66] Initialize] Ascend Memory Adapter initialize success, Memory Statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30682M MindSpore memory base address: 0x12c100000000 Actual peak memory usage: 0M [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.806.486 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:367] Init] End inititializing kernel runtime. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.806.586 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:245] SetHcclOptions] Values for hccl options: env_table_file[], simulation_level[], env_rank_id[3], env_device_id[3], enable_hccl[1], UseDynamicCluster[1]. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.806.613 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:271] SetHcclOptions] No hccl mode. If use hccl, make sure [RANK_TABLE_FILE,RANK_ID,DEVICE_ID] all be set in ENV. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.806.683 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:495] GetGeOptions] Set ge.exec.jobId to default value 0 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.806.749 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:505] GetGeOptions] Got empty proto lib path, cannot set ge.opsProtoLibPath. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.806.826 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:204] SetAscendConfig] Set GE topo mode to memory-priority. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.806.888 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:207] SetAscendConfig] Set staticMemoryPolicy to default mode 2. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.806.959 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:214] SetAscendConfig] The default value of jit_compile is set to 2. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.806.977 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:179] SetAscendHF32Config] The default value of allow_matmul_hf32 and allow_conv_hf32 are set by CANN. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.806.993 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:188] SetAscendHF32Config] allow_matmul_hf32: , allow_conv_hf32: [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:57:59.807.046 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:528] GetGeOptions] Set ge::DETERMINISTIC to 0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.827.181 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:112] RegCallback] Register callback thread, stream : 0x2158b480. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.827.465 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:129] RegCallback] Register callback thread success, stream : 0x2158b480. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.827.498 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:670] CreateDefaultStream] Create ascend default stream, stream id: 0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.829.049 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:675] CreateDefaultStream] Create ascend communication stream, stream id: 1 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.829.355 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:57] AscendVmmAdapter] VMM align size is 2097152 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.829.400 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:102] IsVmmEnabled] Soc is neither ascend910b nor ascend910_93, vmm is disabled by default. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.829.427 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:175] Initialize] Device MOC Size:32768M, Device free MOC Size:32737M, Reserved MOC size for Other Components(HCCL/rts/etc.):2057M, Recommend Reserved MOC size for Other Components:2046M, User define MindSpore MOC Size:0G, MindSpore Used MOC Size:30680M. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.839.288 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:258] MallocFromRts] Call rtMalloc to allocate device memory Success, size: 32170311680 bytes, address start: 0x12c100000000 end: 0x12c87d800000 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.839.339 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:66] Initialize] Ascend Memory Adapter initialize success, Memory Statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30680M MindSpore memory base address: 0x12c100000000 Actual peak memory usage: 0M [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.840.835 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:367] Init] End inititializing kernel runtime. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.840.914 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:245] SetHcclOptions] Values for hccl options: env_table_file[], simulation_level[], env_rank_id[6], env_device_id[6], enable_hccl[1], UseDynamicCluster[1]. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.840.942 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:271] SetHcclOptions] No hccl mode. If use hccl, make sure [RANK_TABLE_FILE,RANK_ID,DEVICE_ID] all be set in ENV. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.840.964 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:495] GetGeOptions] Set ge.exec.jobId to default value 0 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.840.983 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:505] GetGeOptions] Got empty proto lib path, cannot set ge.opsProtoLibPath. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.841.003 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:204] SetAscendConfig] Set GE topo mode to memory-priority. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.841.022 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:207] SetAscendConfig] Set staticMemoryPolicy to default mode 2. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.841.039 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:214] SetAscendConfig] The default value of jit_compile is set to 2. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.841.059 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:179] SetAscendHF32Config] The default value of allow_matmul_hf32 and allow_conv_hf32 are set by CANN. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.841.076 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:188] SetAscendHF32Config] allow_matmul_hf32: , allow_conv_hf32: [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:57:59.841.153 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:528] GetGeOptions] Set ge::DETERMINISTIC to 0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.853.247 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:258] MallocFromRts] Call rtMalloc to allocate device memory Success, size: 32170311680 bytes, address start: 0x12c100000000 end: 0x12c87d800000 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.853.308 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:66] Initialize] Ascend Memory Adapter initialize success, Memory Statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30680M MindSpore memory base address: 0x12c100000000 Actual peak memory usage: 0M [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.854.794 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:367] Init] End inititializing kernel runtime. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.854.871 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:245] SetHcclOptions] Values for hccl options: env_table_file[], simulation_level[], env_rank_id[2], env_device_id[2], enable_hccl[1], UseDynamicCluster[1]. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.854.895 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:271] SetHcclOptions] No hccl mode. If use hccl, make sure [RANK_TABLE_FILE,RANK_ID,DEVICE_ID] all be set in ENV. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.854.916 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:495] GetGeOptions] Set ge.exec.jobId to default value 0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.854.934 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:505] GetGeOptions] Got empty proto lib path, cannot set ge.opsProtoLibPath. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.854.954 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:204] SetAscendConfig] Set GE topo mode to memory-priority. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.854.972 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:207] SetAscendConfig] Set staticMemoryPolicy to default mode 2. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.854.991 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:214] SetAscendConfig] The default value of jit_compile is set to 2. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.855.010 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:179] SetAscendHF32Config] The default value of allow_matmul_hf32 and allow_conv_hf32 are set by CANN. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.855.027 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:188] SetAscendHF32Config] allow_matmul_hf32: , allow_conv_hf32: [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:57:59.855.079 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:528] GetGeOptions] Set ge::DETERMINISTIC to 0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.972.788 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:258] MallocFromRts] Call rtMalloc to allocate device memory Success, size: 32170311680 bytes, address start: 0x12c100000000 end: 0x12c87d800000 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.972.841 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:66] Initialize] Ascend Memory Adapter initialize success, Memory Statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30680M MindSpore memory base address: 0x12c100000000 Actual peak memory usage: 0M [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.974.347 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:367] Init] End inititializing kernel runtime. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.974.445 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:245] SetHcclOptions] Values for hccl options: env_table_file[], simulation_level[], env_rank_id[4], env_device_id[4], enable_hccl[1], UseDynamicCluster[1]. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.974.472 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:271] SetHcclOptions] No hccl mode. If use hccl, make sure [RANK_TABLE_FILE,RANK_ID,DEVICE_ID] all be set in ENV. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.974.493 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:495] GetGeOptions] Set ge.exec.jobId to default value 0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.974.511 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:505] GetGeOptions] Got empty proto lib path, cannot set ge.opsProtoLibPath. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.974.529 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:204] SetAscendConfig] Set GE topo mode to memory-priority. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.974.546 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:207] SetAscendConfig] Set staticMemoryPolicy to default mode 2. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.974.565 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:214] SetAscendConfig] The default value of jit_compile is set to 2. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.974.582 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:179] SetAscendHF32Config] The default value of allow_matmul_hf32 and allow_conv_hf32 are set by CANN. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.974.599 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:188] SetAscendHF32Config] allow_matmul_hf32: , allow_conv_hf32: [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:57:59.974.630 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:528] GetGeOptions] Set ge::DETERMINISTIC to 0 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.975.626 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:112] RegCallback] Register callback thread, stream : 0x1f655490. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.975.934 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:129] RegCallback] Register callback thread success, stream : 0x1f655490. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.975.972 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:670] CreateDefaultStream] Create ascend default stream, stream id: 0 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.977.400 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:675] CreateDefaultStream] Create ascend communication stream, stream id: 1 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.977.724 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:57] AscendVmmAdapter] VMM align size is 2097152 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.977.818 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:102] IsVmmEnabled] Soc is neither ascend910b nor ascend910_93, vmm is disabled by default. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:57:59.977.850 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:175] Initialize] Device MOC Size:32768M, Device free MOC Size:32735M, Reserved MOC size for Other Components(HCCL/rts/etc.):2057M, Recommend Reserved MOC size for Other Components:2045M, User define MindSpore MOC Size:0G, MindSpore Used MOC Size:30678M. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.048.765 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:258] MallocFromRts] Call rtMalloc to allocate device memory Success, size: 32168214528 bytes, address start: 0x12c100000000 end: 0x12c87d600000 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.048.838 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:66] Initialize] Ascend Memory Adapter initialize success, Memory Statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30678M MindSpore memory base address: 0x12c100000000 Actual peak memory usage: 0M [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.050.322 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:367] Init] End inititializing kernel runtime. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.050.414 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:245] SetHcclOptions] Values for hccl options: env_table_file[], simulation_level[], env_rank_id[1], env_device_id[1], enable_hccl[1], UseDynamicCluster[1]. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.050.443 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:271] SetHcclOptions] No hccl mode. If use hccl, make sure [RANK_TABLE_FILE,RANK_ID,DEVICE_ID] all be set in ENV. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.050.466 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:495] GetGeOptions] Set ge.exec.jobId to default value 0 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.050.485 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:505] GetGeOptions] Got empty proto lib path, cannot set ge.opsProtoLibPath. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.050.505 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:204] SetAscendConfig] Set GE topo mode to memory-priority. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.050.524 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:207] SetAscendConfig] Set staticMemoryPolicy to default mode 2. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.050.543 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:214] SetAscendConfig] The default value of jit_compile is set to 2. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.050.567 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:179] SetAscendHF32Config] The default value of allow_matmul_hf32 and allow_conv_hf32 are set by CANN. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.050.585 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:188] SetAscendHF32Config] allow_matmul_hf32: , allow_conv_hf32: [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:00.050.642 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:528] GetGeOptions] Set ge::DETERMINISTIC to 0 env 'HCCL_IF_BASE_PORT' is 30000 env 'HCCL_IF_BASE_PORT' is 30000 env 'HCCL_IF_BASE_PORT' is 30000 env 'HCCL_IF_BASE_PORT' is 30000 env 'HCCL_IF_BASE_PORT' is 30000 env 'HCCL_IF_BASE_PORT' is 30000 env 'HCCL_IF_BASE_PORT' is 30000 env 'HCCL_IF_BASE_PORT' is 30000 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.873.683 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:480] GetGeSessionOptions] Set GE atomic clean policy to 1. [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.877.069 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:55] NewSession] Create new GE session success! [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.877.126 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:214] SetGeSession] Add a new Ge Session success [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.877.171 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:67] GraphRunner] ME run in ONE_DEVICE strategy mode [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.877.305 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:250] SetGraphRunner] Add a new GraphRunner success [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.877.357 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1610] Initialize] Create session and graphrunner successful. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.877.380 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1614] Initialize] Init ge successful, ge reference = 1. [INFO] COMMON(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.877.590 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:194] GenerateAclInitJson] Generate aclInit json to file : /home/jenkins/mindspore/testcases/testcases/tests/st/auto_parallel/aclinit.json [INFO] COMMON(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.888.302 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:241] InitializeAcl] Call aclInit successfully [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.888.415 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:248] SetAclOpPrecisionMode] Set aclop PRECISION_MODE: allow_fp32_to_fp16 [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.888.887 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.889.620 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:272] Initialize] End initializing device context. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.889.708 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_res_manager.cc:404] LoadCollectiveCommLib] Loading MACCL collective library successfully. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.889.739 [mindspore/ccsrc/distributed/collective/collective_manager.cc:581] InitDeviceCommLib] Start initializing communication library on device side... [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.889.816 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:82] OpenTsd] Device id = 6, rank size = 8. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.890.001 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel _npu_log begins the construction process witch capacity 128 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.902.055 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_dump begins the construction process witch capacity 128 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.902.753 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_summary begins the construction process witch capacity 128 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.903.248 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_image_summary begins the construction process witch capacity 128 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.903.772 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_scalar_summary begins the construction process witch capacity 128 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.904.261 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_histogram_summary begins the construction process witch capacity 128 [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.904.816 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:215] InitHccl] Start init hccl adapter. [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.905.027 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:450] InitKernelInfoStore] Start init hccl kernel info store. [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.905.112 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:467] InitKernelInfoStore] Get builder ops_kernel_info_hccl [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.905.246 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:489] InitKernelInfoStore] Init hccl kernel info store success. [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.905.271 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:667] InitHcclExec] Start init hccl exec. [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.908.870 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:673] InitHcclExec] Hcom DynamicKernel Initialize success [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.908.914 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:679] InitHcclExec] InitHcclExec success [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.908.935 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:233] InitHccl] Init hccl adapter success. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.908.975 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:87] Initialize] Successfully initialize HCCL. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.908.999 [mindspore/ccsrc/distributed/collective/collective_manager.cc:588] InitDeviceCommLib] Communication library on device side is successfully initialized. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.044 [mindspore/ccsrc/distributed/collective/collective_manager.cc:210] Initialize] [PROF]InitDeviceBackend costs 4437.92 msec. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.078 [mindspore/ccsrc/distributed/collective/collective_manager.cc:810] IsAsyncInitGlobalComm] Async initialize global comm: 1. async_conf: 1, is_graph: 1, use_rank_table: 0, simulation: 0, use_mpi: 0, is_ascend: 1 [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.102 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.161 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.026 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.207 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group hccl_world_group [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.231 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.045 msec. [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.302 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.370 [mindspore/ccsrc/distributed/collective/collective_manager.cc:869] SubmitCreateDeviceCommTask] Launch init comm thread. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.408 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for hccl_world_group. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.433 [mindspore/ccsrc/distributed/collective/collective_manager.cc:224] Initialize] [PROF]CreateGlobalCommunicationGroup costs 0.331 msec. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.452 [mindspore/ccsrc/distributed/collective/collective_manager.cc:227] Initialize] End initializing collective communication for backend: Ascend [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.487 [mindspore/ccsrc/distributed/init.cc:56] Initialize] [PROF]distributed_collective_init costs 4438.87 msec. [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:03.909.489 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: hccl_world_group [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.909.516 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:298] RecordInitStatus] Status record: system init. [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:03.909.542 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for hccl_world_group [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:03.909.737 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.127 msec. [WARNING] DEVICE(187818,fffeba7fc0f0,python):2025-02-07-15:58:03.910.211 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.919.658 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:667] GenerateArgumentsKey] Generate a new compile key for new args, key: 0 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.919.738 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:675] GenerateArgumentsKey] New cached args: Arg[0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1624fed0, value: ValueAny) Arg[1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1624fed0, value: ValueAny) [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.920.360 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1263] CompileInner] Start compiling, phase: train.1738915083917166592.281470987261328.0.. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.920.397 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:272] LoadPassesConfig] AUTO_PASSES_OPTIMIZE_PATH: [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.920.478 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] Start compiling 'Net.construct' and it will take a while. Please wait... [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.930.495 [mindspore/ccsrc/backend/graph_compiler/transform.cc:575] CreateBackend] CreateBackend is: ge [INFO] DEBUG(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.930.666 [mindspore/ccsrc/debug/debugger/debugger.cc:80] Init] Debugger got device_id: 6 [INFO] DEBUG(187818,ffffbe0b2c10,python):2025-02-07-15:58:03.930.696 [mindspore/ccsrc/debug/debugger/debugger.cc:82] Init] Debugger got device_target: Ascend [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:03.996.324 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:480] GetGeSessionOptions] Set GE atomic clean policy to 1. [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:03.999.644 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:55] NewSession] Create new GE session success! [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:03.999.692 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:214] SetGeSession] Add a new Ge Session success [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:03.999.763 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:67] GraphRunner] ME run in ONE_DEVICE strategy mode [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:03.999.904 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:250] SetGraphRunner] Add a new GraphRunner success [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:03.999.953 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1610] Initialize] Create session and graphrunner successful. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:03.999.974 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1614] Initialize] Init ge successful, ge reference = 1. [INFO] COMMON(187764,ffff97badc10,python):2025-02-07-15:58:04.010.934 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:241] InitializeAcl] Call aclInit successfully [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.011.047 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:248] SetAclOpPrecisionMode] Set aclop PRECISION_MODE: allow_fp32_to_fp16 [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:04.011.426 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.012.179 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:272] Initialize] End initializing device context. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.012.270 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_res_manager.cc:404] LoadCollectiveCommLib] Loading MACCL collective library successfully. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.012.297 [mindspore/ccsrc/distributed/collective/collective_manager.cc:581] InitDeviceCommLib] Start initializing communication library on device side... [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.012.374 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:82] OpenTsd] Device id = 2, rank size = 8. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.012.555 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel _npu_log begins the construction process witch capacity 128 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.022.403 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_dump begins the construction process witch capacity 128 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.023.017 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_summary begins the construction process witch capacity 128 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.023.538 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_image_summary begins the construction process witch capacity 128 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.024.032 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_scalar_summary begins the construction process witch capacity 128 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.024.503 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_histogram_summary begins the construction process witch capacity 128 [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:04.025.047 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:215] InitHccl] Start init hccl adapter. [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:04.025.250 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:450] InitKernelInfoStore] Start init hccl kernel info store. [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:04.025.317 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:467] InitKernelInfoStore] Get builder ops_kernel_info_hccl [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:04.025.456 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:489] InitKernelInfoStore] Init hccl kernel info store success. [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:04.025.480 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:667] InitHcclExec] Start init hccl exec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.026.268 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:480] GetGeSessionOptions] Set GE atomic clean policy to 1. [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:04.028.932 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:673] InitHcclExec] Hcom DynamicKernel Initialize success [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:04.028.973 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:679] InitHcclExec] InitHcclExec success [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:04.029.010 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:233] InitHccl] Init hccl adapter success. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.029.032 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:87] Initialize] Successfully initialize HCCL. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.029.056 [mindspore/ccsrc/distributed/collective/collective_manager.cc:588] InitDeviceCommLib] Communication library on device side is successfully initialized. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.029.106 [mindspore/ccsrc/distributed/collective/collective_manager.cc:210] Initialize] [PROF]InitDeviceBackend costs 4458.82 msec. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.029.140 [mindspore/ccsrc/distributed/collective/collective_manager.cc:810] IsAsyncInitGlobalComm] Async initialize global comm: 1. async_conf: 1, is_graph: 1, use_rank_table: 0, simulation: 0, use_mpi: 0, is_ascend: 1 [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.029.164 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.029.220 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.023 msec. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.029.258 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group hccl_world_group [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.029.282 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.039 msec. [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.029.354 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.029.420 [mindspore/ccsrc/distributed/collective/collective_manager.cc:869] SubmitCreateDeviceCommTask] Launch init comm thread. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.029.454 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for hccl_world_group. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.029.479 [mindspore/ccsrc/distributed/collective/collective_manager.cc:224] Initialize] [PROF]CreateGlobalCommunicationGroup costs 0.314 msec. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.029.495 [mindspore/ccsrc/distributed/collective/collective_manager.cc:227] Initialize] End initializing collective communication for backend: Ascend [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.029.536 [mindspore/ccsrc/distributed/init.cc:56] Initialize] [PROF]distributed_collective_init costs 4459.82 msec. [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.029.538 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: hccl_world_group [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.029.565 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:298] RecordInitStatus] Status record: system init. [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.029.584 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for hccl_world_group [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.029.652 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:55] NewSession] Create new GE session success! [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.029.699 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:214] SetGeSession] Add a new Ge Session success [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.029.781 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:67] GraphRunner] ME run in ONE_DEVICE strategy mode [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.029.802 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.136 msec. [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.029.909 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:250] SetGraphRunner] Add a new GraphRunner success [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.029.962 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1610] Initialize] Create session and graphrunner successful. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.029.985 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1614] Initialize] Init ge successful, ge reference = 1. [WARNING] DEVICE(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.030.238 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.035.634 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:480] GetGeSessionOptions] Set GE atomic clean policy to 1. [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.039.395 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:55] NewSession] Create new GE session success! [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.039.444 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:214] SetGeSession] Add a new Ge Session success [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.039.567 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:67] GraphRunner] ME run in ONE_DEVICE strategy mode [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.039.586 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:667] GenerateArgumentsKey] Generate a new compile key for new args, key: 0 [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.039.707 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:250] SetGraphRunner] Add a new GraphRunner success [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.039.661 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:675] GenerateArgumentsKey] New cached args: Arg[0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xcace510, value: ValueAny) Arg[1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xcace510, value: ValueAny) [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.039.763 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1610] Initialize] Create session and graphrunner successful. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.039.801 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1614] Initialize] Init ge successful, ge reference = 1. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.040.313 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1263] CompileInner] Start compiling, phase: train.1738915084037101568.281470258177424.0.. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.040.355 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:272] LoadPassesConfig] AUTO_PASSES_OPTIMIZE_PATH: [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.040.428 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] Start compiling 'Net.construct' and it will take a while. Please wait... [INFO] COMMON(187834,ffffb35e0c10,python):2025-02-07-15:58:04.041.759 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:241] InitializeAcl] Call aclInit successfully [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.041.870 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:248] SetAclOpPrecisionMode] Set aclop PRECISION_MODE: allow_fp32_to_fp16 [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.042.287 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.043.110 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:272] Initialize] End initializing device context. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.043.242 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_res_manager.cc:404] LoadCollectiveCommLib] Loading MACCL collective library successfully. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.043.268 [mindspore/ccsrc/distributed/collective/collective_manager.cc:581] InitDeviceCommLib] Start initializing communication library on device side... [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.043.404 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:82] OpenTsd] Device id = 7, rank size = 8. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.043.594 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel _npu_log begins the construction process witch capacity 128 [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:04.051.198 [mindspore/ccsrc/backend/graph_compiler/transform.cc:575] CreateBackend] CreateBackend is: ge [INFO] DEBUG(187764,ffff97badc10,python):2025-02-07-15:58:04.051.422 [mindspore/ccsrc/debug/debugger/debugger.cc:80] Init] Debugger got device_id: 2 [INFO] DEBUG(187764,ffff97badc10,python):2025-02-07-15:58:04.051.450 [mindspore/ccsrc/debug/debugger/debugger.cc:82] Init] Debugger got device_target: Ascend [INFO] COMMON(187742,ffffa187dc10,python):2025-02-07-15:58:04.051.575 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:241] InitializeAcl] Call aclInit successfully [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.051.697 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:248] SetAclOpPrecisionMode] Set aclop PRECISION_MODE: allow_fp32_to_fp16 [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.052.076 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.052.923 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:272] Initialize] End initializing device context. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.053.035 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_res_manager.cc:404] LoadCollectiveCommLib] Loading MACCL collective library successfully. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.053.065 [mindspore/ccsrc/distributed/collective/collective_manager.cc:581] InitDeviceCommLib] Start initializing communication library on device side... [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.053.207 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:82] OpenTsd] Device id = 0, rank size = 8. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.053.395 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel _npu_log begins the construction process witch capacity 128 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.061.355 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_dump begins the construction process witch capacity 128 [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.061.585 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:739] Initialize] The actor thread number: 5, the kernel thread number: 25 [INFO] DEBUG(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.061.784 [mindspore/ccsrc/common/debug/env_config_parser.cc:152] ParseFromFile] The 'env_config_path' in 'mindspore.context.set_context(env_config_path={path})' is empty. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.062.047 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_summary begins the construction process witch capacity 128 [INFO] SYMBOLIC_SHAPE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.062.337 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.062.366 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.062.382 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.062.459 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.062.534 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.062.575 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_image_summary begins the construction process witch capacity 128 [INFO] SYMBOLIC_SHAPE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.062.549 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.062.642 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1682] Run] Pipeline run [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.062.724 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start bootstrap action. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.063.109 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_scalar_summary begins the construction process witch capacity 128 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.063.629 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_histogram_summary begins the construction process witch capacity 128 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.063.974 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end bootstrap action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.064.008 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 9 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.064.118 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start type_inference action. [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.064.116 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:215] InitHccl] Start init hccl adapter. [INFO] ANALYZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.064.274 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.064.319 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:450] InitKernelInfoStore] Start init hccl kernel info store. [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.064.401 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:467] InitKernelInfoStore] Get builder ops_kernel_info_hccl [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.064.541 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:489] InitKernelInfoStore] Init hccl kernel info store success. [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.064.565 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:667] InitHcclExec] Start init hccl exec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.065.070 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_dump begins the construction process witch capacity 128 [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.068.355 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:673] InitHcclExec] Hcom DynamicKernel Initialize success [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.068.403 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:679] InitHcclExec] InitHcclExec success [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.068.435 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:233] InitHccl] Init hccl adapter success. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.068.516 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:87] Initialize] Successfully initialize HCCL. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.068.570 [mindspore/ccsrc/distributed/collective/collective_manager.cc:588] InitDeviceCommLib] Communication library on device side is successfully initialized. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.068.939 [mindspore/ccsrc/distributed/collective/collective_manager.cc:210] Initialize] [PROF]InitDeviceBackend costs 4603.24 msec. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.068.974 [mindspore/ccsrc/distributed/collective/collective_manager.cc:810] IsAsyncInitGlobalComm] Async initialize global comm: 1. async_conf: 1, is_graph: 1, use_rank_table: 0, simulation: 0, use_mpi: 0, is_ascend: 1 [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.069.045 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.069.153 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.025 msec. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.069.194 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group hccl_world_group [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.069.218 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.042 msec. [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.069.299 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.069.371 [mindspore/ccsrc/distributed/collective/collective_manager.cc:869] SubmitCreateDeviceCommTask] Launch init comm thread. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.069.404 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for hccl_world_group. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.069.427 [mindspore/ccsrc/distributed/collective/collective_manager.cc:224] Initialize] [PROF]CreateGlobalCommunicationGroup costs 0.383 msec. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.069.443 [mindspore/ccsrc/distributed/collective/collective_manager.cc:227] Initialize] End initializing collective communication for backend: Ascend [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.069.476 [mindspore/ccsrc/distributed/init.cc:56] Initialize] [PROF]distributed_collective_init costs 4604.33 msec. [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.069.478 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: hccl_world_group [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.069.504 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:298] RecordInitStatus] Status record: system init. [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.069.526 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for hccl_world_group [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.074.523 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 4.9 msec. [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.074.758 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group hccl_world_group [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.074.794 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.218 msec. [WARNING] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.074.855 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.074.931 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187742,fffe7b7fe0f0,python):2025-02-07-15:58:04.075.200 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] ME(187818:281473870146576,MainProcess):2025-02-07-15:58:04.779.97 [mindspore/_extends/parse/namespace.py:132] 'Net' object has no attribute or method: '__is_tensors_queue__', so will return None. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.079.443 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.079.532 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: __main___Net_construct_2 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1624fed0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1624fed0, value: ValueAny), Parent: } [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.079.877 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end type_inference action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.079.908 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.080.005 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.080.243 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.080.265 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.080.352 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start graph_reusing action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.080.415 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end graph_reusing action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.080.432 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.080.458 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start inline action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.080.521 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.080.620 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.080.699 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass a1a2 start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.081.802 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:667] GenerateArgumentsKey] Generate a new compile key for new args, key: 0 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.081.888 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:675] GenerateArgumentsKey] New cached args: Arg[0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x18fb69c0, value: ValueAny) Arg[1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x18fb69c0, value: ValueAny) [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.082.693 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1263] CompileInner] Start compiling, phase: train.1738915084078296064.281470489933200.0.. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.082.740 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:272] LoadPassesConfig] AUTO_PASSES_OPTIMIZE_PATH: [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.082.812 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] Start compiling 'Net.construct' and it will take a while. Please wait... [INFO] PARSER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.087.067 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {cast_ : Prim[Cast]} [INFO] PARSER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.087.362 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:151] WriteVariable] fill_3 update var `value` with node @fill_3:value{[0]: CNode_4, [1]: param_value, [2]: param_type} [INFO] PARSER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.087.678 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {fillv2_ : Prim[FillV2]} [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.088.942 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_summary begins the construction process witch capacity 128 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.090.771 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.expand_dump_flag [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.090.830 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.090.927 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.090.972 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.066 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.109 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.loop_unroll [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.175 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_loop_unroll [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.255 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.277 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.296 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_a_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.587 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_1.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.616 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.recompute_prepare [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.637 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_recompute_prepare [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.666 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.688 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.717 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.738 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.762 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.781 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.805 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.839 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parameter_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.867 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.888 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_2 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.907 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r1_a_2 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.926 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.952 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.973 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.091.996 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.014 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.036 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.054 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.076 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.093 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.115 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.132 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.155 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.171 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.193 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.211 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.232 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.249 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.269 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.294 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.322 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.341 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.362 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.380 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.402 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.419 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.441 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.458 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.479 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.496 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.518 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.535 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.556 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.573 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.595 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.616 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_2.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.656 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parallel_inline_pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.681 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_parallel_inline_pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.706 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parallel_inline_pass.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.732 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.expand_dump_flag [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.753 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.786 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.805 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.830 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.851 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.loop_unroll [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.870 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_loop_unroll [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.895 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.917 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.092.935 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_a_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.073 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_1.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.094 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.recompute_prepare [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.113 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_recompute_prepare [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.138 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.159 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.183 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.203 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_assign_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.226 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.247 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.270 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.290 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parameter_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.312 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.333 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_2 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.351 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r2_a_2 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.368 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.390 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.415 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.438 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.455 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.478 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.496 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.518 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.535 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.557 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.573 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.595 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.613 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.635 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.651 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.674 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.690 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.711 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.728 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.755 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.772 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.794 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.810 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.839 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.857 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.878 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.895 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.916 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.933 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.955 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.972 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.093.994 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.010 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.032 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.051 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_2.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.072 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parallel_inline_pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.090 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_parallel_inline_pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.116 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parallel_inline_pass.unchanged [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.140 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass a1a2 end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.166 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end inline action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.185 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.215 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol action. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.240 [mindspore/ccsrc/frontend/parallel/dynamic_shape/dynamic_shape.cc:223] ForwardHasDynamicShape] Can not find the forward graph, so find the ops in root graph [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.296 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.315 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.349 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pre_auto_parallel action. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.405 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 33 us [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.426 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pre_auto_parallel action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.445 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.094.472 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start insert-virtual-dataset action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.414 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end insert-virtual-dataset action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.449 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.486 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol-second action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.509 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol-second action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.528 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.555 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start dataset_repeat_opt action. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.625 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.649 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2248] GetCommInfo] Get global rank from communication model, the global rank is 6 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.714 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 6, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 6 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.733 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.751 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.770 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 6, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [WARNING] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.798 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:305] BroadcastDataset] For now on, only dataset sink mode support dataset reader optimizer. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.819 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end dataset_repeat_opt action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.837 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.864 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_split action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.899 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:239] PipelineSplit] Get device num from communication model, the device num is 8 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.920 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:256] PipelineSplit] The parameter 'stage_num' is: 1. No need Pipeline split. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.955 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_split action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.973 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.095.999 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start optimize action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.049 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.083 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.108 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.160 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.214 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_a start ... [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.240 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.expand_dump_flag [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.264 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.285 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.305 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.342 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.363 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.loop_unroll [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.383 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_loop_unroll [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.412 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.433 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.451 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.755 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_1.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.781 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.recompute_prepare [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.803 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_recompute_prepare [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.832 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.863 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.892 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.913 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.938 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.959 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.096.984 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.005 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parameter_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.028 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.049 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_2 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.067 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_2 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.087 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.115 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.133 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.158 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.176 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.201 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.218 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.243 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.260 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.284 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.302 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.333 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.358 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.384 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.402 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.427 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.444 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.468 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.485 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.516 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.534 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.558 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.575 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.610 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.628 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.654 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.671 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.696 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.713 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.737 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.755 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.777 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.793 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.824 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.846 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_2.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.867 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.accelerated_algorithm [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.886 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_accelerated_algorithm [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.916 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.937 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.956 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.974 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.097.994 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.014 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_shard_fg_expand [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.039 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.059 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard_inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.077 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_shard_inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.106 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard_inline.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.127 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_parallel [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.161 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 13 us [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.185 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_parallel.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.207 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.238 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.269 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 6, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 6 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.287 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.304 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.322 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 6, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.429 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3587] MarkForwardCNode] Can not find the forward graph, so mark the ops in root graph [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.505 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.526 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3182] IsInsertVirtualOutput] The current stage is: 0 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.098.573 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=_VirtualOutput [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:04.098.995 [mindspore/ccsrc/backend/graph_compiler/transform.cc:575] CreateBackend] CreateBackend is: ge [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.368 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualDatasetInfo0 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.404 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] DEBUG(187742,ffffa187dc10,python):2025-02-07-15:58:04.099.405 [mindspore/ccsrc/debug/debugger/debugger.cc:80] Init] Debugger got device_id: 0 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.484 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1), (1, 1, 1)) [INFO] DEBUG(187742,ffffa187dc10,python):2025-02-07-15:58:04.099.434 [mindspore/ccsrc/debug/debugger/debugger.cc:82] Init] Debugger got device_target: Ascend [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.508 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.595 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualDatasetInfo00: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.753 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator MulInfo1 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.777 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.818 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((2, 2, 2), (2, 2, 2)) [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.839 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.899 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.920 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 0 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.941 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.959 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 1 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.099.978 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:503] InferMirrorOps] MulInfo11: No need to insert mirror ops [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.003 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2631] InferAsLossDivisor] MulInfo11: the dev matrix shape is [2, 2, 2], the output tensor map is [2, 1, 0], loss divisor is 1 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.030 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] MulInfo11: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.048 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1334] Init] MulInfo11 : Init success. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.119 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualOutputInfo2 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.140 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.172 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1)) [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.192 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.233 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualOutputInfo22: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.345 [mindspore/ccsrc/frontend/parallel/parameter_manager.cc:1445] HandleCameAndAdaFactorOpt] Adafactor or Came optimizer process start [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.422 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1150] MergeEntireShapeForDynamic] Into MergeEntireShapeForDynamic [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.451 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1163] MergeEntireShapeForDynamic] Can not find the forward graph, so mark the ops in root graph [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.524 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(1) [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.592 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.767 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 2-6 and group name is 2-511848487187618470 [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.811 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-511848487187618470 [const vector]{2, 6}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.880 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.016 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.926 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-511848487187618470 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.956 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.05 msec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.100.965 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_image_summary begins the construction process witch capacity 128 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.100.995 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.188 msec. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.015 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-511848487187618470 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.083 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 4-6 and group name is 2-5435772415009061329 [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.113 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5435772415009061329 [const vector]{4, 6}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.145 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.004 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.181 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-5435772415009061329 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.205 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.037 msec. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.228 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.114 msec. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.246 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-5435772415009061329 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.294 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 6-7 and group name is 2-6853331267304275293 [WARNING] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.321 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-6853331267304275293 [const vector]{6, 7}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.349 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.002 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.378 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-6853331267304275293 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.401 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.03 msec. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.423 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.101 msec. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.442 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-6853331267304275293 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.508 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.101.557 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_scalar_summary begins the construction process witch capacity 128 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.662 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op0, op=StridedSlice [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.806 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.848 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op0, op=StridedSlice [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.925 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.101.955 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op1, op=StridedSlice [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.102.000 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_histogram_summary begins the construction process witch capacity 128 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.023 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.048 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.068 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(2) [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.139 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.277 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.379 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op1, op=StridedSlice [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.102.451 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:215] InitHccl] Start init hccl adapter. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.466 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.497 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op2, op=StridedSlice [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.566 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.597 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op3, op=StridedSlice [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.665 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.689 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.102.695 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:450] InitKernelInfoStore] Start init hccl kernel info store. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.739 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_6{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 1} [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.102.783 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:467] InitKernelInfoStore] Get builder ops_kernel_info_hccl [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.788 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_8{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_6, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.846 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_9{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_10, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.878 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_11{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 0} [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.102.902 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:489] InitKernelInfoStore] Init hccl kernel info store success. [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.102.927 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:667] InitHcclExec] Start init hccl exec. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.922 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_12{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_11, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.102.968 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_13{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_14, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.103.014 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/Mul-op0->Default/_VirtualOutput-op0(1) [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.103.066 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.103.205 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 7, operator_vector: AllGather, AllGather, Split, Concat, AllGather, Split, Concat [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.103.321 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=AllGather [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.104.436 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_7692796245619514736AllGather_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.104.486 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op0, op=Split [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.104.546 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_12015561575443432111Split_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.104.607 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op0, op=Concat [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.104.732 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8051664706019937323Concat_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.104.765 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op0, op=AllGather [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.105.436 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_5140002550487651858AllGather_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.105.482 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op1, op=Split [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.105.540 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8691182465882856301Split_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.105.612 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op1, op=Concat [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.105.654 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_6614310911506831424Concat_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.105.682 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op1, op=AllGather [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.106.318 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_1898494724763908338AllGather_ success [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.106.359 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.106.406 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_15{[0]: ValueNode PrimFunc_Mul, [1]: CNode_13, [2]: CNode_9} [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.106.438 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_16{[0]: ValueNode AllGather, [1]: CNode_15} [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.106.469 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_17{[0]: ValueNode AllGather, [1]: CNode_18} [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.106.822 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:918] FindCommonMirrorGroup] The common mirror group is:[const vector]{} [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.106.873 [mindspore/ccsrc/frontend/parallel/parallel_postprocessor.cc:352] HandleGlobalNormScale] Start to process the global norm [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.106.513 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:673] InitHcclExec] Hcom DynamicKernel Initialize success [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.106.555 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:679] InitHcclExec] InitHcclExec success [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.106.573 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:233] InitHccl] Init hccl adapter success. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.106.593 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:87] Initialize] Successfully initialize HCCL. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.106.614 [mindspore/ccsrc/distributed/collective/collective_manager.cc:588] InitDeviceCommLib] Communication library on device side is successfully initialized. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.106.687 [mindspore/ccsrc/distributed/collective/collective_manager.cc:210] Initialize] [PROF]InitDeviceBackend costs 4805.81 msec. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.106.718 [mindspore/ccsrc/distributed/collective/collective_manager.cc:810] IsAsyncInitGlobalComm] Async initialize global comm: 1. async_conf: 1, is_graph: 1, use_rank_table: 0, simulation: 0, use_mpi: 0, is_ascend: 1 [WARNING] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.106.736 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.106.799 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.031 msec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.106.837 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group hccl_world_group [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.106.860 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.042 msec. [WARNING] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.106.937 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.106.936 [mindspore/ccsrc/frontend/parallel/step_parallel.cc:171] StepParallel] Now leaving step parallel, used time: 8703 us [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.106.971 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.002 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.107.008 [mindspore/ccsrc/distributed/collective/collective_manager.cc:869] SubmitCreateDeviceCommTask] Launch init comm thread. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.107.045 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for hccl_world_group. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.052 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp.changed [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.107.072 [mindspore/ccsrc/distributed/collective/collective_manager.cc:224] Initialize] [PROF]CreateGlobalCommunicationGroup costs 0.334 msec. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.107.090 [mindspore/ccsrc/distributed/collective/collective_manager.cc:227] Initialize] End initializing collective communication for backend: Ascend [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.090 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_comm [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.107.125 [mindspore/ccsrc/distributed/init.cc:56] Initialize] [PROF]distributed_collective_init costs 4806.84 msec. [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.144 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_comm.unchanged [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.107.124 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: hccl_world_group [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.168 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_fusion [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.107.159 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:298] RecordInitStatus] Status record: system init. [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.107.184 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for hccl_world_group [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.216 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_fusion.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.239 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.matmul_add_comm_reduction [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.293 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.314 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.357 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_shard_identity [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.378 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_shard_identity [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.107.393 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.146 msec. [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.454 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.476 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_dataset [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.494 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_dataset [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.602 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_dataset.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.627 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.get_grad_eliminate_ [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.647 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_get_grad_eliminate_ [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.713 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.734 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_output [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.753 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_output [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.834 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_output.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.858 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_forward [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.906 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_forward.unchanged [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.107.917 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group hccl_world_group [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.928 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_recompute_pass [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.107.949 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.522 msec. [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.953 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_recompute_pass.unchanged [WARNING] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.107.981 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.107.985 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_handle_not_recompute_node_pass [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.108.003 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.108.005 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.108.026 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.108.118 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.108.141 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.108.162 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.before_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.108.180 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_before_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.108.267 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.before_grad.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.108.288 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation [WARNING] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:04.108.292 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.108.334 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.108.356 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel_renormalize [INFO] ANALYZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.108.507 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.109.246 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.109.280 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.109.679 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.109.710 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.109.913 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.109.941 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.110.008 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.110.065 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 1___main___Net_construct_5 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1624fed0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1624fed0, value: ValueAny), Parent: } [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.110.878 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel_renormalize.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.110.921 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.update_top_fg [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.110.948 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.update_top_fg.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.110.971 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cast_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.110.993 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_cast_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.069 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cast_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.092 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_fg_expand [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.140 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_fg_expand.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.162 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation_after_expand [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.219 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.241 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp_send_recv_attached [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.284 [mindspore/ccsrc/frontend/parallel/pass/flash_sp.cc:2977] FlashSPSendRecvNodeAttach] No RA/FlashSP Send/Recv grad is found to be attached. [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.307 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp_send_recv_attached.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.329 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.receive_attached [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.364 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.receive_attached.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.386 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.after_resolve [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.405 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_after_resolve [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.477 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.after_resolve.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.500 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_after_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.519 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_after_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.615 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_after_grad.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.636 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.special_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.655 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_special_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.724 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.special_op_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.757 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.renormalize [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.778 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.renormalize.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.799 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.add_forward_monad_depend [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.825 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.845 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.868 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_grad.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.887 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.952 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.111.974 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cse [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.165 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cse.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.191 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_3 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.212 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_3 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.231 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.296 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.315 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.372 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.391 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.450 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.467 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.525 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.542 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.600 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.639 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.706 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.725 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.788 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.807 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.869 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.892 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_3.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.920 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.expand_dump_flag [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.943 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.963 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.112.981 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.113.046 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.113.067 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.loop_unroll [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.113.086 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_loop_unroll [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.113.149 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.113.170 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.113.188 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.526 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_1.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.558 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.recompute_prepare [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.580 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_recompute_prepare [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.642 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.664 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.714 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.748 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_assign_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.793 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.815 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.858 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.879 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parameter_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.904 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.925 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_2 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.945 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_2 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.114.964 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.020 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.039 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.094 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.113 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.166 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.183 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.237 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.256 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.308 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.326 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.391 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.411 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.465 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.493 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.550 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.569 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.629 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.648 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.714 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.734 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.792 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.810 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.883 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.904 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.957 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.115.974 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.027 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.045 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.097 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.115 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.169 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.188 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.244 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.267 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_2.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.291 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.accelerated_algorithm [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.320 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_accelerated_algorithm [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.383 [mindspore/ccsrc/frontend/optimizer/opt.cc:232] ApplyIRToSubstitutions] There may be a problem. Substitution: opt_a.r2.accelerated_algorithm.less_batch_normalization [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.452 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.accelerated_algorithm.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.477 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.497 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.517 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.539 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.562 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_shard_fg_expand [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.599 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.622 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard_inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.656 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_shard_inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.729 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard_inline.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.753 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_parallel [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.811 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 38 us [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.836 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_parallel.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.859 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.894 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.917 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.116.976 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.001 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_comm [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.055 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_comm.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.081 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_fusion [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.131 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_fusion.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.154 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.matmul_add_comm_reduction [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.214 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.237 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.260 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.281 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_shard_identity [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.300 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_shard_identity [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.364 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.388 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_dataset [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.407 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_dataset [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.470 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_dataset.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.490 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.get_grad_eliminate_ [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.509 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_get_grad_eliminate_ [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.570 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.592 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_output [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.611 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_output [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.670 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_output.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.691 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_forward [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.735 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_forward.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.755 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_recompute_pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.780 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.800 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.818 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.835 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.925 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.947 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.967 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.before_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.117.985 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_before_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.066 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.before_grad.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.087 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.131 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.152 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel_renormalize [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.173 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel_renormalize.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.193 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.update_top_fg [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.213 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.update_top_fg.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.234 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cast_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.252 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_cast_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.315 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cast_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_fg_expand [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.382 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_fg_expand.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.403 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation_after_expand [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.456 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.477 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp_send_recv_attached [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.499 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.520 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.receive_attached [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.541 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.receive_attached.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.562 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.after_resolve [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.587 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_after_resolve [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.655 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.after_resolve.unchanged [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.118.628 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:667] GenerateArgumentsKey] Generate a new compile key for new args, key: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.677 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_after_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.696 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_after_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.781 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_after_grad.unchanged [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.118.700 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:675] GenerateArgumentsKey] New cached args: Arg[0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x363fded0, value: ValueAny) Arg[1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x363fded0, value: ValueAny) [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.802 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.special_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.820 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_special_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.881 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.special_op_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.118.902 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.renormalize [INFO] ANALYZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.119.137 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.119.451 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1263] CompileInner] Start compiling, phase: train.1738915084115859456.281470816413072.0.. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.119.496 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:272] LoadPassesConfig] AUTO_PASSES_OPTIMIZE_PATH: [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.119.600 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.119.570 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] Start compiling 'Net.construct' and it will take a while. Please wait... [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.119.632 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.119.891 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.119.920 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.120.104 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.120.132 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.120.200 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.120.265 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 3_1___main___Net_construct_19 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1624fed0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1624fed0, value: ValueAny), Parent: } [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.093 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.renormalize.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.138 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.add_forward_monad_depend [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.171 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.194 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.231 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_grad.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.252 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.318 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.340 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cse [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.514 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cse.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.539 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_3 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.559 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_3 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.579 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.643 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.662 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.716 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.735 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.787 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.804 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.854 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.871 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.923 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.941 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.121.993 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.010 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.063 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.080 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.143 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.166 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_3.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.192 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.expand_dump_flag [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.216 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.expand_dump_flag.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.235 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.253 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.316 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.switch_simplify.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.loop_unroll [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.356 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_loop_unroll [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.418 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.loop_unroll.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.439 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.122.458 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.123.676 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_1.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.123.702 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.recompute_prepare [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.123.723 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_recompute_prepare [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.123.782 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.recompute_prepare.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.123.804 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_depend_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.123.855 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.123.878 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_assign_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.123.925 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.123.949 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_loads_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.123.994 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.018 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parameter_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.053 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parameter_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.075 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_2 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.095 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_2 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.115 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.176 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.194 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.253 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.271 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.324 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.342 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.395 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.412 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.464 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.481 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.541 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.558 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.611 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.640 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.698 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.715 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.769 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.786 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.859 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.877 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.930 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.124.948 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.013 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.032 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.085 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.103 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.156 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.173 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.224 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.242 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.295 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.313 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.362 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.384 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_2.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.405 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.accelerated_algorithm [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.424 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_accelerated_algorithm [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.491 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.513 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.531 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.550 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.576 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.598 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_shard_fg_expand [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.632 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.653 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard_inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.672 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_shard_inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.733 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard_inline.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.754 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_parallel [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.812 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 38 us [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.836 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_parallel.changed [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.860 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.893 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.916 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.938 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.125.959 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_comm [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.009 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_comm.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.033 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_fusion [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.079 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_fusion.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.102 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.matmul_add_comm_reduction [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.155 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.177 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.199 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.219 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_shard_identity [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.237 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_shard_identity [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.300 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_dataset [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.356 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_dataset [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.417 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_dataset.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.438 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.get_grad_eliminate_ [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.456 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_get_grad_eliminate_ [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.516 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.537 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_output [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.555 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_output [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.614 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_output.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.635 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_forward [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.679 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_forward.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.700 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_recompute_pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.723 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.743 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.762 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.780 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.860 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.881 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.901 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.before_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.919 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_before_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.126.999 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.before_grad.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.019 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.062 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.091 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel_renormalize [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.112 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel_renormalize.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.132 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.update_top_fg [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.152 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.update_top_fg.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.172 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cast_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.190 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_cast_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.250 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cast_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.270 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_fg_expand [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.317 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_fg_expand.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.338 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation_after_expand [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.390 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.411 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp_send_recv_attached [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.433 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.453 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.receive_attached [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.476 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.receive_attached.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.496 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.after_resolve [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.514 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_after_resolve [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.578 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.after_resolve.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.599 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_after_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.617 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_after_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.703 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_after_grad.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.724 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.special_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.742 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_special_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.809 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.special_op_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.830 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.renormalize [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.850 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.renormalize.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.871 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.add_forward_monad_depend [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.893 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.913 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_grad [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.936 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_grad.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.127.957 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.018 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.041 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cse [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.205 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cse.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.228 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_3 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.246 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_3 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.265 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.322 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.340 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.394 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.411 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.464 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.481 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.533 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.551 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.603 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.639 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.696 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.714 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.768 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.784 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.836 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.857 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_3.unchanged [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.882 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_a end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.909 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute_after_opt_a start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.967 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute_after_opt_a end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.128.992 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_cell_reuse_recomputed_activation start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.012 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_cell_reuse_recomputed_activation end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.035 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_after_opt_a start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.355 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_after_opt_a end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.388 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass convert_after_rewriter start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.436 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass convert_after_rewriter end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.462 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass order_py_execute_after_rewriter start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.501 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass order_py_execute_after_rewriter end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.525 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_b start ... [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.550 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.570 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_b_r1_b_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.589 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: zero_like_fill_zero [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.646 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: zero_like_fill_zero, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.674 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: list_to_tuple_eliminator_ [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.729 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: list_to_tuple_eliminator_, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.746 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_to_list_eliminator_ [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.802 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_to_list_eliminator_, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.819 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.888 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.905 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_const_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.967 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_const_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.129.984 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.038 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.055 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_set_item_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.119 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.137 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_depend_reorder [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.196 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_depend_reorder, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.214 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_convert_item_index_to_positive [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.282 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_convert_item_index_to_positive, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.300 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: make_slice_get_slice_eliminator [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.354 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: make_slice_get_slice_eliminator, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.371 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.431 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.456 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reset_defer_inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.510 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reset_defer_inline, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.527 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.580 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.597 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.650 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.667 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_pure_node_eliminater [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.719 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_pure_node_eliminater, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.736 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: load_eliminater [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.789 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: load_eliminater, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.807 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: stopgrad_eliminater [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.859 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: stopgrad_eliminater, change: 0 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:04.130.822 [mindspore/ccsrc/backend/graph_compiler/transform.cc:575] CreateBackend] CreateBackend is: ge [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.877 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: special_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.928 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: special_op_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.946 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.130.996 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.013 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_add_eliminate [INFO] DEBUG(187834,ffffb35e0c10,python):2025-02-07-15:58:04.131.050 [mindspore/ccsrc/debug/debugger/debugger.cc:80] Init] Debugger got device_id: 7 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.061 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_add_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.079 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_set_eliminate [INFO] DEBUG(187834,ffffb35e0c10,python):2025-02-07-15:58:04.131.081 [mindspore/ccsrc/debug/debugger/debugger.cc:82] Init] Debugger got device_target: Ascend [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.129 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_set_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.147 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_depend_swap [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.204 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_depend_swap, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.223 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_add_const_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.275 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_add_const_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.292 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: value_based_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.344 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: value_based_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.361 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: parallel_virtual_node [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.414 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: parallel_virtual_node, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.431 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: const_output_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.484 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: const_output_eliminate, change: 0 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.505 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_1.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.527 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_2 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.548 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_b_r1_b_2 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.611 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_2.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.633 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.679 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.701 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.745 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.766 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.808 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.830 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.renormalize [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.850 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.renormalize.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.131.870 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.cse [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.132.018 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.cse.unchanged [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.132.060 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_b end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.132.087 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass optimize_parallel_all_gather_comm start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.132.140 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass optimize_parallel_all_gather_comm end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.132.165 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_param_gather start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.132.185 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_param_gather end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.132.207 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cconv start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.132.257 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cconv end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.132.281 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass loop_unroll start ... [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.132.953 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start loop_unroll_optimizer.r1.loop_unroll [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.132.991 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, loop_unroll_optimizer_r1_loop_unroll [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.069 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End loop_unroll_optimizer.r1.loop_unroll.unchanged [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.100 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass loop_unroll end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.127 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_after_cconv start ... [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.151 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.c_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.169 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_after_cconv_r1_c_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.396 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.c_1.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.418 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.parameter_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.440 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.461 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.511 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.534 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.577 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.597 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.650 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.671 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.cse [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.826 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.cse.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.851 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.renormalize [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.873 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.renormalize.unchanged [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.894 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_after_cconv end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.133.919 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_dup_value start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.205 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_dup_value end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.235 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass tuple_transform start ... [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.258 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.d_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.278 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_trans_graph_r1_d_1 [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.571 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.d_1.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.595 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.renormalize [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.617 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.renormalize.unchanged [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.639 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass tuple_transform end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.663 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass partial_unused_args_eliminate start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.683 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass partial_unused_args_eliminate end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.705 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_cache_embedding start ... [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.752 [mindspore/ccsrc/frontend/parallel/cache_embedding/cache_embedding.cc:706] AddCacheEmbedding] Parameters are all not cache enable. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.774 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_cache_embedding end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.796 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_recomputation start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.134.988 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_recomputation end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.017 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cse_after_recomputation start ... [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.041 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_recompute.r1.cse [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.149 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_recompute.r1.cse.unchanged [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.178 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cse_after_recomputation end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.203 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass environ_conv start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.264 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass environ_conv end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.289 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass swap_dp_allreduce_reducescatter start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.332 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass swap_dp_allreduce_reducescatter end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.356 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass bias_add_comm_swap start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.376 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass bias_add_comm_swap end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.398 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_micro_interleaved_index start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.417 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_micro_interleaved_index end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.440 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_fine_grained_interleaved_index start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.460 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_fine_grained_interleaved_index end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.480 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass merge_cast_opt start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.498 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass merge_cast_opt end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.521 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_recompute_activation start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.566 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_recompute_activation end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.589 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass micro_interleaved_order_control start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.608 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass micro_interleaved_order_control end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.629 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass assign_add_opt start ... [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.710 [mindspore/ccsrc/frontend/parallel/pass/assign_add_opt.cc:466] AssignAddOpt] Merge multi matmul assign add begin and concat eliminate enable flag is:0 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.752 [mindspore/ccsrc/frontend/parallel/pass/pass_utils.cc:122] ExtractBackwardMatMul] backward_matmul_dx_dw_map size:0 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.796 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass assign_add_opt end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.820 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass ForceFp32Comm start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.838 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass ForceFp32Comm end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.870 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_cast_before_assign_add start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.910 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_cast_before_assign_add end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.934 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass full_micro_interleaved_order_control start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.953 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass full_micro_interleaved_order_control end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.974 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass reorder_send_recv_between_fp_bp start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.135.993 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass reorder_send_recv_between_fp_bp end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.015 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass comm_op_add_attrs start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.079 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass comm_op_add_attrs end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.104 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_comm_op_reuse_tag start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.176 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_comm_op_reuse_tag end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.201 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_split_concat_branches start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.220 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_split_concat_branches end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.241 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_parallel_branches start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.260 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_parallel_branches end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.281 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_in_pipeline start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.327 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_in_pipeline end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.351 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_grad_in_pipeline start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.372 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_grad_in_pipeline end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.393 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass control_data_broadcast_order start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.412 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass control_data_broadcast_order end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.434 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass grouped_pairwise_exchange_alltoall start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.460 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass grouped_pairwise_exchange_alltoall end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.482 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass offloading_packed_experts start ... [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.507 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:746] SetOffloadingPackedExpert] pass if (parallel::g_device_manager == nullptr) [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.525 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:707] CheckUserSettings] To activate the pass, set_auto_parallel_context 'enable_alltoall' should be true [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.542 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:751] SetOffloadingPackedExpert] CheckUserSettings_not_pass [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.559 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass offloading_packed_experts end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.582 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_and_grad_model_parallel start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.600 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_and_grad_model_parallel end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.621 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_matmul_and_grad_allreduce start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.679 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_matmul_and_grad_allreduce end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.716 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_allgather_and_fa_grad start ... [WARNING] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.735 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.753 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_allgather_and_fa_grad end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.773 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_ring_attention start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.834 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_ring_attention end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.859 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_flash_sp start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.905 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_flash_sp end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.928 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass begin_end_overlap_inline start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.946 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass begin_end_overlap_inline end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.967 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_matmul_comm_elemetwise start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.136.986 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_matmul_comm_elemetwise end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.008 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_layernorm_comm start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.027 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_layernorm_comm end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.048 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass handle_group_info start ... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.077 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass handle_group_info end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.098 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass symbol_engine_optimizer start ... [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.122 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.build [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.155 [mindspore/ccsrc/frontend/optimizer/irpass/symbol_engine_optimizer.cc:39] operator()] There is no dynamic shape node, the SymbolEngineBuilder is disabled. [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.177 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.build.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.197 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_shapecalc [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.217 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_shapecalc [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.277 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_shapecalc.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.298 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_not_effective [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.315 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_not_effective [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.390 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_not_effective.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.411 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.opt_reshape [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.429 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_opt_reshape [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.482 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.opt_reshape.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.503 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.fold_const_symbol [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.520 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_fold_const_symbol [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.592 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.fold_const_symbol.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.613 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.renormalize [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.633 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.renormalize.unchanged [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.655 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass symbol_engine_optimizer end. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.680 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end optimize action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.699 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.735 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_parallel_scheduler action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.756 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_parallel_scheduler action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.774 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.799 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad_reorder action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.880 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad_reorder action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.899 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.925 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start get_jit_bprop_graph action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.944 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end get_jit_bprop_graph action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.961 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.137.986 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.003 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.021 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.043 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start eliminate_special_op_node action. [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.581 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.ad_related_special_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.615 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_ad_related_special_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.678 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.ad_related_special_op_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.703 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.mutable_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.721 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_mutable_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.774 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.mutable_op_eliminate.unchanged [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.795 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.convert_tensor_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.813 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_convert_tensor_op_eliminate [INFO] OPTIMIZER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.865 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.convert_tensor_op_eliminate.unchanged [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.907 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end eliminate_special_op_node action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.928 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.957 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start distribtued_split action. [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.138.982 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:378] GenerateStrategy] Current parallel mode is semi_auto_parallel [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.139.000 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:390] GenerateStrategy] Generated distributed strategy is 1 [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.139.133 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:1277] Run] All nodes are on this process so there's no need to build and split distributed graph. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.139.157 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end distribtued_split action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.139.175 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.139.203 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start validate action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.139.322 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end validate action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.139.343 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PROFILER(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.139.418 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc:49] IsProfilingParallelStrategyEnabled] Profiling parallel strategy is disabled. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.139.495 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start task_emit action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.139.660 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1679] SetRunMode] Run graph mode with kernel by kernel by configuration. [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.139.797 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1057] CompileGraphs] Status record: start compile function graph: 4_3_1___main___Net_construct_20 [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.140.075 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.140.488 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: 4_3_1___main___Net_construct_20 [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.140.673 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-511848487187618470, the max communication size is 1 MB. [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.140.715 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-511848487187618470, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:04.140.745 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-511848487187618470. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.201.888 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:480] GetGeSessionOptions] Set GE atomic clean policy to 1. [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.205.493 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:55] NewSession] Create new GE session success! [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.205.549 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:214] SetGeSession] Add a new Ge Session success [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.205.623 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:67] GraphRunner] ME run in ONE_DEVICE strategy mode [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.205.761 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:250] SetGraphRunner] Add a new GraphRunner success [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.205.818 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1610] Initialize] Create session and graphrunner successful. [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:04.205.737 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:739] Initialize] The actor thread number: 5, the kernel thread number: 25 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.205.844 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1614] Initialize] Init ge successful, ge reference = 1. [INFO] DEBUG(187764,ffff97badc10,python):2025-02-07-15:58:04.205.993 [mindspore/ccsrc/common/debug/env_config_parser.cc:152] ParseFromFile] The 'env_config_path' in 'mindspore.context.set_context(env_config_path={path})' is empty. [INFO] SYMBOLIC_SHAPE(187764,ffff97badc10,python):2025-02-07-15:58:04.206.552 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187764,ffff97badc10,python):2025-02-07-15:58:04.206.583 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187764,ffff97badc10,python):2025-02-07-15:58:04.206.601 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187764,ffff97badc10,python):2025-02-07-15:58:04.206.617 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187764,ffff97badc10,python):2025-02-07-15:58:04.206.692 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187764,ffff97badc10,python):2025-02-07-15:58:04.206.766 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.206.860 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1682] Run] Pipeline run [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.206.936 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start bootstrap action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.208.088 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end bootstrap action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.208.127 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 9 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.208.230 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start type_inference action. [INFO] ANALYZER(187764,ffff97badc10,python):2025-02-07-15:58:04.208.381 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] COMMON(187775,ffffba4dbc10,python):2025-02-07-15:58:04.217.029 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:241] InitializeAcl] Call aclInit successfully [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.217.165 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:248] SetAclOpPrecisionMode] Set aclop PRECISION_MODE: allow_fp32_to_fp16 [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.217.585 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.218.426 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:272] Initialize] End initializing device context. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.218.547 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_res_manager.cc:404] LoadCollectiveCommLib] Loading MACCL collective library successfully. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.218.584 [mindspore/ccsrc/distributed/collective/collective_manager.cc:581] InitDeviceCommLib] Start initializing communication library on device side... [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.218.689 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:82] OpenTsd] Device id = 3, rank size = 8. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.218.923 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel _npu_log begins the construction process witch capacity 128 [INFO] ME(187764:281473227349008,MainProcess):2025-02-07-15:58:04.222.007 [mindspore/_extends/parse/namespace.py:132] 'Net' object has no attribute or method: '__is_tensors_queue__', so will return None. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.224.618 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187764,ffff97badc10,python):2025-02-07-15:58:04.224.758 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: __main___Net_construct_2 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xcace510, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xcace510, value: ValueAny), Parent: } [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.225.101 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end type_inference action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.225.131 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.225.170 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.225.405 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.225.429 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.225.459 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start graph_reusing action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.225.479 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end graph_reusing action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.225.496 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.225.522 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start inline action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.225.573 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.225.652 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.225.679 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass a1a2 start ... [INFO] PARSER(187764,ffff97badc10,python):2025-02-07-15:58:04.231.722 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {cast_ : Prim[Cast]} [INFO] PARSER(187764,ffff97badc10,python):2025-02-07-15:58:04.231.970 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:151] WriteVariable] fill_3 update var `value` with node @fill_3:value{[0]: CNode_4, [1]: param_value, [2]: param_type} [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.231.961 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:480] GetGeSessionOptions] Set GE atomic clean policy to 1. [INFO] PARSER(187764,ffff97badc10,python):2025-02-07-15:58:04.232.237 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {fillv2_ : Prim[FillV2]} [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.076 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.expand_dump_flag [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.122 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.170 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.switch_simplify [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.235.241 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:55] NewSession] Create new GE session success! [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.240 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_switch_simplify [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.235.292 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:214] SetGeSession] Add a new Ge Session success [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.339 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.switch_simplify.unchanged [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.235.437 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:67] GraphRunner] ME run in ONE_DEVICE strategy mode [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.395 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.473 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.504 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.526 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.544 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_a_1 [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.235.573 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:250] SetGraphRunner] Add a new GraphRunner success [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.235.623 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1610] Initialize] Create session and graphrunner successful. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.235.645 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1614] Initialize] Init ge successful, ge reference = 1. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.822 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_1.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.851 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.recompute_prepare [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.872 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_recompute_prepare [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.899 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.921 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.948 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.968 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.235.991 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.011 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.034 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.069 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parameter_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.095 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.116 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.134 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r1_a_2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.152 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.176 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.196 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.219 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.236 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.259 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.276 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.297 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.314 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.336 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.353 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.375 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.391 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.412 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.429 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.451 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.468 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.486 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.512 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.543 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.561 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.583 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.600 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.621 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.649 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.674 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.691 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.712 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.729 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.751 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.767 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.789 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.805 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.827 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.848 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_2.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.869 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parallel_inline_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.886 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_parallel_inline_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.913 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parallel_inline_pass.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.937 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.expand_dump_flag [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.958 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.236.985 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.004 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.030 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.049 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.068 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.093 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.114 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.133 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_a_1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.273 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_1.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.294 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.recompute_prepare [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.312 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_recompute_prepare [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.338 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.358 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.382 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.402 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_assign_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.424 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.445 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.467 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.487 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parameter_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.507 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.526 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.544 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r2_a_2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.563 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.592 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.610 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.632 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.649 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.671 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.688 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.709 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.726 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.747 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.763 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.784 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.800 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.822 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.839 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.860 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.877 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.898 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.915 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.942 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.959 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.981 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.237.998 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.026 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.043 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.064 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.080 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.102 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.118 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.140 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.157 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.178 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.195 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.215 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.235 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_2.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.255 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parallel_inline_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.272 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_parallel_inline_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.238.297 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parallel_inline_pass.unchanged [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.238.321 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass a1a2 end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.238.345 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end inline action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.238.363 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.238.392 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol action. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.238.416 [mindspore/ccsrc/frontend/parallel/dynamic_shape/dynamic_shape.cc:223] ForwardHasDynamicShape] Can not find the forward graph, so find the ops in root graph [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.238.470 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.238.490 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.238.524 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pre_auto_parallel action. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.238.566 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 19 us [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.238.586 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pre_auto_parallel action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.238.604 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.238.629 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start insert-virtual-dataset action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.239.467 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end insert-virtual-dataset action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.239.502 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.239.535 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol-second action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.239.557 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol-second action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.239.575 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.239.601 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start dataset_repeat_opt action. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.239.680 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.239.703 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2248] GetCommInfo] Get global rank from communication model, the global rank is 2 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.239.756 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 2, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 2 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.239.774 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.239.792 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.239.811 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 2, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [WARNING] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.239.839 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:305] BroadcastDataset] For now on, only dataset sink mode support dataset reader optimizer. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.239.859 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end dataset_repeat_opt action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.239.877 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.239.913 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_split action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.239.938 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:239] PipelineSplit] Get device num from communication model, the device num is 8 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.239.958 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:256] PipelineSplit] The parameter 'stage_num' is: 1. No need Pipeline split. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.239.991 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_split action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.240.008 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.240.034 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start optimize action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.240.078 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.240.109 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.240.133 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.240.180 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.240.232 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_a start ... [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.257 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.expand_dump_flag [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.281 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.302 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.321 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.355 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.376 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.394 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.424 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.444 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.461 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.760 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_1.unchanged [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:04.240.727 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:739] Initialize] The actor thread number: 5, the kernel thread number: 25 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.789 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.recompute_prepare [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.810 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_recompute_prepare [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.849 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.870 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.898 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.922 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.947 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.968 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.240.993 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.013 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parameter_eliminate [INFO] DEBUG(187742,ffffa187dc10,python):2025-02-07-15:58:04.241.015 [mindspore/ccsrc/common/debug/env_config_parser.cc:152] ParseFromFile] The 'env_config_path' in 'mindspore.context.set_context(env_config_path={path})' is empty. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.034 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.055 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.072 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.092 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.119 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.136 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.162 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.180 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.204 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.221 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.245 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.262 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.241.195 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_dump begins the construction process witch capacity 128 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.285 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.303 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.342 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.362 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.386 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.404 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.430 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.448 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.472 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.489 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.519 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.537 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.562 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.579 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.610 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.627 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.653 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.670 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.695 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.711 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.733 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.750 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.774 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.791 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.822 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.844 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_2.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.865 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.accelerated_algorithm [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.884 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_accelerated_algorithm [INFO] SYMBOLIC_SHAPE(187742,ffffa187dc10,python):2025-02-07-15:58:04.241.896 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.915 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.937 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.241.958 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.241.954 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_summary begins the construction process witch capacity 128 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.241.977 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] SYMBOLIC_SHAPE(187742,ffffa187dc10,python):2025-02-07-15:58:04.241.927 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.241.997 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard.unchanged [INFO] SYMBOLIC_SHAPE(187742,ffffa187dc10,python):2025-02-07-15:58:04.242.004 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.242.016 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_shard_fg_expand [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.242.041 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.242.061 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard_inline [INFO] SYMBOLIC_SHAPE(187742,ffffa187dc10,python):2025-02-07-15:58:04.242.022 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.242.079 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_shard_inline [INFO] SYMBOLIC_SHAPE(187742,ffffa187dc10,python):2025-02-07-15:58:04.242.092 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.242.108 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard_inline.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.242.129 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_parallel [INFO] SYMBOLIC_SHAPE(187742,ffffa187dc10,python):2025-02-07-15:58:04.242.104 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.242.159 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 11 us [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.242.181 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_parallel.changed [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.242.195 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1682] Run] Pipeline run [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.242.203 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.242.234 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start bootstrap action. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.242.234 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.242.265 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 2, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 2 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.242.283 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.242.301 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.242.328 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 2, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.242.421 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3587] MarkForwardCNode] Can not find the forward graph, so mark the ops in root graph [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.242.490 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.242.509 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3182] IsInsertVirtualOutput] The current stage is: 0 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.242.548 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=_VirtualOutput [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.242.542 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_image_summary begins the construction process witch capacity 128 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.243.091 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_scalar_summary begins the construction process witch capacity 128 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.339 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualDatasetInfo0 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.373 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.445 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1), (1, 1, 1)) [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.468 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.243.468 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end bootstrap action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.243.506 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 9 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.243.558 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start type_inference action. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.555 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualDatasetInfo00: The loss divisor is 1, no need to create virtual div op. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.243.570 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_histogram_summary begins the construction process witch capacity 128 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.698 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator MulInfo1 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.720 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] ANALYZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.243.741 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.756 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((2, 2, 2), (2, 2, 2)) [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.777 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.829 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.849 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 0 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.870 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.887 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 1 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.906 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:503] InferMirrorOps] MulInfo11: No need to insert mirror ops [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.929 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2631] InferAsLossDivisor] MulInfo11: the dev matrix shape is [2, 2, 2], the output tensor map is [2, 1, 0], loss divisor is 1 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.956 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] MulInfo11: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.243.974 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1334] Init] MulInfo11 : Init success. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.041 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualOutputInfo2 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.061 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.093 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1)) [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.113 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.244.132 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:215] InitHccl] Start init hccl adapter. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.147 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualOutputInfo22: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.261 [mindspore/ccsrc/frontend/parallel/parameter_manager.cc:1445] HandleCameAndAdaFactorOpt] Adafactor or Came optimizer process start [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.349 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1150] MergeEntireShapeForDynamic] Into MergeEntireShapeForDynamic [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.378 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1163] MergeEntireShapeForDynamic] Can not find the forward graph, so mark the ops in root graph [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.244.384 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:450] InitKernelInfoStore] Start init hccl kernel info store. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.442 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(1) [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.244.468 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:467] InitKernelInfoStore] Get builder ops_kernel_info_hccl [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.504 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.594 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 2-6 and group name is 2-511848487187618470 [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.244.597 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:489] InitKernelInfoStore] Init hccl kernel info store success. [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.244.656 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-511848487187618470 [const vector]{2, 6}, async: 0, submit_now: 0 [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.244.625 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:667] InitHcclExec] Start init hccl exec. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.244.745 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.015 msec. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.244.791 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-511848487187618470 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.244.818 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.048 msec. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.244.850 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.221 msec. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.878 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-511848487187618470 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.244.933 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 0-2 and group name is 2-5208665662337742843 [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.244.960 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5208665662337742843 [const vector]{0, 2}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.244.993 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.003 msec. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.245.019 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-5208665662337742843 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.245.042 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.028 msec. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.245.065 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.104 msec. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.084 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-5208665662337742843 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.129 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 2-3 and group name is 2-3358271254418797552 [WARNING] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.245.157 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-3358271254418797552 [const vector]{2, 3}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.245.187 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.002 msec. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.245.220 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-3358271254418797552 [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.245.243 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.032 msec. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.245.266 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.108 msec. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.287 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-3358271254418797552 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.346 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.467 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op0, op=StridedSlice [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.623 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.656 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op0, op=StridedSlice [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.722 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.752 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op1, op=StridedSlice [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.822 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.846 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.865 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(2) [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.245.932 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.072 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.171 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op1, op=StridedSlice [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.255 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.287 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op2, op=StridedSlice [INFO] COMMON(187789,ffffaa419c10,python):2025-02-07-15:58:04.246.324 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:241] InitializeAcl] Call aclInit successfully [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.356 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.392 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op3, op=StridedSlice [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.246.400 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:248] SetAclOpPrecisionMode] Set aclop PRECISION_MODE: allow_fp32_to_fp16 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.457 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.479 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.563 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_6{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 1} [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.612 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_8{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_6, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.669 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_9{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_10, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.246.692 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.701 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_11{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 0} [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.746 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_12{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_11, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.790 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_13{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_14, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.834 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/Mul-op0->Default/_VirtualOutput-op0(1) [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.246.886 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.247.014 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 7, operator_vector: AllGather, AllGather, Split, Concat, AllGather, Split, Concat [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.247.125 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=AllGather [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.247.449 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:272] Initialize] End initializing device context. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.247.525 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_res_manager.cc:404] LoadCollectiveCommLib] Loading MACCL collective library successfully. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.247.559 [mindspore/ccsrc/distributed/collective/collective_manager.cc:581] InitDeviceCommLib] Start initializing communication library on device side... [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.247.616 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:82] OpenTsd] Device id = 4, rank size = 8. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.247.780 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel _npu_log begins the construction process witch capacity 128 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.248.148 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_7692796245619514736AllGather_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.248.195 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op0, op=Split [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.248.285 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_12015561575443432111Split_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.248.348 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op0, op=Concat [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.248.377 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:673] InitHcclExec] Hcom DynamicKernel Initialize success [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.248.422 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:679] InitHcclExec] InitHcclExec success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.248.423 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8051664706019937323Concat_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.248.455 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op0, op=AllGather [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.248.443 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:233] InitHccl] Init hccl adapter success. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.248.493 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:87] Initialize] Successfully initialize HCCL. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.248.519 [mindspore/ccsrc/distributed/collective/collective_manager.cc:588] InitDeviceCommLib] Communication library on device side is successfully initialized. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.248.581 [mindspore/ccsrc/distributed/collective/collective_manager.cc:210] Initialize] [PROF]InitDeviceBackend costs 4811.86 msec. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.248.615 [mindspore/ccsrc/distributed/collective/collective_manager.cc:810] IsAsyncInitGlobalComm] Async initialize global comm: 1. async_conf: 1, is_graph: 1, use_rank_table: 0, simulation: 0, use_mpi: 0, is_ascend: 1 [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.248.721 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.248.792 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.028 msec. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.248.835 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group hccl_world_group [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.248.860 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.044 msec. [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.248.941 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.249.100 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_5140002550487651858AllGather_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.249.146 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op1, op=Split [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.249.204 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8691182465882856301Split_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.249.276 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op1, op=Concat [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.249.317 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_6614310911506831424Concat_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.249.345 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op1, op=AllGather [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.249.994 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_1898494724763908338AllGather_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.250.035 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.250.081 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_15{[0]: ValueNode PrimFunc_Mul, [1]: CNode_13, [2]: CNode_9} [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.250.115 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_16{[0]: ValueNode AllGather, [1]: CNode_15} [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.250.146 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_17{[0]: ValueNode AllGather, [1]: CNode_18} [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.250.496 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:918] FindCommonMirrorGroup] The common mirror group is:[const vector]{} [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.250.547 [mindspore/ccsrc/frontend/parallel/parallel_postprocessor.cc:352] HandleGlobalNormScale] Start to process the global norm [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.250.610 [mindspore/ccsrc/frontend/parallel/step_parallel.cc:171] StepParallel] Now leaving step parallel, used time: 8381 us [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.250.639 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.250.669 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.250.726 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.250.763 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_comm [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.250.817 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_comm.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.250.841 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_fusion [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.250.889 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_fusion.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.250.912 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.matmul_add_comm_reduction [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.250.963 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.250.985 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.005 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.025 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_shard_identity [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.044 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_shard_identity [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.116 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.137 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_dataset [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.155 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_dataset [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.258 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_dataset.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.284 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.get_grad_eliminate_ [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.304 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_get_grad_eliminate_ [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.373 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.395 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_output [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.414 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_output [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.251.404 [mindspore/ccsrc/distributed/collective/collective_manager.cc:869] SubmitCreateDeviceCommTask] Launch init comm thread. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.251.440 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for hccl_world_group. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.251.471 [mindspore/ccsrc/distributed/collective/collective_manager.cc:224] Initialize] [PROF]CreateGlobalCommunicationGroup costs 2.745 msec. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.251.489 [mindspore/ccsrc/distributed/collective/collective_manager.cc:227] Initialize] End initializing collective communication for backend: Ascend [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.496 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_output.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.521 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_forward [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.251.531 [mindspore/ccsrc/distributed/init.cc:56] Initialize] [PROF]distributed_collective_init costs 4815.46 msec. [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.251.525 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: hccl_world_group [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.570 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_forward.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.251.566 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:298] RecordInitStatus] Status record: system init. [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.251.590 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for hccl_world_group [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.594 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_recompute_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.630 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.652 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.671 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.690 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.780 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.801 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.251.800 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.142 msec. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.822 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.before_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.841 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_before_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.925 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.before_grad.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.946 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.251.990 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.252.014 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel_renormalize [INFO] ANALYZER(187764,ffff97badc10,python):2025-02-07-15:58:04.252.153 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.252.327 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group hccl_world_group [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.252.361 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.515 msec. [WARNING] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.252.380 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.252.401 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187775,fffeae7fc0f0,python):2025-02-07-15:58:04.252.765 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:04.252.844 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:04.252.878 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:04.253.271 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:04.253.301 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:04.253.508 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:04.253.536 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.253.597 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187764,ffff97badc10,python):2025-02-07-15:58:04.253.652 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 1___main___Net_construct_5 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xcace510, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xcace510, value: ValueAny), Parent: } [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.423 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel_renormalize.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.462 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.update_top_fg [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.488 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.update_top_fg.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.509 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cast_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.528 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_cast_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.602 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cast_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.624 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_fg_expand [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.671 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_fg_expand.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.693 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation_after_expand [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.750 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.771 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp_send_recv_attached [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.254.813 [mindspore/ccsrc/frontend/parallel/pass/flash_sp.cc:2977] FlashSPSendRecvNodeAttach] No RA/FlashSP Send/Recv grad is found to be attached. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.836 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp_send_recv_attached.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.859 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.receive_attached [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.883 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.receive_attached.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.905 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.after_resolve [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.254.924 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_after_resolve [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.000 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.after_resolve.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.024 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_after_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.043 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_after_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.138 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_after_grad.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.160 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.special_op_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.176 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_special_op_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.255 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.special_op_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.277 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.renormalize [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.297 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.renormalize.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.318 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.add_forward_monad_depend [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.342 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.361 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.383 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_grad.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.402 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.464 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.487 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cse [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.691 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cse.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.721 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_3 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.741 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_3 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.760 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.825 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.844 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.905 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.923 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.980 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.255.998 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.055 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.072 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.126 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.151 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.208 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.225 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.280 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.297 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.352 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.373 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_3.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.398 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.expand_dump_flag [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.419 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.439 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.457 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.519 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.539 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.557 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.619 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.651 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.256.674 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_1 [INFO] ME(187742:281473391778832,MainProcess):2025-02-07-15:58:04.257.286 [mindspore/_extends/parse/namespace.py:132] 'Net' object has no attribute or method: '__is_tensors_queue__', so will return None. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.050 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_1.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.079 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.recompute_prepare [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.100 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_recompute_prepare [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.159 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.183 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.230 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.262 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_assign_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.306 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.330 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.373 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.395 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parameter_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.418 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.438 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.458 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.477 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.531 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.549 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.603 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.620 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.673 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.691 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.744 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.762 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.815 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.833 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.895 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.913 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.967 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.258.992 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.050 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.069 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.126 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.143 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.203 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.221 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.276 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.293 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.364 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.382 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.439 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.457 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.515 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.532 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.588 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.606 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.663 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.681 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.737 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.759 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_2.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.788 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.accelerated_algorithm [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.808 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_accelerated_algorithm [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.869 [mindspore/ccsrc/frontend/optimizer/opt.cc:232] ApplyIRToSubstitutions] There may be a problem. Substitution: opt_a.r2.accelerated_algorithm.less_batch_normalization [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.937 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.accelerated_algorithm.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.259.962 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.259.982 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.260.004 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.025 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.046 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_shard_fg_expand [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.078 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.100 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard_inline [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.118 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_shard_inline [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.260.118 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.185 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard_inline.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.207 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_parallel [INFO] ANALYZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.260.227 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: __main___Net_construct_2 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x18fb69c0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x18fb69c0, value: ValueAny), Parent: } [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.260.262 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 35 us [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.287 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_parallel.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.307 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.360 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.413 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.438 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_comm [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.490 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_comm.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.514 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_fusion [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.561 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_fusion.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.592 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.matmul_add_comm_reduction [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.653 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.689 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.709 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.732 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_shard_identity [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.752 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_shard_identity [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.260.795 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end type_inference action. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.822 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_shard_identity.unchanged [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.260.834 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.845 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_dataset [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.867 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_dataset [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.260.880 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad action. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.934 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_dataset.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.958 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.get_grad_eliminate_ [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.260.976 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_get_grad_eliminate_ [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.041 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.063 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_output [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.083 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_output [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.261.130 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad action. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.148 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_output.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.169 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_forward [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.261.151 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.215 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_forward.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.238 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_recompute_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.262 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_recompute_pass.unchanged [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.261.243 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start graph_reusing action. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.285 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_handle_not_recompute_node_pass [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.261.303 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end graph_reusing action. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.305 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_cell_reuse_handle_not_recompute_node_pass [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.261.320 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.324 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.261.343 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start inline action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.261.420 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.414 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.437 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.459 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.before_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.478 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_before_grad [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.261.516 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.261.542 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass a1a2 start ... [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.558 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.before_grad.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.582 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.625 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.648 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel_renormalize [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.668 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel_renormalize.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.688 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.update_top_fg [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.707 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.update_top_fg.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.726 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cast_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.744 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_cast_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.803 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cast_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.824 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_fg_expand [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.867 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_fg_expand.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.886 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation_after_expand [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.935 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.957 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp_send_recv_attached [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.261.978 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.000 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.receive_attached [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.022 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.receive_attached.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.043 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.after_resolve [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.069 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_after_resolve [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.133 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.after_resolve.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.154 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_after_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.172 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_after_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.256 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_after_grad.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.278 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.special_op_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.296 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_special_op_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.354 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.special_op_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.375 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.renormalize [INFO] ANALYZER(187764,ffff97badc10,python):2025-02-07-15:58:04.262.536 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:04.262.926 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:04.262.957 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:04.263.173 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:04.263.203 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:04.263.380 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.263.349 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:667] GenerateArgumentsKey] Generate a new compile key for new args, key: 0 [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:04.263.407 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.263.467 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.263.433 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:675] GenerateArgumentsKey] New cached args: Arg[0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1220f510, value: ValueAny) Arg[1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1220f510, value: ValueAny) [INFO] ANALYZER(187764,ffff97badc10,python):2025-02-07-15:58:04.263.521 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 3_1___main___Net_construct_19 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xcace510, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xcace510, value: ValueAny), Parent: } [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.264.194 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1263] CompileInner] Start compiling, phase: train.1738915084260222464.281470854288784.0.. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.216 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.renormalize.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.255 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.add_forward_monad_depend [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.264.236 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:272] LoadPassesConfig] AUTO_PASSES_OPTIMIZE_PATH: [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.285 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.315 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.338 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_grad.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.359 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_eliminator [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.264.315 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] Start compiling 'Net.construct' and it will take a while. Please wait... [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.420 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.445 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cse [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.605 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cse.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.648 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_3 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.672 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_3 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.692 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.753 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.772 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.824 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.843 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.896 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.913 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.965 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.264.982 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.036 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.053 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.103 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.120 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.169 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.187 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.248 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.270 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_3.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.295 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.expand_dump_flag [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.318 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.expand_dump_flag.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.338 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.357 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.419 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.switch_simplify.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.440 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.458 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.517 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.loop_unroll.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.538 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.265.556 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.266.766 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_1.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.266.792 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.recompute_prepare [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.266.812 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_recompute_prepare [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.266.871 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.recompute_prepare.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.266.892 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_depend_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.266.940 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.266.961 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_assign_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.004 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.025 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_loads_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.068 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.089 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parameter_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.121 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parameter_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.142 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.159 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.178 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.231 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.249 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.302 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.319 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.371 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.388 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.440 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.456 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.508 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.525 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.587 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.604 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.657 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.674 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.727 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.744 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] PARSER(187742,ffffa187dc10,python):2025-02-07-15:58:04.267.758 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {cast_ : Prim[Cast]} [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.796 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.813 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.879 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.896 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.949 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.267.966 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] PARSER(187742,ffffa187dc10,python):2025-02-07-15:58:04.268.019 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:151] WriteVariable] fill_3 update var `value` with node @fill_3:value{[0]: CNode_4, [1]: param_value, [2]: param_type} [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.030 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.049 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.100 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.117 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.169 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.186 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.237 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.254 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.305 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.322 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] PARSER(187742,ffffa187dc10,python):2025-02-07-15:58:04.268.336 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {fillv2_ : Prim[FillV2]} [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.373 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.393 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_2.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.415 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.accelerated_algorithm [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.433 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_accelerated_algorithm [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.499 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.520 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.268.539 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.268.556 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.587 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.607 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_shard_fg_expand [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.655 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.689 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard_inline [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.712 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_shard_inline [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.773 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard_inline.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.793 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_parallel [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.268.845 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 32 us [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.868 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_parallel.changed [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.891 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.918 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.939 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.960 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.268.980 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_comm [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.026 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_comm.unchanged [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.268.994 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_dump begins the construction process witch capacity 128 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.051 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_fusion [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.097 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_fusion.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.119 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.matmul_add_comm_reduction [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.167 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.190 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.210 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.232 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_shard_identity [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.249 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_shard_identity [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.320 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.342 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_dataset [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.362 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_dataset [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.422 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_dataset.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.443 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.get_grad_eliminate_ [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.461 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_get_grad_eliminate_ [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.521 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.543 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_output [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.560 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_output [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.621 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_output.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.643 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_forward [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.686 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_forward.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.709 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_recompute_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.732 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.752 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.771 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.790 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.868 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.889 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.909 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.before_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.269.926 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_before_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.006 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.before_grad.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.027 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.077 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.099 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel_renormalize [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.119 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel_renormalize.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.139 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.update_top_fg [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.159 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.update_top_fg.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.178 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cast_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.196 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_cast_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.256 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cast_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.276 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_fg_expand [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.319 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_fg_expand.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.341 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation_after_expand [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.390 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.410 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp_send_recv_attached [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.431 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.451 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.receive_attached [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.472 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.receive_attached.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.492 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.after_resolve [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.509 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_after_resolve [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.572 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.after_resolve.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.594 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_after_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.612 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_after_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.695 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_after_grad.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.717 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.special_op_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.741 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_special_op_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.803 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.special_op_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.824 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.renormalize [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.844 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.renormalize.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.863 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.add_forward_monad_depend [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.884 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.904 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_grad [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.925 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_grad.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.944 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.270.997 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.018 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cse [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.162 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cse.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.187 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_3 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.206 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_3 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.225 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.282 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.300 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.353 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.371 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.424 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.441 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.492 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.509 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.271.516 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.expand_dump_flag [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.271.566 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.570 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.589 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.271.612 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.271.634 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.642 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.660 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.271.679 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.271.699 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.loop_unroll [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.271.714 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.712 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.729 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.271.746 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.271.765 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.271.779 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_a_1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.782 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.271.804 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_3.unchanged [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.271.828 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_a end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.271.853 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute_after_opt_a start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.271.907 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute_after_opt_a end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.271.933 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_cell_reuse_recomputed_activation start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.271.953 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_cell_reuse_recomputed_activation end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.271.975 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_after_opt_a start ... [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.081 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_1.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.111 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.recompute_prepare [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.129 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_recompute_prepare [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.154 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.183 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.211 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.228 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.248 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.264 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.283 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.299 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parameter_eliminate [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.272.300 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_after_opt_a end. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.321 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.339 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_2 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.272.333 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass convert_after_rewriter start ... [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.355 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r1_a_2 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.370 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.272.381 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass convert_after_rewriter end. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.393 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.412 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.272.407 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass order_py_execute_after_rewriter start ... [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.433 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.447 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.272.447 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass order_py_execute_after_rewriter end. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.466 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.272.470 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_b start ... [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.479 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.499 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.272.496 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.515 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.272.516 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_b_r1_b_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.533 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.547 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.272.535 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: zero_like_fill_zero [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.565 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.579 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.604 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.272.608 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: zero_like_fill_zero, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.272.646 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: list_to_tuple_eliminator_ [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.618 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.712 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.739 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.272.733 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: list_to_tuple_eliminator_, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.759 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.272.752 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_to_list_eliminator_ [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.775 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.804 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.819 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.272.812 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_to_list_eliminator_, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.840 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.272.833 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.855 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.872 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.886 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.904 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.272.903 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.918 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.272.923 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_const_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.935 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.949 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.966 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.979 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.272.984 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_const_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.272.996 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.010 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.002 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.028 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.056 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_2.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.056 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.075 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parallel_inline_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.075 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_set_item_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.090 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_parallel_inline_pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.114 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parallel_inline_pass.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.136 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.expand_dump_flag [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.139 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.154 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.159 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_depend_reorder [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.170 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.185 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.207 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.223 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.221 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_depend_reorder, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.238 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.239 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_convert_item_index_to_positive [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.260 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.278 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.291 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_a_1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.306 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_convert_item_index_to_positive, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.326 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: make_slice_get_slice_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.378 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: make_slice_get_slice_eliminator, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.396 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.429 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_1.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.448 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.recompute_prepare [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.463 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_recompute_prepare [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.455 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.485 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reset_defer_inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.486 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.506 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.525 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.540 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reset_defer_inline, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.559 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.540 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_assign_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.615 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.632 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.621 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.685 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.702 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_pure_node_eliminater [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.691 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.754 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_pure_node_eliminater, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.772 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: load_eliminater [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.761 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.824 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: load_eliminater, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.842 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: stopgrad_eliminater [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.830 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parameter_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.894 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: stopgrad_eliminater, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.911 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: special_op_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.900 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.968 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.964 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: special_op_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.273.984 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r2_a_2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.273.980 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.000 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.024 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.038 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.032 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.057 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.051 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_add_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.073 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.091 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.104 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.104 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_add_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.121 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.121 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_set_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.136 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.153 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.167 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.173 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_set_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.184 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.199 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.193 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_depend_swap [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.216 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.230 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.246 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.259 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.253 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_depend_swap, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.273 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_add_const_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.278 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.299 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.323 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.324 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_add_const_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.338 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.342 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: value_based_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.356 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.370 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.386 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.399 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.396 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: value_based_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.418 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.413 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: parallel_virtual_node [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.434 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.450 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.463 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.466 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: parallel_virtual_node, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.481 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.484 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: const_output_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.495 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.512 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.524 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.541 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.536 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: const_output_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.559 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_2.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.557 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_1.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.577 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parallel_inline_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.581 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_2 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.592 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_parallel_inline_pass [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.600 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_b_r1_b_2 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.615 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parallel_inline_pass.unchanged [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.635 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass a1a2 end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.655 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end inline action. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.663 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_2.unchanged [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.677 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.685 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_depend_eliminate [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.702 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol action. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.727 [mindspore/ccsrc/frontend/parallel/dynamic_shape/dynamic_shape.cc:223] ForwardHasDynamicShape] Can not find the forward graph, so find the ops in root graph [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.731 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.755 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_assign_eliminate [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.790 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.807 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.798 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.822 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_loads_eliminate [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.832 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pre_auto_parallel action. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.866 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_loads_eliminate.unchanged [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.876 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 22 us [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.896 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pre_auto_parallel action. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.888 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.renormalize [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.912 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.910 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.renormalize.unchanged [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.274.936 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start insert-virtual-dataset action. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.274.931 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.cse [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.275.085 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.cse.unchanged [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.275.114 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_b end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.275.140 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass optimize_parallel_all_gather_comm start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.275.191 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass optimize_parallel_all_gather_comm end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.275.215 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_param_gather start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.275.235 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_param_gather end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.275.259 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cconv start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.275.305 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cconv end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.275.332 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass loop_unroll start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.275.927 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end insert-virtual-dataset action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.275.968 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.006 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol-second action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.026 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol-second action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.041 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.063 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start dataset_repeat_opt action. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.127 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start loop_unroll_optimizer.r1.loop_unroll [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.163 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, loop_unroll_optimizer_r1_loop_unroll [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.182 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.207 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2248] GetCommInfo] Get global rank from communication model, the global rank is 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.242 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End loop_unroll_optimizer.r1.loop_unroll.unchanged [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.271 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 0, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.276.275 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass loop_unroll end. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.287 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.276.302 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_after_cconv start ... [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.302 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.329 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 0, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.326 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.c_1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.345 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_after_cconv_r1_c_1 [WARNING] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.359 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:305] BroadcastDataset] For now on, only dataset sink mode support dataset reader optimizer. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.376 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end dataset_repeat_opt action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.390 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.416 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_split action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.440 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:239] PipelineSplit] Get device num from communication model, the device num is 8 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.457 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:256] PipelineSplit] The parameter 'stage_num' is: 1. No need Pipeline split. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.492 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_split action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.507 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.531 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start optimize action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.584 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute start ... [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.579 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.c_1.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.601 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.parameter_eliminate [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.615 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute end. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.624 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.parameter_eliminate.unchanged [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.653 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.656 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_depend_eliminate [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.706 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.710 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.732 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_assign_eliminate [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.763 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_a start ... [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.779 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.788 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.expand_dump_flag [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.812 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.802 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.833 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.851 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_switch_simplify [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.858 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.886 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.276.882 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.cse [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.906 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.loop_unroll [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.921 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_loop_unroll [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.957 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.975 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.276.989 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.277.029 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.cse.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.277.055 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.renormalize [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.277.077 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.renormalize.unchanged [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.277.100 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_after_cconv end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.277.124 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_dup_value start ... [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.282 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_1.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.305 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.recompute_prepare [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.322 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_recompute_prepare [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.349 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.367 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.392 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.408 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_assign_eliminate [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.277.408 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_dup_value end. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.429 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.446 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_loads_eliminate [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.277.439 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass tuple_transform start ... [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.277.464 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.d_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.473 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.277.483 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_trans_graph_r1_d_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.491 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parameter_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.511 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.527 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_2 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.541 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_2 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.556 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.579 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.594 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.615 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.629 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.649 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.674 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.695 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.709 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.730 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.745 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.277.760 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.d_1.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.774 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:04.277.696 [mindspore/ccsrc/backend/graph_compiler/transform.cc:575] CreateBackend] CreateBackend is: ge [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.789 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.277.787 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.renormalize [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.811 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.277.810 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.renormalize.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.826 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.277.832 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass tuple_transform end. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.847 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.862 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.277.855 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass partial_unused_args_eliminate start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.277.878 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass partial_unused_args_eliminate end. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.884 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.901 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.277.900 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_cache_embedding start ... [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.929 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.943 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.277.955 [mindspore/ccsrc/frontend/parallel/cache_embedding/cache_embedding.cc:706] AddCacheEmbedding] Parameters are all not cache enable. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.964 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.277.980 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.277.979 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_cache_embedding end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.003 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_recomputation start ... [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.010 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.027 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.048 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] DEBUG(187775,ffffba4dbc10,python):2025-02-07-15:58:04.278.033 [mindspore/ccsrc/debug/debugger/debugger.cc:80] Init] Debugger got device_id: 3 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.063 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.093 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] DEBUG(187775,ffffba4dbc10,python):2025-02-07-15:58:04.278.066 [mindspore/ccsrc/debug/debugger/debugger.cc:82] Init] Debugger got device_target: Ascend [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.108 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.129 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.142 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.160 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.174 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.194 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.186 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_recomputation end. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.214 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_2.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.232 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.accelerated_algorithm [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.215 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cse_after_recomputation start ... [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.247 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_accelerated_algorithm [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.278.251 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_recompute.r1.cse [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.275 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.293 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.308 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.324 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.340 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.278.346 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_recompute.r1.cse.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.357 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_shard_fg_expand [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.378 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_shard_fg_expand.unchanged [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.374 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cse_after_recomputation end. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.397 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard_inline [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.398 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass environ_conv start ... [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.411 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_shard_inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.435 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard_inline.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.452 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_parallel [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.451 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass environ_conv end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.477 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass swap_dp_allreduce_reducescatter start ... [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.482 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 14 us [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.502 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_parallel.changed [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.519 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass swap_dp_allreduce_reducescatter end. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.528 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.545 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass bias_add_comm_swap start ... [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.561 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.566 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass bias_add_comm_swap end. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.592 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 0, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.588 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_micro_interleaved_index start ... [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.608 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.606 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_micro_interleaved_index end. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.624 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.628 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_fine_grained_interleaved_index start ... [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.638 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 0, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.648 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_fine_grained_interleaved_index end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.670 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass merge_cast_opt start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.688 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass merge_cast_opt end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.709 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_recompute_activation start ... [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.739 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3587] MarkForwardCNode] Can not find the forward graph, so mark the ops in root graph [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.753 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_recompute_activation end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.776 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass micro_interleaved_order_control start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.795 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass micro_interleaved_order_control end. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.808 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.815 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass assign_add_opt start ... [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.826 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3182] IsInsertVirtualOutput] The current stage is: 0 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.278.868 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=_VirtualOutput [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.278.895 [mindspore/ccsrc/frontend/parallel/pass/assign_add_opt.cc:466] AssignAddOpt] Merge multi matmul assign add begin and concat eliminate enable flag is:0 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.278.936 [mindspore/ccsrc/frontend/parallel/pass/pass_utils.cc:122] ExtractBackwardMatMul] backward_matmul_dx_dw_map size:0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.278.980 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass assign_add_opt end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.003 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass ForceFp32Comm start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.031 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass ForceFp32Comm end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.054 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_cast_before_assign_add start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.091 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_cast_before_assign_add end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.114 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass full_micro_interleaved_order_control start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.133 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass full_micro_interleaved_order_control end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.155 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass reorder_send_recv_between_fp_bp start ... [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:04.279.097 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:739] Initialize] The actor thread number: 5, the kernel thread number: 25 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.173 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass reorder_send_recv_between_fp_bp end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.194 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass comm_op_add_attrs start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.253 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass comm_op_add_attrs end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.277 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_comm_op_reuse_tag start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.348 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_comm_op_reuse_tag end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.373 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_split_concat_branches start ... [INFO] DEBUG(187834,ffffb35e0c10,python):2025-02-07-15:58:04.279.368 [mindspore/ccsrc/common/debug/env_config_parser.cc:152] ParseFromFile] The 'env_config_path' in 'mindspore.context.set_context(env_config_path={path})' is empty. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.392 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_split_concat_branches end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.413 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_parallel_branches start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.430 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_parallel_branches end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.452 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_in_pipeline start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.499 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_in_pipeline end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.522 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_grad_in_pipeline start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.542 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_grad_in_pipeline end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.564 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass control_data_broadcast_order start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.582 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass control_data_broadcast_order end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.605 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass grouped_pairwise_exchange_alltoall start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.631 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass grouped_pairwise_exchange_alltoall end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.654 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass offloading_packed_experts start ... [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.279.689 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:746] SetOffloadingPackedExpert] pass if (parallel::g_device_manager == nullptr) [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.279.708 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:707] CheckUserSettings] To activate the pass, set_auto_parallel_context 'enable_alltoall' should be true [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.279.726 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:751] SetOffloadingPackedExpert] CheckUserSettings_not_pass [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.279.716 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualDatasetInfo0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.744 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass offloading_packed_experts end. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.279.759 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.768 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_and_grad_model_parallel start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.788 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_and_grad_model_parallel end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.810 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_matmul_and_grad_allreduce start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.827 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_matmul_and_grad_allreduce end. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.279.849 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1), (1, 1, 1)) [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.848 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_allgather_and_fa_grad start ... [WARNING] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.279.866 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.279.873 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.885 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_allgather_and_fa_grad end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.909 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_ring_attention start ... [INFO] SYMBOLIC_SHAPE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.279.935 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.960 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_ring_attention end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.279.985 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_flash_sp start ... [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.279.997 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualDatasetInfo00: The loss divisor is 1, no need to create virtual div op. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.029 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_flash_sp end. [INFO] SYMBOLIC_SHAPE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.279.969 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.280.053 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.055 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass begin_end_overlap_inline start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.073 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass begin_end_overlap_inline end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.094 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_matmul_comm_elemetwise start ... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.114 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_matmul_comm_elemetwise end. [INFO] SYMBOLIC_SHAPE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.280.072 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.134 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_layernorm_comm start ... [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.162 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator MulInfo1 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.153 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_layernorm_comm end. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.184 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.184 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass handle_group_info start ... [INFO] SYMBOLIC_SHAPE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.280.148 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.210 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass handle_group_info end. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.227 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((2, 2, 2), (2, 2, 2)) [INFO] SYMBOLIC_SHAPE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.280.229 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.234 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass symbol_engine_optimizer start ... [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.246 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.257 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.build [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.294 [mindspore/ccsrc/frontend/optimizer/irpass/symbol_engine_optimizer.cc:39] operator()] There is no dynamic shape node, the SymbolEngineBuilder is disabled. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.303 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.280.273 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1682] Run] Pipeline run [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.318 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.build.unchanged [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.333 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 0 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.342 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_shapecalc [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.353 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.368 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 1 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.363 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_shapecalc [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.383 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:503] InferMirrorOps] MulInfo11: No need to insert mirror ops [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.280.358 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start bootstrap action. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.404 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2631] InferAsLossDivisor] MulInfo11: the dev matrix shape is [2, 2, 2], the output tensor map is [2, 1, 0], loss divisor is 1 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.419 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] MulInfo11: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.433 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1334] Init] MulInfo11 : Init success. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.426 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_shapecalc.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.451 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_not_effective [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.471 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_not_effective [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.516 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualOutputInfo2 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.534 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.549 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_not_effective.unchanged [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.566 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1)) [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.573 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.opt_reshape [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.585 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.592 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_opt_reshape [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.660 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.opt_reshape.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.711 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.fold_const_symbol [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.728 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_fold_const_symbol [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.621 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualOutputInfo22: The loss divisor is 1, no need to create virtual div op. [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.802 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.fold_const_symbol.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.825 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.renormalize [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.280.847 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.renormalize.unchanged [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.858 [mindspore/ccsrc/frontend/parallel/parameter_manager.cc:1445] HandleCameAndAdaFactorOpt] Adafactor or Came optimizer process start [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.870 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass symbol_engine_optimizer end. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.895 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end optimize action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.915 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.954 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_parallel_scheduler action. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.280.976 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1150] MergeEntireShapeForDynamic] Into MergeEntireShapeForDynamic [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.976 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_parallel_scheduler action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.280.993 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.007 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1163] MergeEntireShapeForDynamic] Can not find the forward graph, so mark the ops in root graph [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.281.017 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad_reorder action. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.091 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(1) [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.281.103 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad_reorder action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.281.125 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.281.153 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start get_jit_bprop_graph action. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.169 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.281.172 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end get_jit_bprop_graph action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.281.192 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.281.216 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.281.234 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.281.250 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.281.274 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start eliminate_special_op_node action. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.285 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 0-4 and group name is 2-16453000547691086251 [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.332 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-16453000547691086251 [const vector]{0, 4}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.418 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.019 msec. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.472 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-16453000547691086251 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.498 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.055 msec. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.531 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.206 msec. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.546 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-16453000547691086251 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.610 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 0-2 and group name is 2-5208665662337742843 [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.637 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5208665662337742843 [const vector]{0, 2}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.669 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.004 msec. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.695 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-5208665662337742843 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.715 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.027 msec. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.735 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.097 msec. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.749 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-5208665662337742843 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.797 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 0-1 and group name is 2-5004544844489628105 [WARNING] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.821 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5004544844489628105 [const vector]{0, 1}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.846 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.002 msec. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.281.834 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end bootstrap action. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.871 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-5004544844489628105 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.281.876 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 9 [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.892 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.027 msec. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.921 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.098 msec. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.281.936 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-5004544844489628105 [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.281.924 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.ad_related_special_op_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.281.958 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_ad_related_special_op_eliminate [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.281.935 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start type_inference action. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.000 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.282.020 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.ad_related_special_op_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.282.049 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.mutable_op_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.282.069 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_mutable_op_eliminate [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.282.121 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.mutable_op_eliminate.unchanged [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.282.143 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.convert_tensor_op_eliminate [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.152 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op0, op=StridedSlice [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.282.160 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_convert_tensor_op_eliminate [INFO] ANALYZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.282.179 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] OPTIMIZER(187764,ffff97badc10,python):2025-02-07-15:58:04.282.221 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.convert_tensor_op_eliminate.unchanged [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.282.246 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end eliminate_special_op_node action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.282.267 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.282.295 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start distribtued_split action. [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.282.324 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:378] GenerateStrategy] Current parallel mode is semi_auto_parallel [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.329 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.282.342 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:390] GenerateStrategy] Generated distributed strategy is 1 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.364 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op0, op=StridedSlice [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.437 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.465 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op1, op=StridedSlice [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:04.282.478 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:1277] Run] All nodes are on this process so there's no need to build and split distributed graph. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.282.501 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end distribtued_split action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.282.519 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.541 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.282.546 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start validate action. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.564 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.580 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(2) [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.641 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.282.651 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end validate action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.282.672 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PROFILER(187764,ffff97badc10,python):2025-02-07-15:58:04.282.750 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc:49] IsProfilingParallelStrategyEnabled] Profiling parallel strategy is disabled. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.787 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.282.823 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start task_emit action. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.888 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op1, op=StridedSlice [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.282.989 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:04.282.997 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1679] SetRunMode] Run graph mode with kernel by kernel by configuration. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.020 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op2, op=StridedSlice [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.094 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.121 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op3, op=StridedSlice [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:04.283.153 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1057] CompileGraphs] Status record: start compile function graph: 4_3_1___main___Net_construct_20 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.196 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.216 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.302 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_6{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 1} [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.351 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_8{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_6, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.396 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_9{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_10, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.426 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_11{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 0} [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:04.283.435 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.465 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_12{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_11, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.507 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_13{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_14, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.551 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/Mul-op0->Default/_VirtualOutput-op0(1) [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.601 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.740 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 7, operator_vector: AllGather, AllGather, Split, Concat, AllGather, Split, Concat [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:04.283.848 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: 4_3_1___main___Net_construct_20 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.283.879 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=AllGather [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:04.284.012 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-511848487187618470, the max communication size is 1 MB. [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:04.284.038 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-511848487187618470, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:04.284.067 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-511848487187618470. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.284.951 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_7692796245619514736AllGather_ success [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.285.010 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op0, op=Split [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.285.132 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_12015561575443432111Split_ success [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.285.203 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op0, op=Concat [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.285.273 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8051664706019937323Concat_ success [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.285.303 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op0, op=AllGather [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.285.968 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_5140002550487651858AllGather_ success [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.286.023 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op1, op=Split [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.286.088 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8691182465882856301Split_ success [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.286.163 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op1, op=Concat [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.286.204 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_6614310911506831424Concat_ success [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.286.230 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op1, op=AllGather [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.286.919 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_1898494724763908338AllGather_ success [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.286.964 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.010 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_15{[0]: ValueNode PrimFunc_Mul, [1]: CNode_13, [2]: CNode_9} [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.041 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_16{[0]: ValueNode AllGather, [1]: CNode_15} [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.068 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_17{[0]: ValueNode AllGather, [1]: CNode_18} [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.437 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:918] FindCommonMirrorGroup] The common mirror group is:[const vector]{} [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.493 [mindspore/ccsrc/frontend/parallel/parallel_postprocessor.cc:352] HandleGlobalNormScale] Start to process the global norm [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.557 [mindspore/ccsrc/frontend/parallel/step_parallel.cc:171] StepParallel] Now leaving step parallel, used time: 9004 us [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.589 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.616 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.677 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.699 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_comm [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.749 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_comm.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.771 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_fusion [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.816 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_fusion.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.837 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.matmul_add_comm_reduction [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.888 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.909 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.927 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.943 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_shard_identity [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.287.960 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_shard_identity [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.033 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.053 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_dataset [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.069 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_dataset [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.179 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_dataset.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.204 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.get_grad_eliminate_ [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.220 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_get_grad_eliminate_ [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.285 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.313 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_output [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.329 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_output [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.413 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_output.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.434 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_forward [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.481 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_forward.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.499 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_recompute_pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.519 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.536 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.551 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.566 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.679 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.701 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.719 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.before_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.733 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_before_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.819 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.before_grad.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.836 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.880 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.288.900 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel_renormalize [INFO] ANALYZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.289.067 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:04.289.807 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:04.289.844 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:04.290.282 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:04.290.315 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:04.290.543 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:04.290.586 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.290.657 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.290.717 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 1___main___Net_construct_5 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x18fb69c0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x18fb69c0, value: ValueAny), Parent: } [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.538 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel_renormalize.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.584 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.update_top_fg [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.609 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.update_top_fg.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.628 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cast_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.644 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_cast_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.718 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cast_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.737 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_fg_expand [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.781 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_fg_expand.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.798 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation_after_expand [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.852 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.873 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp_send_recv_attached [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.913 [mindspore/ccsrc/frontend/parallel/pass/flash_sp.cc:2977] FlashSPSendRecvNodeAttach] No RA/FlashSP Send/Recv grad is found to be attached. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.933 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp_send_recv_attached.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.952 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.receive_attached [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.973 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.receive_attached.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.291.990 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.after_resolve [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.005 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_after_resolve [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.088 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.after_resolve.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.108 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_after_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.123 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_after_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.213 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_after_grad.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.231 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.special_op_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.245 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_special_op_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.306 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.special_op_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.323 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.renormalize [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.339 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.renormalize.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.355 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.add_forward_monad_depend [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.376 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.392 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.412 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_grad.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.428 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.488 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.506 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cse [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.792 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cse.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.820 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_3 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.838 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_3 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.854 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.914 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.930 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.984 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.292.997 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.292.968 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_summary begins the construction process witch capacity 128 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.062 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.078 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.129 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.143 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.195 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.209 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.262 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.275 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.328 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.342 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.394 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.412 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_3.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.435 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.expand_dump_flag [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.455 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.472 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.486 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.547 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.564 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.loop_unroll [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.578 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_loop_unroll [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.637 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.654 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.293.669 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.035 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_1.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.068 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.recompute_prepare [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.087 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_recompute_prepare [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.147 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.166 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.215 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.234 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_assign_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.275 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.293 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.332 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.351 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parameter_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.371 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.387 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_2 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.402 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_2 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.417 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.470 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.485 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.537 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.551 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.600 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.614 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.662 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.676 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.733 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.747 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.807 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.824 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.873 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.887 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.937 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.295.950 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.000 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.014 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.070 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.084 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.133 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.147 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.208 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.223 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.272 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.286 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.335 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.349 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.397 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.418 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.467 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.481 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.529 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.548 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_2.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.566 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.accelerated_algorithm [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.582 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_accelerated_algorithm [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.694 [mindspore/ccsrc/frontend/optimizer/opt.cc:232] ApplyIRToSubstitutions] There may be a problem. Substitution: opt_a.r2.accelerated_algorithm.less_batch_normalization [INFO] ME(187834:281473691028496,MainProcess):2025-02-07-15:58:04.296.211 [mindspore/_extends/parse/namespace.py:132] 'Net' object has no attribute or method: '__is_tensors_queue__', so will return None. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.785 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.accelerated_algorithm.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.808 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.825 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.841 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.857 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.874 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_shard_fg_expand [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.901 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.918 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard_inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.933 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_shard_inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.296.992 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard_inline.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.009 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_parallel [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.060 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 34 us [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.078 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_parallel.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.096 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.123 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.149 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.203 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.223 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_comm [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.269 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_comm.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.286 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_fusion [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.328 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_fusion.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.345 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.matmul_add_comm_reduction [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.386 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.403 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.420 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.436 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_shard_identity [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.450 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_shard_identity [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.509 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.526 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_dataset [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.541 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_dataset [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.600 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_dataset.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.617 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.get_grad_eliminate_ [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.631 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_get_grad_eliminate_ [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.688 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.705 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_output [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.720 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_output [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.778 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_output.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.795 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_forward [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.844 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_forward.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.862 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_recompute_pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.882 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.899 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.913 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.297.928 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.006 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.025 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.042 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.before_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.057 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_before_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.134 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.before_grad.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.152 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.193 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.211 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel_renormalize [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.228 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel_renormalize.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.245 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.update_top_fg [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.298.189 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.261 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.update_top_fg.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.278 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cast_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.292 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_cast_eliminate [INFO] ANALYZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.298.327 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: __main___Net_construct_2 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x363fded0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x363fded0, value: ValueAny), Parent: } [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.350 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cast_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.368 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_fg_expand [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.406 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_fg_expand.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.423 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation_after_expand [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.476 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.495 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp_send_recv_attached [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.513 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.528 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.receive_attached [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.546 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.receive_attached.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.561 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.after_resolve [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.575 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_after_resolve [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.637 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.after_resolve.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.654 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_after_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.669 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_after_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.752 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_after_grad.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.769 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.special_op_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.783 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_special_op_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.841 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.special_op_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.298.858 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.renormalize [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.298.837 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end type_inference action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.298.878 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.298.873 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:480] GetGeSessionOptions] Set GE atomic clean policy to 1. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.298.990 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad action. [INFO] ANALYZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.299.027 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.299.235 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.299.258 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.299.357 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start graph_reusing action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.299.414 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end graph_reusing action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.299.431 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.299.457 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start inline action. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:04.299.450 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:04.299.488 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.299.529 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.299.675 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.299.709 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass a1a2 start ... [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:04.299.720 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:04.299.752 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:04.299.947 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:04.299.977 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.300.042 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.300.098 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 3_1___main___Net_construct_19 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x18fb69c0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x18fb69c0, value: ValueAny), Parent: } [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.300.874 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.renormalize.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.300.925 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.add_forward_monad_depend [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.300.955 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.300.974 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.300.994 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_grad.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.011 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.073 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.093 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cse [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.253 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cse.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.278 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_3 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.295 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_3 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.311 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.370 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.387 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.438 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.453 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.503 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.517 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.566 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.580 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.629 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.654 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.705 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.719 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.768 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.782 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.832 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.852 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_3.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.876 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.expand_dump_flag [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.895 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.expand_dump_flag.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.914 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.928 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.301.988 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.switch_simplify.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.302.007 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.loop_unroll [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.302.023 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_loop_unroll [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.302.082 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.loop_unroll.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.302.100 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.302.115 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_1 [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.302.786 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:55] NewSession] Create new GE session success! [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.302.842 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:214] SetGeSession] Add a new Ge Session success [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.302.909 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:67] GraphRunner] ME run in ONE_DEVICE strategy mode [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.303.091 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:250] SetGraphRunner] Add a new GraphRunner success [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.303.146 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1610] Initialize] Create session and graphrunner successful. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.303.200 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1614] Initialize] Init ge successful, ge reference = 1. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.349 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_1.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.392 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.recompute_prepare [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.410 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_recompute_prepare [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.474 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.recompute_prepare.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.494 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_depend_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.554 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.576 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_assign_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.618 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.641 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_loads_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.682 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.701 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parameter_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.724 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parameter_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.742 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_2 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.757 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_2 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.776 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.834 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.851 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.904 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.919 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.969 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.303.985 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.036 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.052 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.102 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.117 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.182 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.199 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.249 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.277 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.329 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.344 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.397 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.411 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.475 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.490 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.543 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.559 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.625 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.656 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.709 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.725 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.777 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.792 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.842 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.859 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.912 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.927 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.304.979 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.003 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_2.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.037 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.accelerated_algorithm [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.055 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_accelerated_algorithm [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.305.051 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_image_summary begins the construction process witch capacity 128 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.131 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.155 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.174 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.191 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.209 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.227 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_shard_fg_expand [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.260 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.279 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard_inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.296 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_shard_inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.358 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard_inline.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.378 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_parallel [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.441 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 44 us [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.464 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_parallel.changed [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.488 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.522 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.544 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.565 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.583 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_comm [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.636 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_comm.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.656 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_fusion [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.702 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_fusion.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.725 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.matmul_add_comm_reduction [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.789 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.811 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.830 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.847 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_shard_identity [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.864 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_shard_identity [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.305.842 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_scalar_summary begins the construction process witch capacity 128 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.931 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.953 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_dataset [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.305.969 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_dataset [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.027 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_dataset.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.046 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.get_grad_eliminate_ [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.060 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_get_grad_eliminate_ [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.119 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.137 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_output [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.152 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_output [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.212 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_output.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.232 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_forward [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.276 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_forward.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.299 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_recompute_pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.322 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.339 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.354 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.369 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.306.427 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_histogram_summary begins the construction process witch capacity 128 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.454 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.487 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.506 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.before_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.522 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_before_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.603 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.before_grad.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.625 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation [INFO] PARSER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.306.554 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {cast_ : Prim[Cast]} [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.668 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.689 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel_renormalize [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.707 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel_renormalize.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.725 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.update_top_fg [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.743 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.update_top_fg.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.760 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cast_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.775 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_cast_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.836 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cast_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.856 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_fg_expand [INFO] PARSER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.306.872 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:151] WriteVariable] fill_3 update var `value` with node @fill_3:value{[0]: CNode_4, [1]: param_value, [2]: param_type} [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.898 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_fg_expand.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.919 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation_after_expand [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.306.935 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:215] InitHccl] Start init hccl adapter. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.968 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.306.989 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp_send_recv_attached [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.009 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.028 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.receive_attached [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.046 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.receive_attached.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.062 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.after_resolve [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.077 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_after_resolve [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.156 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.after_resolve.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.175 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_after_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.190 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_after_grad [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.307.179 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:450] InitKernelInfoStore] Start init hccl kernel info store. [INFO] PARSER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.307.196 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {fillv2_ : Prim[FillV2]} [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.277 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_after_grad.unchanged [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.307.264 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:467] InitKernelInfoStore] Get builder ops_kernel_info_hccl [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.298 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.special_op_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.315 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_special_op_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.373 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.special_op_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.391 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.renormalize [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.408 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.renormalize.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.425 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.add_forward_monad_depend [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.307.425 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:489] InitKernelInfoStore] Init hccl kernel info store success. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.447 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.465 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_grad [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.486 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_grad.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.504 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_eliminator [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.307.453 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:667] InitHcclExec] Start init hccl exec. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.566 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.589 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cse [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.761 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cse.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.791 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_3 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.809 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_3 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.826 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.887 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.902 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.954 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.307.980 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.032 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.048 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.097 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.113 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.165 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.181 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.233 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.249 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.301 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.317 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.368 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.390 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_3.unchanged [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.418 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_a end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.447 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute_after_opt_a start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.509 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute_after_opt_a end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.532 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_cell_reuse_recomputed_activation start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.549 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_cell_reuse_recomputed_activation end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.567 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_after_opt_a start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.933 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_after_opt_a end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.308.983 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass convert_after_rewriter start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.033 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass convert_after_rewriter end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.071 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass order_py_execute_after_rewriter start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.110 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass order_py_execute_after_rewriter end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.132 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_b start ... [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.158 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.176 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_b_r1_b_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.192 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: zero_like_fill_zero [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.255 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: zero_like_fill_zero, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.272 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: list_to_tuple_eliminator_ [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.324 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: list_to_tuple_eliminator_, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.339 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_to_list_eliminator_ [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.391 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_to_list_eliminator_, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.408 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.478 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.494 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_const_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.554 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_const_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.571 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.621 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.635 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_set_item_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.698 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.713 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_depend_reorder [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.773 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_depend_reorder, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.789 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_convert_item_index_to_positive [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.868 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_convert_item_index_to_positive, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.886 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: make_slice_get_slice_eliminator [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.936 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: make_slice_get_slice_eliminator, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.309.950 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.011 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.028 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reset_defer_inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.081 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reset_defer_inline, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.096 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.146 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.161 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.212 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.229 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_pure_node_eliminater [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.278 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_pure_node_eliminater, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.293 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: load_eliminater [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.343 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: load_eliminater, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.360 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: stopgrad_eliminater [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.410 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: stopgrad_eliminater, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.425 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: special_op_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.476 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: special_op_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.491 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.541 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.562 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_add_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.612 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_add_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.627 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_set_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.677 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_set_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.693 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_depend_swap [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.743 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_depend_swap, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.758 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_add_const_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.811 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_add_const_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.827 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: value_based_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.880 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: value_based_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.896 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: parallel_virtual_node [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.948 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: parallel_virtual_node, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.310.963 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: const_output_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.015 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: const_output_eliminate, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.041 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_1.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.310.960 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.expand_dump_flag [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.066 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_2 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.311.061 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.085 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_b_r1_b_2 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.150 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_2.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.311.129 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.171 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.224 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.311.189 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.249 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.296 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.311.305 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.331 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.374 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.396 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.renormalize [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.311.347 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.loop_unroll [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.418 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.renormalize.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.438 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.cse [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.311.366 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:673] InitHcclExec] Hcom DynamicKernel Initialize success [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.311.414 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:679] InitHcclExec] InitHcclExec success [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.311.434 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:233] InitHccl] Init hccl adapter success. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.311.424 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_loop_unroll [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.311.524 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.311.593 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_1 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.311.599 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:87] Initialize] Successfully initialize HCCL. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.605 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.cse.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.311.613 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_a_1 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.311.625 [mindspore/ccsrc/distributed/collective/collective_manager.cc:588] InitDeviceCommLib] Communication library on device side is successfully initialized. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.639 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_b end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.669 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass optimize_parallel_all_gather_comm start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.726 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass optimize_parallel_all_gather_comm end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.753 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_param_gather start ... [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.311.755 [mindspore/ccsrc/distributed/collective/collective_manager.cc:210] Initialize] [PROF]InitDeviceBackend costs 4699.44 msec. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.770 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_param_gather end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.792 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cconv start ... [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.311.789 [mindspore/ccsrc/distributed/collective/collective_manager.cc:810] IsAsyncInitGlobalComm] Async initialize global comm: 1. async_conf: 1, is_graph: 1, use_rank_table: 0, simulation: 0, use_mpi: 0, is_ascend: 1 [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.311.812 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.860 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cconv end. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.311.878 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.028 msec. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.311.885 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass loop_unroll start ... [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.311.922 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group hccl_world_group [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.311.948 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.046 msec. [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.312.012 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.033 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_1.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.073 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.recompute_prepare [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.312.084 [mindspore/ccsrc/distributed/collective/collective_manager.cc:869] SubmitCreateDeviceCommTask] Launch init comm thread. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.099 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_recompute_prepare [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.133 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.157 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_depend_eliminate [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.312.162 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for hccl_world_group. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.193 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.217 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_assign_eliminate [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.312.234 [mindspore/ccsrc/distributed/collective/collective_manager.cc:224] Initialize] [PROF]CreateGlobalCommunicationGroup costs 0.421 msec. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.312.253 [mindspore/ccsrc/distributed/collective/collective_manager.cc:227] Initialize] End initializing collective communication for backend: Ascend [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.247 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.287 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_loads_eliminate [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.312.292 [mindspore/ccsrc/distributed/init.cc:56] Initialize] [PROF]distributed_collective_init costs 4700.53 msec. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.313 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_loads_eliminate.unchanged [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.312.294 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: hccl_world_group [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.335 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parameter_eliminate [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.312.348 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for hccl_world_group [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.312.349 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:298] RecordInitStatus] Status record: system init. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.366 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.388 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_2 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.407 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r1_a_2 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.427 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.456 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.480 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.507 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.526 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.550 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.312.543 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.127 msec. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.568 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.592 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.610 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.657 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.679 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.708 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.312.679 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start loop_unroll_optimizer.r1.loop_unroll [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.727 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.312.735 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, loop_unroll_optimizer_r1_loop_unroll [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.750 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.768 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.790 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.817 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.312.818 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End loop_unroll_optimizer.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.841 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.312.857 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass loop_unroll end. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.863 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.312.889 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_after_cconv start ... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.905 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.312.914 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.c_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.312.932 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_after_cconv_r1_c_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.926 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.949 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.967 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.312.990 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.008 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.034 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.050 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.074 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.313.054 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group hccl_world_group [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.092 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.313.093 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.504 msec. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.117 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.134 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [WARNING] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.313.115 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.313.134 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.154 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.163 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.c_1.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.173 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.188 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.parameter_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.199 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.212 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.226 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_2.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.245 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.257 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parallel_inline_pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.281 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_parallel_inline_pass [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.297 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.321 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.323 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parallel_inline_pass.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.356 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.expand_dump_flag [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.363 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.384 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.384 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.407 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.423 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.427 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.443 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.cse [WARNING] DEVICE(187789,fffe9e7fc0f0,python):2025-02-07-15:58:04.313.425 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.464 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.486 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.loop_unroll [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.505 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_loop_unroll [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.535 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.561 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.581 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_a_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.610 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.cse.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.640 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.renormalize [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.660 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.renormalize.unchanged [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.681 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_after_cconv end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.313.704 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_dup_value start ... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.780 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_1.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.817 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.recompute_prepare [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.839 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_recompute_prepare [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.871 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.894 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.928 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.953 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_assign_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.313.984 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.005 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.029 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_loads_eliminate.unchanged [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.021 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_dup_value end. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.050 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parameter_eliminate [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.063 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass tuple_transform start ... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.076 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.090 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.d_1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.108 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_trans_graph_r1_d_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.098 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_2 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.132 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r2_a_2 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.156 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.189 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.210 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.314.097 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_utils.cc:480] GetGeSessionOptions] Set GE atomic clean policy to 1. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.236 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.257 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.282 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.300 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.324 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.341 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.366 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.385 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.406 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.414 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.d_1.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.424 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.443 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.renormalize [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.447 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.464 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.renormalize.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.466 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.486 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass tuple_transform end. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.492 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.509 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass partial_unused_args_eliminate start ... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.510 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.530 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass partial_unused_args_eliminate end. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.535 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.554 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.549 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_cache_embedding start ... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.600 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.620 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.636 [mindspore/ccsrc/frontend/parallel/cache_embedding/cache_embedding.cc:706] AddCacheEmbedding] Parameters are all not cache enable. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.659 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_cache_embedding end. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.662 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.681 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_recomputation start ... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.682 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.708 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.725 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.748 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.766 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.789 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.807 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.829 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.849 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.872 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.891 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.892 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_recomputation end. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.917 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.925 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cse_after_recomputation start ... [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.314.950 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_recompute.r1.cse [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.951 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_2.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.314.990 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parallel_inline_pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.015 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_parallel_inline_pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.053 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parallel_inline_pass.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.062 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_recompute.r1.cse.unchanged [INFO] COMMON(187803,ffff93d7bc10,python):2025-02-07-15:58:04.314.990 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:241] InitializeAcl] Call aclInit successfully [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.086 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass a1a2 end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.092 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cse_after_recomputation end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.117 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass environ_conv start ... [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.315.109 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:248] SetAclOpPrecisionMode] Set aclop PRECISION_MODE: allow_fp32_to_fp16 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.120 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end inline action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.143 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.183 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.188 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass environ_conv end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.216 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass swap_dp_allreduce_reducescatter start ... [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.225 [mindspore/ccsrc/frontend/parallel/dynamic_shape/dynamic_shape.cc:223] ForwardHasDynamicShape] Can not find the forward graph, so find the ops in root graph [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.259 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass swap_dp_allreduce_reducescatter end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.282 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass bias_add_comm_swap start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.302 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass bias_add_comm_swap end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.320 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_micro_interleaved_index start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.323 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.337 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_micro_interleaved_index end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.349 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.356 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_fine_grained_interleaved_index start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.373 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_fine_grained_interleaved_index end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.384 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pre_auto_parallel action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.390 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass merge_cast_opt start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.407 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass merge_cast_opt end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.426 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_recompute_activation start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.471 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_recompute_activation end. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.468 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 42 us [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.494 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass micro_interleaved_order_control start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.495 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pre_auto_parallel action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.511 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.511 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass micro_interleaved_order_control end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.544 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass assign_add_opt start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.315.545 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start insert-virtual-dataset action. [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.315.531 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.651 [mindspore/ccsrc/frontend/parallel/pass/assign_add_opt.cc:466] AssignAddOpt] Merge multi matmul assign add begin and concat eliminate enable flag is:0 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.698 [mindspore/ccsrc/frontend/parallel/pass/pass_utils.cc:122] ExtractBackwardMatMul] backward_matmul_dx_dw_map size:0 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.743 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass assign_add_opt end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.768 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass ForceFp32Comm start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.785 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass ForceFp32Comm end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.804 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_cast_before_assign_add start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.842 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_cast_before_assign_add end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.864 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass full_micro_interleaved_order_control start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.881 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass full_micro_interleaved_order_control end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.900 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass reorder_send_recv_between_fp_bp start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.917 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass reorder_send_recv_between_fp_bp end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.315.935 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass comm_op_add_attrs start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.014 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass comm_op_add_attrs end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.042 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_comm_op_reuse_tag start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.128 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_comm_op_reuse_tag end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.157 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_split_concat_branches start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.174 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_split_concat_branches end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.195 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_parallel_branches start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.212 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_parallel_branches end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.233 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_in_pipeline start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.293 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_in_pipeline end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.315 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_grad_in_pipeline start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.344 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_grad_in_pipeline end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.365 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass control_data_broadcast_order start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.381 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass control_data_broadcast_order end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.399 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass grouped_pairwise_exchange_alltoall start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.428 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass grouped_pairwise_exchange_alltoall end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.450 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass offloading_packed_experts start ... [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.316.420 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:272] Initialize] End initializing device context. [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.466 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:746] SetOffloadingPackedExpert] pass if (parallel::g_device_manager == nullptr) [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.484 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:707] CheckUserSettings] To activate the pass, set_auto_parallel_context 'enable_alltoall' should be true [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.497 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:751] SetOffloadingPackedExpert] CheckUserSettings_not_pass [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.512 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass offloading_packed_experts end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.530 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_and_grad_model_parallel start ... [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.316.533 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_res_manager.cc:404] LoadCollectiveCommLib] Loading MACCL collective library successfully. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.547 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_and_grad_model_parallel end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.566 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_matmul_and_grad_allreduce start ... [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.316.565 [mindspore/ccsrc/distributed/collective/collective_manager.cc:581] InitDeviceCommLib] Start initializing communication library on device side... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.581 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_matmul_and_grad_allreduce end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.599 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_allgather_and_fa_grad start ... [WARNING] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.615 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.643 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_allgather_and_fa_grad end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.666 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_ring_attention start ... [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.316.680 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:82] OpenTsd] Device id = 5, rank size = 8. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.723 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_ring_attention end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.745 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_flash_sp start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.788 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_flash_sp end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.316.756 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end insert-virtual-dataset action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.316.812 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.810 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass begin_end_overlap_inline start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.836 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass begin_end_overlap_inline end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.856 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_matmul_comm_elemetwise start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.873 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_matmul_comm_elemetwise end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.316.876 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol-second action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.892 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_layernorm_comm start ... [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.316.879 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel _npu_log begins the construction process witch capacity 128 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.316.902 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol-second action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.910 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_layernorm_comm end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.316.920 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.928 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass handle_group_info start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.316.944 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start dataset_repeat_opt action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.949 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass handle_group_info end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.968 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass symbol_engine_optimizer start ... [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.316.994 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.build [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.034 [mindspore/ccsrc/frontend/optimizer/irpass/symbol_engine_optimizer.cc:39] operator()] There is no dynamic shape node, the SymbolEngineBuilder is disabled. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.054 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.build.unchanged [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.064 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.073 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_shapecalc [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.094 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_shapecalc [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.093 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2248] GetCommInfo] Get global rank from communication model, the global rank is 7 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.156 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_shapecalc.unchanged [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.168 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 7, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 7 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.178 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_not_effective [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.188 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.195 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_not_effective [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.204 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.223 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 7, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.273 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_not_effective.unchanged [WARNING] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.261 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:305] BroadcastDataset] For now on, only dataset sink mode support dataset reader optimizer. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.296 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.opt_reshape [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.301 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end dataset_repeat_opt action. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.313 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_opt_reshape [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.319 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.351 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_split action. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.364 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.opt_reshape.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.384 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.fold_const_symbol [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.382 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:239] PipelineSplit] Get device num from communication model, the device num is 8 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.404 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:256] PipelineSplit] The parameter 'stage_num' is: 1. No need Pipeline split. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.399 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_fold_const_symbol [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.459 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_split action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.478 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.486 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.fold_const_symbol.unchanged [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.507 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start optimize action. [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.510 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.renormalize [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.531 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.renormalize.unchanged [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.555 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass symbol_engine_optimizer end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.565 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.579 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end optimize action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.597 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.610 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute end. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.631 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_parallel_scheduler action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.641 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.653 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_parallel_scheduler action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.668 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.691 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad_reorder action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.715 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.773 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_a start ... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.793 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad_reorder action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.815 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.814 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.expand_dump_flag [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.843 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start get_jit_bprop_graph action. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.843 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.expand_dump_flag.unchanged [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.862 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end get_jit_bprop_graph action. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.868 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.switch_simplify [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.878 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.893 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_switch_simplify [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.901 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.916 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.930 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.941 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.switch_simplify.unchanged [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.317.953 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start eliminate_special_op_node action. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.968 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.loop_unroll [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.317.993 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_loop_unroll [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.028 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.054 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.074 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_1 [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.318.075 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:55] NewSession] Create new GE session success! [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.318.149 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:214] SetGeSession] Add a new Ge Session success [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.318.233 [mindspore/ccsrc/transform/graph_ir/graph_runner.cc:67] GraphRunner] ME run in ONE_DEVICE strategy mode [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.318.385 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:250] SetGraphRunner] Add a new GraphRunner success [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.318.440 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1610] Initialize] Create session and graphrunner successful. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.318.462 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:1614] Initialize] Init ge successful, ge reference = 1. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.486 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_1.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.560 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.recompute_prepare [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.586 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_recompute_prepare [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.625 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.651 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.318.652 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.ad_related_special_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.690 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.318.710 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_ad_related_special_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.717 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.749 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.772 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.802 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.318.785 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.ad_related_special_op_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.826 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parameter_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.318.843 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.mutable_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.856 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.318.861 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_mutable_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.877 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_2 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.894 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_2 [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.318.918 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.mutable_op_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.916 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.318.938 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.convert_tensor_op_eliminate [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.318.954 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_convert_tensor_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.955 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.318.977 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.003 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.convert_tensor_op_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.005 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.024 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.031 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end eliminate_special_op_node action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.049 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.049 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.068 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.081 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start distribtued_split action. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.096 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.116 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:378] GenerateStrategy] Current parallel mode is semi_auto_parallel [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.132 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:390] GenerateStrategy] Generated distributed strategy is 1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.116 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.156 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.178 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.220 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.239 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.267 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.285 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.314 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:1277] Run] All nodes are on this process so there's no need to build and split distributed graph. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.321 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.342 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end distribtued_split action. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.342 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.359 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.369 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.387 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.389 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start validate action. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.433 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.456 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.483 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.500 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.532 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end validate action. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.543 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.554 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.564 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.596 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.614 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.639 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] PROFILER(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.656 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc:49] IsProfilingParallelStrategyEnabled] Profiling parallel strategy is disabled. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.657 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.685 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.717 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.745 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start task_emit action. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.748 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.768 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.798 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.829 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_2.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.873 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.accelerated_algorithm [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.904 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_accelerated_algorithm [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.949 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.976 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:04.319.973 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1679] SetRunMode] Run graph mode with kernel by kernel by configuration. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.319.998 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.021 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.042 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.067 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_shard_fg_expand [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.098 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.124 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard_inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.144 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_shard_inline [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:04.320.163 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1057] CompileGraphs] Status record: start compile function graph: 4_3_1___main___Net_construct_20 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.187 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard_inline.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.212 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_parallel [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.275 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 34 us [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.305 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_parallel.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.395 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:04.320.442 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.440 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 7, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 7 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.483 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.504 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.525 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 7, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.723 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3587] MarkForwardCNode] Can not find the forward graph, so mark the ops in root graph [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.846 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.874 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3182] IsInsertVirtualOutput] The current stage is: 0 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.320.936 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=_VirtualOutput [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:04.320.932 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: 4_3_1___main___Net_construct_20 [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:04.321.185 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-16453000547691086251, the max communication size is 1 MB. [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:04.321.218 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-16453000547691086251, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.321.243 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-16453000547691086251. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.070 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualDatasetInfo0 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.143 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.253 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1), (1, 1, 1)) [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.282 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.415 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualDatasetInfo00: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.606 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator MulInfo1 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.631 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.683 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((2, 2, 2), (2, 2, 2)) [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.705 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.778 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.802 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 0 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.824 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.841 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 1 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.879 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:503] InferMirrorOps] MulInfo11: No need to insert mirror ops [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.907 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2631] InferAsLossDivisor] MulInfo11: the dev matrix shape is [2, 2, 2], the output tensor map is [2, 1, 0], loss divisor is 1 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.924 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] MulInfo11: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.322.942 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1334] Init] MulInfo11 : Init success. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.323.051 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualOutputInfo2 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.323.078 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.323.117 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1)) [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.323.141 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.323.141 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:667] GenerateArgumentsKey] Generate a new compile key for new args, key: 0 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.323.189 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualOutputInfo22: The loss divisor is 1, no need to create virtual div op. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.323.216 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:675] GenerateArgumentsKey] New cached args: Arg[0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xd1d49c0, value: ValueAny) Arg[1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xd1d49c0, value: ValueAny) [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.323.386 [mindspore/ccsrc/frontend/parallel/parameter_manager.cc:1445] HandleCameAndAdaFactorOpt] Adafactor or Came optimizer process start [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.323.548 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1150] MergeEntireShapeForDynamic] Into MergeEntireShapeForDynamic [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.323.591 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1163] MergeEntireShapeForDynamic] Can not find the forward graph, so mark the ops in root graph [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.323.696 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(1) [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.323.785 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.323.901 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1263] CompileInner] Start compiling, phase: train.1738915084320481280.281470585332944.0.. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.323.930 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 3-7 and group name is 2-5488101015797526856 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.323.945 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:272] LoadPassesConfig] AUTO_PASSES_OPTIMIZE_PATH: [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.323.970 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] Start compiling 'Net.construct' and it will take a while. Please wait... [WARNING] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.323.994 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5488101015797526856 [const vector]{3, 7}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.086 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.025 msec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.153 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-5488101015797526856 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.208 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.088 msec. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.249 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.266 msec. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.271 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-5488101015797526856 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.363 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 5-7 and group name is 2-16057586909177180503 [WARNING] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.405 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-16057586909177180503 [const vector]{5, 7}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.445 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.004 msec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.484 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-16057586909177180503 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.512 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.04 msec. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.537 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.133 msec. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.555 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-16057586909177180503 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.614 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 6-7 and group name is 2-6853331267304275293 [WARNING] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.718 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-6853331267304275293 [const vector]{6, 7}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.760 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.005 msec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.796 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-6853331267304275293 [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.823 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.037 msec. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.847 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.129 msec. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.868 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-6853331267304275293 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.324.950 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.325.175 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op0, op=StridedSlice [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.325.467 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.325.523 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op0, op=StridedSlice [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.325.620 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.325.659 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op1, op=StridedSlice [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.325.756 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.325.788 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.325.812 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(2) [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.325.918 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.326.153 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.326.306 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op1, op=StridedSlice [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.326.436 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.326.477 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op2, op=StridedSlice [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.326.441 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_dump begins the construction process witch capacity 128 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.326.569 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.326.608 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op3, op=StridedSlice [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.326.696 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.326.725 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.326.851 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_6{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 1} [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.326.933 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_8{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_6, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.326.989 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_9{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_10, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.327.027 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_11{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 0} [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.327.076 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_12{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_11, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.327.123 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_13{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_14, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.327.133 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_summary begins the construction process witch capacity 128 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.327.213 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/Mul-op0->Default/_VirtualOutput-op0(1) [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.327.308 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.327.545 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 7, operator_vector: AllGather, AllGather, Split, Concat, AllGather, Split, Concat [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.327.650 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_image_summary begins the construction process witch capacity 128 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.327.737 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=AllGather [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.328.173 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_scalar_summary begins the construction process witch capacity 128 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.328.885 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_histogram_summary begins the construction process witch capacity 128 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.329.321 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_7692796245619514736AllGather_ success [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.329.384 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:215] InitHccl] Start init hccl adapter. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.329.420 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op0, op=Split [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.329.604 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_12015561575443432111Split_ success [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.329.654 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:450] InitKernelInfoStore] Start init hccl kernel info store. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.329.690 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op0, op=Concat [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.329.746 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:467] InitKernelInfoStore] Get builder ops_kernel_info_hccl [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.329.773 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8051664706019937323Concat_ success [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.329.810 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op0, op=AllGather [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.329.898 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:489] InitKernelInfoStore] Init hccl kernel info store success. [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.329.925 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:667] InitHcclExec] Start init hccl exec. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.330.599 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_5140002550487651858AllGather_ success [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.330.679 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op1, op=Split [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.330.755 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8691182465882856301Split_ success [INFO] COMMON(187753,ffff8292dc10,python):2025-02-07-15:58:04.330.663 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:241] InitializeAcl] Call aclInit successfully [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.330.850 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op1, op=Concat [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.330.856 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:248] SetAclOpPrecisionMode] Set aclop PRECISION_MODE: allow_fp32_to_fp16 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.330.904 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_6614310911506831424Concat_ success [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.330.936 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op1, op=AllGather [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.331.308 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.331.643 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_1898494724763908338AllGather_ success [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.331.707 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.331.774 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_15{[0]: ValueNode PrimFunc_Mul, [1]: CNode_13, [2]: CNode_9} [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.331.811 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_16{[0]: ValueNode AllGather, [1]: CNode_15} [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.331.845 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_17{[0]: ValueNode AllGather, [1]: CNode_18} [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.332.218 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:272] Initialize] End initializing device context. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.277 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:918] FindCommonMirrorGroup] The common mirror group is:[const vector]{} [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.343 [mindspore/ccsrc/frontend/parallel/parallel_postprocessor.cc:352] HandleGlobalNormScale] Start to process the global norm [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.332.348 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_res_manager.cc:404] LoadCollectiveCommLib] Loading MACCL collective library successfully. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.332.384 [mindspore/ccsrc/distributed/collective/collective_manager.cc:581] InitDeviceCommLib] Start initializing communication library on device side... [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.429 [mindspore/ccsrc/frontend/parallel/step_parallel.cc:171] StepParallel] Now leaving step parallel, used time: 12055 us [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.332.492 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:82] OpenTsd] Device id = 1, rank size = 8. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.494 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.540 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.601 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.699 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_comm [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.773 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_comm.unchanged [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.332.786 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel _npu_log begins the construction process witch capacity 128 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.801 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_fusion [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.858 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_fusion.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.882 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.matmul_add_comm_reduction [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.946 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.332.975 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.002 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.025 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_shard_identity [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.050 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_shard_identity [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.157 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.191 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_dataset [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.213 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_dataset [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.387 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_dataset.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.427 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.get_grad_eliminate_ [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.450 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_get_grad_eliminate_ [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.541 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.569 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_output [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.590 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_output [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.703 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_output.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.740 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_forward [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.822 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_forward.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.853 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_recompute_pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.884 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.911 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.932 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.333.953 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.334.079 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.334.114 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.334.138 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.before_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.334.160 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_before_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.334.285 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.before_grad.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.334.315 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.334.374 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.334.402 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel_renormalize [INFO] ANALYZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.334.668 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:04.335.652 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:04.335.717 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:04.335.865 [mindspore/ccsrc/backend/graph_compiler/transform.cc:575] CreateBackend] CreateBackend is: ge [INFO] DEBUG(187789,ffffaa419c10,python):2025-02-07-15:58:04.336.080 [mindspore/ccsrc/debug/debugger/debugger.cc:80] Init] Debugger got device_id: 4 [INFO] DEBUG(187789,ffffaa419c10,python):2025-02-07-15:58:04.336.111 [mindspore/ccsrc/debug/debugger/debugger.cc:82] Init] Debugger got device_target: Ascend [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:04.336.325 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:04.336.370 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:04.336.697 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.336.665 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:673] InitHcclExec] Hcom DynamicKernel Initialize success [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.336.738 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:679] InitHcclExec] InitHcclExec success [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:04.336.736 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.336.759 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:233] InitHccl] Init hccl adapter success. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.336.786 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:87] Initialize] Successfully initialize HCCL. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.336.837 [mindspore/ccsrc/distributed/collective/collective_manager.cc:588] InitDeviceCommLib] Communication library on device side is successfully initialized. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.336.839 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.336.896 [mindspore/ccsrc/distributed/collective/collective_manager.cc:210] Initialize] [PROF]InitDeviceBackend costs 5011.54 msec. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.336.934 [mindspore/ccsrc/distributed/collective/collective_manager.cc:810] IsAsyncInitGlobalComm] Async initialize global comm: 1. async_conf: 1, is_graph: 1, use_rank_table: 0, simulation: 0, use_mpi: 0, is_ascend: 1 [INFO] ANALYZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.336.926 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 1___main___Net_construct_5 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x363fded0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x363fded0, value: ValueAny), Parent: } [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.336.960 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.337.036 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.033 msec. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.337.085 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group hccl_world_group [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.337.111 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.05 msec. [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.337.193 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.337.874 [mindspore/ccsrc/distributed/collective/collective_manager.cc:869] SubmitCreateDeviceCommTask] Launch init comm thread. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.337.922 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for hccl_world_group. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.337.953 [mindspore/ccsrc/distributed/collective/collective_manager.cc:224] Initialize] [PROF]CreateGlobalCommunicationGroup costs 0.991 msec. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.337.975 [mindspore/ccsrc/distributed/collective/collective_manager.cc:227] Initialize] End initializing collective communication for backend: Ascend [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.338.010 [mindspore/ccsrc/distributed/init.cc:56] Initialize] [PROF]distributed_collective_init costs 5013.25 msec. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.337.993 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel_renormalize.changed [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:04.338.005 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: hccl_world_group [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.338.046 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:298] RecordInitStatus] Status record: system init. [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:04.338.071 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for hccl_world_group [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.073 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.update_top_fg [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.106 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.update_top_fg.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.129 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cast_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.154 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_cast_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.255 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cast_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.280 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_fg_expand [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:04.338.297 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.151 msec. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_fg_expand.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.359 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation_after_expand [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.432 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.458 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp_send_recv_attached [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.515 [mindspore/ccsrc/frontend/parallel/pass/flash_sp.cc:2977] FlashSPSendRecvNodeAttach] No RA/FlashSP Send/Recv grad is found to be attached. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.543 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp_send_recv_attached.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.566 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.receive_attached [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.596 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.receive_attached.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.618 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.after_resolve [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.634 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_after_resolve [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.730 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.after_resolve.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.756 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_after_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.777 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_after_grad [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:04.338.815 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group hccl_world_group [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:04.338.868 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.531 msec. [WARNING] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:04.338.890 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.894 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_after_grad.unchanged [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:04.338.915 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.940 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.special_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.338.960 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_special_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.042 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.special_op_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.067 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.renormalize [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.087 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.renormalize.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.107 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.add_forward_monad_depend [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.141 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.163 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.190 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_grad.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.210 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_eliminator [WARNING] DEVICE(187803,fffe86ffd0f0,python):2025-02-07-15:58:04.339.274 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.298 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.329 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cse [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.569 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cse.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.613 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_3 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.638 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_3 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.660 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.743 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.766 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.833 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.853 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.919 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.339.938 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.005 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.043 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.111 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.131 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.197 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.215 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.285 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.305 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.378 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.409 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_3.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.446 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.expand_dump_flag [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.471 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.494 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.switch_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.514 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_switch_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.600 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.727 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.loop_unroll [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.766 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_loop_unroll [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.847 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.872 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.340.891 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.342.729 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_1.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.342.816 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.recompute_prepare [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.342.846 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_recompute_prepare [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.342.932 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.342.981 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.046 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.075 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_assign_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.126 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.149 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.196 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.221 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parameter_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.252 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.273 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_2 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.292 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_2 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.313 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.384 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.404 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.469 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.488 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.548 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.564 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.623 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.642 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.706 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.725 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.343.671 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_dump begins the construction process witch capacity 128 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.809 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.842 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.908 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.928 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.343.992 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.011 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.076 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.096 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.179 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.203 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.267 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.285 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.369 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.389 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.344.424 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_tensor_summary begins the construction process witch capacity 128 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.453 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.473 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.536 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.557 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.619 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.740 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.810 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.831 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.912 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.942 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_2.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.344.974 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.accelerated_algorithm [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.001 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_accelerated_algorithm [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.345.060 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_image_summary begins the construction process witch capacity 128 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.089 [mindspore/ccsrc/frontend/optimizer/opt.cc:232] ApplyIRToSubstitutions] There may be a problem. Substitution: opt_a.r2.accelerated_algorithm.less_batch_normalization [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.174 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.accelerated_algorithm.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.204 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.228 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.249 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.269 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.292 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_shard_fg_expand [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.332 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.355 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard_inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.378 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_shard_inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.461 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard_inline.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.490 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_parallel [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.563 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 48 us [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.590 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_parallel.changed [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.345.588 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_scalar_summary begins the construction process witch capacity 128 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.614 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.653 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.680 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.748 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.777 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_comm [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.836 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_comm.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.877 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_fusion [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.931 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_fusion.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.345.954 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.matmul_add_comm_reduction [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.008 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.034 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.054 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.076 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_shard_identity [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.098 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_shard_identity [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.346.092 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:308] MbufDataHandler] Channel ms_histogram_summary begins the construction process witch capacity 128 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.177 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.202 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_dataset [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.222 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_dataset [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.296 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_dataset.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.322 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.get_grad_eliminate_ [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.340 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_get_grad_eliminate_ [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.413 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.437 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_output [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.455 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_output [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.529 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_output.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.555 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_forward [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.610 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_forward.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.634 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_recompute_pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.663 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_recompute_pass.unchanged [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.346.645 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:215] InitHccl] Start init hccl adapter. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.684 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.716 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.736 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.849 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.878 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.902 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.before_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.346.923 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_before_grad [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.346.936 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:450] InitKernelInfoStore] Start init hccl kernel info store. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.029 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.before_grad.unchanged [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.347.033 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:467] InitKernelInfoStore] Get builder ops_kernel_info_hccl [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.057 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.107 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.129 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel_renormalize [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.149 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel_renormalize.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.170 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.update_top_fg [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.191 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.update_top_fg.unchanged [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.347.192 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:489] InitKernelInfoStore] Init hccl kernel info store success. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.211 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cast_eliminate [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.347.219 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:667] InitHcclExec] Start init hccl exec. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.229 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_cast_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.305 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cast_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.330 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_fg_expand [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.385 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_fg_expand.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.406 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation_after_expand [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.465 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.485 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp_send_recv_attached [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.506 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.539 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.receive_attached [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.562 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.receive_attached.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.583 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.after_resolve [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.604 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_after_resolve [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.687 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.after_resolve.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.709 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_after_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.727 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_after_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.832 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_after_grad.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.856 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.special_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.873 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_special_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.944 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.special_op_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.347.966 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.renormalize [INFO] ANALYZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.348.242 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:04.348.929 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:04.348.973 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:04.349.287 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:04.349.321 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:04.349.540 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.349.485 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:667] GenerateArgumentsKey] Generate a new compile key for new args, key: 0 [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:04.349.571 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.349.596 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:675] GenerateArgumentsKey] New cached args: Arg[0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x227c3ed0, value: ValueAny) Arg[1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x227c3ed0, value: ValueAny) [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.349.647 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.349.720 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 3_1___main___Net_construct_19 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x363fded0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x363fded0, value: ValueAny), Parent: } [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.350.267 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1263] CompileInner] Start compiling, phase: train.1738915084346594048.281469968999824.0.. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.350.315 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:272] LoadPassesConfig] AUTO_PASSES_OPTIMIZE_PATH: [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.350.341 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] Start compiling 'Net.construct' and it will take a while. Please wait... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.350.651 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.renormalize.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.350.722 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.add_forward_monad_depend [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.350.757 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.350.779 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.350.803 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_grad.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.350.823 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.350.897 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.350.918 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cse [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.101 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cse.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.129 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_3 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.153 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_3 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.176 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.251 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.269 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.331 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.348 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.407 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.423 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.482 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.498 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.557 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.573 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.631 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.648 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.351.560 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:673] InitHcclExec] Hcom DynamicKernel Initialize success [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.351.650 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:679] InitHcclExec] InitHcclExec success [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.351.697 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:233] InitHccl] Init hccl adapter success. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.720 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.351.727 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:87] Initialize] Successfully initialize HCCL. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.738 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.351.753 [mindspore/ccsrc/distributed/collective/collective_manager.cc:588] InitDeviceCommLib] Communication library on device side is successfully initialized. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.799 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.351.814 [mindspore/ccsrc/distributed/collective/collective_manager.cc:210] Initialize] [PROF]InitDeviceBackend costs 4600.76 msec. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.822 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_3.unchanged [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.351.850 [mindspore/ccsrc/distributed/collective/collective_manager.cc:810] IsAsyncInitGlobalComm] Async initialize global comm: 1. async_conf: 1, is_graph: 1, use_rank_table: 0, simulation: 0, use_mpi: 0, is_ascend: 1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.851 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.expand_dump_flag [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.351.873 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.876 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.expand_dump_flag.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.895 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.switch_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.913 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_switch_simplify [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.351.943 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.034 msec. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.351.988 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.switch_simplify.unchanged [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.351.996 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group hccl_world_group [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.352.009 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.loop_unroll [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.352.019 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.053 msec. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.352.028 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_loop_unroll [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.352.098 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.loop_unroll.unchanged [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.352.117 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.352.120 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.352.139 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_1 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.352.196 [mindspore/ccsrc/distributed/collective/collective_manager.cc:869] SubmitCreateDeviceCommTask] Launch init comm thread. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.352.236 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for hccl_world_group. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.352.264 [mindspore/ccsrc/distributed/collective/collective_manager.cc:224] Initialize] [PROF]CreateGlobalCommunicationGroup costs 0.389 msec. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.352.280 [mindspore/ccsrc/distributed/collective/collective_manager.cc:227] Initialize] End initializing collective communication for backend: Ascend [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.352.319 [mindspore/ccsrc/distributed/init.cc:56] Initialize] [PROF]distributed_collective_init costs 4601.9 msec. [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.352.341 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: hccl_world_group [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.352.353 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:298] RecordInitStatus] Status record: system init. [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.352.386 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for hccl_world_group [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.352.656 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.16 msec. [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.353.124 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group hccl_world_group [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.353.161 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.468 msec. [WARNING] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.353.178 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.353.197 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187753,fffe767fc0f0,python):2025-02-07-15:58:04.353.532 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.353.721 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_1.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.353.766 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.recompute_prepare [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.353.788 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_recompute_prepare [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.353.859 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.recompute_prepare.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.353.881 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_depend_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.353.935 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.353.958 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_assign_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.006 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.028 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_loads_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.086 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.109 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parameter_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.133 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parameter_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.153 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_2 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.168 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_2 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.186 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.253 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.273 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.336 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.355 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.416 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.435 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.493 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.510 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.568 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.584 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.658 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.678 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.739 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.754 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.811 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.828 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.897 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.915 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.354.985 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.002 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.063 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.081 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.158 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.176 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.237 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.254 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.314 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.334 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.393 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.410 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.469 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.487 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.545 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.568 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_2.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.592 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.accelerated_algorithm [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.613 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_accelerated_algorithm [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.697 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.732 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.752 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.771 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.791 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.811 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_shard_fg_expand [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.844 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.865 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard_inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.882 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_shard_inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.955 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard_inline.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.355.978 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_parallel [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.046 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 47 us [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.070 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_parallel.changed [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.094 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.131 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.151 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.173 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.191 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_comm [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.245 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_comm.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.266 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_fusion [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.317 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_fusion.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.matmul_add_comm_reduction [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.391 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.415 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.436 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.465 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_shard_identity [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.483 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_shard_identity [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.556 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.579 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_dataset [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.595 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_dataset [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.755 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_dataset.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.779 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.get_grad_eliminate_ [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.797 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_get_grad_eliminate_ [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.866 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.886 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_output [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.903 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_output [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.971 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_output.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.356.990 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_forward [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.036 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_forward.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.058 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_recompute_pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.083 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.103 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.121 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.138 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.241 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.263 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.283 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.before_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.301 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_before_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.412 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.before_grad.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.434 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.480 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.503 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel_renormalize [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.522 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel_renormalize.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.541 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.update_top_fg [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.562 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.update_top_fg.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.580 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cast_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.599 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_cast_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.669 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cast_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.690 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_fg_expand [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.741 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_fg_expand.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.762 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation_after_expand [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.818 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.839 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp_send_recv_attached [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.860 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.879 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.receive_attached [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.899 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.receive_attached.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.918 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.after_resolve [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.357.935 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_after_resolve [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.011 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.after_resolve.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.033 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_after_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.051 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_after_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.163 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_after_grad.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.184 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.special_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.200 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_special_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.270 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.special_op_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.290 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.renormalize [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.307 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.renormalize.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.326 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.add_forward_monad_depend [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.348 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.366 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_grad [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.388 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_grad.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.406 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.470 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.493 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cse [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.660 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cse.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.688 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_3 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.709 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_3 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.727 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.795 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.814 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.875 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.893 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.953 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.358.968 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.037 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.056 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.116 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.131 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.189 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.205 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.263 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.279 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.337 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.358 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_3.unchanged [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.383 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_a end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.413 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute_after_opt_a start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.477 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute_after_opt_a end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.503 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_cell_reuse_recomputed_activation start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.524 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_cell_reuse_recomputed_activation end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.545 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_after_opt_a start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.937 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_after_opt_a end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.359.977 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass convert_after_rewriter start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.030 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass convert_after_rewriter end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.055 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass order_py_execute_after_rewriter start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.099 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass order_py_execute_after_rewriter end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.122 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_b start ... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.148 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.177 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_b_r1_b_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.196 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: zero_like_fill_zero [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.264 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: zero_like_fill_zero, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.283 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: list_to_tuple_eliminator_ [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.343 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: list_to_tuple_eliminator_, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.359 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_to_list_eliminator_ [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.419 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_to_list_eliminator_, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.435 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.512 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.530 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_const_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.600 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_const_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.616 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.681 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.702 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_set_item_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.775 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.791 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_depend_reorder [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.863 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_depend_reorder, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.879 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_convert_item_index_to_positive [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.956 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_convert_item_index_to_positive, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.360.974 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: make_slice_get_slice_eliminator [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.033 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: make_slice_get_slice_eliminator, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.062 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.132 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.150 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reset_defer_inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.210 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reset_defer_inline, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.227 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.287 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.303 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.361 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.377 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_pure_node_eliminater [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.435 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_pure_node_eliminater, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.451 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: load_eliminater [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.510 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: load_eliminater, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.526 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: stopgrad_eliminater [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.583 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: stopgrad_eliminater, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.601 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: special_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.659 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: special_op_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.675 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.732 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.748 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_add_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.806 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_add_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.822 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_set_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.888 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_set_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.906 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_depend_swap [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.964 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_depend_swap, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.361.980 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_add_const_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.036 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_add_const_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.052 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: value_based_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.112 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: value_based_eliminate, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.128 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: parallel_virtual_node [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.187 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: parallel_virtual_node, change: 0 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.203 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: const_output_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.260 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: const_output_eliminate, change: 0 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:04.362.208 [mindspore/ccsrc/backend/graph_compiler/transform.cc:575] CreateBackend] CreateBackend is: ge [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.285 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_1.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.309 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_2 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.328 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_b_r1_b_2 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.403 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_2.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.424 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.476 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.498 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.542 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_assign_eliminate.unchanged [INFO] DEBUG(187803,ffff93d7bc10,python):2025-02-07-15:58:04.362.537 [mindspore/ccsrc/debug/debugger/debugger.cc:80] Init] Debugger got device_id: 5 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.564 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_loads_eliminate [INFO] DEBUG(187803,ffff93d7bc10,python):2025-02-07-15:58:04.362.575 [mindspore/ccsrc/debug/debugger/debugger.cc:82] Init] Debugger got device_target: Ascend [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.608 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.629 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.renormalize [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.659 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.renormalize.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.679 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.cse [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.834 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.cse.unchanged [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.864 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_b end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.892 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass optimize_parallel_all_gather_comm start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.949 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass optimize_parallel_all_gather_comm end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.975 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_param_gather start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.362.993 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_param_gather end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.363.015 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cconv start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.363.076 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cconv end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.363.102 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass loop_unroll start ... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.363.858 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start loop_unroll_optimizer.r1.loop_unroll [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.363.900 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, loop_unroll_optimizer_r1_loop_unroll [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.363.987 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End loop_unroll_optimizer.r1.loop_unroll.unchanged [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.020 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass loop_unroll end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.049 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_after_cconv start ... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.072 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.c_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.090 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_after_cconv_r1_c_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.378 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.c_1.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.400 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.parameter_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.424 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.443 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.495 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.518 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.575 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.596 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.689 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.720 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.cse [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.879 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.cse.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.908 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.renormalize [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.932 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.renormalize.unchanged [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.954 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_after_cconv end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.364.979 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_dup_value start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.365.197 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:667] GenerateArgumentsKey] Generate a new compile key for new args, key: 0 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.298 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_dup_value end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.365.321 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:675] GenerateArgumentsKey] New cached args: Arg[0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xb62aed0, value: ValueAny) Arg[1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xb62aed0, value: ValueAny) [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.334 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass tuple_transform start ... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.363 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.d_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.383 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_trans_graph_r1_d_1 [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.751 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.d_1.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.776 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.renormalize [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.797 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.renormalize.unchanged [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.819 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass tuple_transform end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.843 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass partial_unused_args_eliminate start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.863 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass partial_unused_args_eliminate end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.885 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_cache_embedding start ... [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.935 [mindspore/ccsrc/frontend/parallel/cache_embedding/cache_embedding.cc:706] AddCacheEmbedding] Parameters are all not cache enable. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.955 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_cache_embedding end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.365.978 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_recomputation start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.366.151 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1263] CompileInner] Start compiling, phase: train.1738915084361932544.281470011721104.0.. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.366.210 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:272] LoadPassesConfig] AUTO_PASSES_OPTIMIZE_PATH: [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.208 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_recomputation end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.366.238 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] Start compiling 'Net.construct' and it will take a while. Please wait... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.240 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cse_after_recomputation start ... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.266 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_recompute.r1.cse [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.370 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_recompute.r1.cse.unchanged [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.398 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cse_after_recomputation end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.424 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass environ_conv start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.489 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass environ_conv end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.514 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass swap_dp_allreduce_reducescatter start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.559 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass swap_dp_allreduce_reducescatter end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.583 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass bias_add_comm_swap start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.604 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass bias_add_comm_swap end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.626 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_micro_interleaved_index start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.645 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_micro_interleaved_index end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.666 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_fine_grained_interleaved_index start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.687 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_fine_grained_interleaved_index end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.708 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass merge_cast_opt start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.725 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass merge_cast_opt end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.746 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_recompute_activation start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.793 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_recompute_activation end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.817 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass micro_interleaved_order_control start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.837 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass micro_interleaved_order_control end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.858 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass assign_add_opt start ... [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.366.954 [mindspore/ccsrc/frontend/parallel/pass/assign_add_opt.cc:466] AssignAddOpt] Merge multi matmul assign add begin and concat eliminate enable flag is:0 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.004 [mindspore/ccsrc/frontend/parallel/pass/pass_utils.cc:122] ExtractBackwardMatMul] backward_matmul_dx_dw_map size:0 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.065 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass assign_add_opt end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.091 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass ForceFp32Comm start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.111 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass ForceFp32Comm end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.133 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_cast_before_assign_add start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.174 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_cast_before_assign_add end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.196 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass full_micro_interleaved_order_control start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.215 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass full_micro_interleaved_order_control end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.237 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass reorder_send_recv_between_fp_bp start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.258 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass reorder_send_recv_between_fp_bp end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.278 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass comm_op_add_attrs start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.356 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass comm_op_add_attrs end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.382 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_comm_op_reuse_tag start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.462 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_comm_op_reuse_tag end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.487 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_split_concat_branches start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.505 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_split_concat_branches end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.526 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_parallel_branches start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.543 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_parallel_branches end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.564 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_in_pipeline start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.616 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_in_pipeline end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.639 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_grad_in_pipeline start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.660 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_grad_in_pipeline end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.681 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass control_data_broadcast_order start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.697 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass control_data_broadcast_order end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.727 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass grouped_pairwise_exchange_alltoall start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.756 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass grouped_pairwise_exchange_alltoall end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.779 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass offloading_packed_experts start ... [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.796 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:746] SetOffloadingPackedExpert] pass if (parallel::g_device_manager == nullptr) [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.814 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:707] CheckUserSettings] To activate the pass, set_auto_parallel_context 'enable_alltoall' should be true [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.830 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:751] SetOffloadingPackedExpert] CheckUserSettings_not_pass [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.846 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass offloading_packed_experts end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.866 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_and_grad_model_parallel start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.887 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_and_grad_model_parallel end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.908 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_matmul_and_grad_allreduce start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.924 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_matmul_and_grad_allreduce end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.944 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_allgather_and_fa_grad start ... [WARNING] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.962 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.979 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_allgather_and_fa_grad end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.367.999 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_ring_attention start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.062 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_ring_attention end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.085 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_flash_sp start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.139 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_flash_sp end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.163 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass begin_end_overlap_inline start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.180 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass begin_end_overlap_inline end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.200 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_matmul_comm_elemetwise start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.219 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_matmul_comm_elemetwise end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.254 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_layernorm_comm start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.272 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_layernorm_comm end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.292 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass handle_group_info start ... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.315 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass handle_group_info end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.337 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass symbol_engine_optimizer start ... [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.359 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.build [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.398 [mindspore/ccsrc/frontend/optimizer/irpass/symbol_engine_optimizer.cc:39] operator()] There is no dynamic shape node, the SymbolEngineBuilder is disabled. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.420 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.build.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.440 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_shapecalc [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.459 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_shapecalc [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.527 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_shapecalc.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.549 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_not_effective [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.566 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_not_effective [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.668 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_not_effective.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.693 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.opt_reshape [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.714 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_opt_reshape [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.777 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.opt_reshape.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.799 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.fold_const_symbol [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.816 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_fold_const_symbol [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.902 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.fold_const_symbol.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.922 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.renormalize [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.942 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.renormalize.unchanged [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.368.962 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass symbol_engine_optimizer end. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.000 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end optimize action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.019 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.052 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_parallel_scheduler action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.074 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_parallel_scheduler action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.090 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.113 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad_reorder action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.206 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad_reorder action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.225 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.251 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start get_jit_bprop_graph action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.267 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end get_jit_bprop_graph action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.284 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.306 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.324 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.340 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.363 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start eliminate_special_op_node action. [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.958 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.ad_related_special_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.369.997 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_ad_related_special_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.071 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.ad_related_special_op_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.098 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.mutable_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.116 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_mutable_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.176 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.mutable_op_eliminate.unchanged [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.207 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.convert_tensor_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.225 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_convert_tensor_op_eliminate [INFO] OPTIMIZER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.284 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.convert_tensor_op_eliminate.unchanged [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.310 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end eliminate_special_op_node action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.328 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.358 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start distribtued_split action. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.388 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:378] GenerateStrategy] Current parallel mode is semi_auto_parallel [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.406 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:390] GenerateStrategy] Generated distributed strategy is 1 [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.560 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:1277] Run] All nodes are on this process so there's no need to build and split distributed graph. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.584 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end distribtued_split action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.602 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.630 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start validate action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.784 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end validate action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.805 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PROFILER(187834,ffffb35e0c10,python):2025-02-07-15:58:04.370.942 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc:49] IsProfilingParallelStrategyEnabled] Profiling parallel strategy is disabled. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.371.028 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start task_emit action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.371.247 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1679] SetRunMode] Run graph mode with kernel by kernel by configuration. [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:04.371.446 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1057] CompileGraphs] Status record: start compile function graph: 4_3_1___main___Net_construct_20 [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:04.371.754 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:04.372.214 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: 4_3_1___main___Net_construct_20 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:04.372.434 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-5488101015797526856, the max communication size is 1 MB. [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:04.372.474 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-5488101015797526856, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:04.372.500 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-5488101015797526856. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:04.379.782 [mindspore/ccsrc/backend/graph_compiler/transform.cc:575] CreateBackend] CreateBackend is: ge [INFO] DEBUG(187753,ffff8292dc10,python):2025-02-07-15:58:04.380.117 [mindspore/ccsrc/debug/debugger/debugger.cc:80] Init] Debugger got device_id: 1 [INFO] DEBUG(187753,ffff8292dc10,python):2025-02-07-15:58:04.380.147 [mindspore/ccsrc/debug/debugger/debugger.cc:82] Init] Debugger got device_target: Ascend [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:04.410.779 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group hccl_world_group [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:04.410.857 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 501.063 msec. [WARNING] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:04.410.880 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:04.410.906 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187818,fffe17fff0f0,python):2025-02-07-15:58:04.411.308 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:04.445.751 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:739] Initialize] The actor thread number: 5, the kernel thread number: 25 [INFO] DEBUG(187775,ffffba4dbc10,python):2025-02-07-15:58:04.446.085 [mindspore/ccsrc/common/debug/env_config_parser.cc:152] ParseFromFile] The 'env_config_path' in 'mindspore.context.set_context(env_config_path={path})' is empty. [INFO] SYMBOLIC_SHAPE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.447.033 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.447.065 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.447.080 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.447.106 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.447.123 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.447.137 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.447.176 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1682] Run] Pipeline run [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.447.214 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start bootstrap action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.448.601 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end bootstrap action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.448.739 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 9 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.448.803 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start type_inference action. [INFO] ANALYZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.449.039 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] ME(187775:281473807399952,MainProcess):2025-02-07-15:58:04.463.301 [mindspore/_extends/parse/namespace.py:132] 'Net' object has no attribute or method: '__is_tensors_queue__', so will return None. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.465.377 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.465.507 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: __main___Net_construct_2 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1220f510, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1220f510, value: ValueAny), Parent: } [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.466.040 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end type_inference action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.466.079 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.466.126 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.466.340 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.466.361 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.466.391 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start graph_reusing action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.466.408 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end graph_reusing action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.466.435 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.466.459 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start inline action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.466.531 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.466.633 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.466.662 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass a1a2 start ... [INFO] PARSER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.473.637 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {cast_ : Prim[Cast]} [INFO] PARSER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.474.003 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:151] WriteVariable] fill_3 update var `value` with node @fill_3:value{[0]: CNode_4, [1]: param_value, [2]: param_type} [INFO] PARSER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.474.363 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {fillv2_ : Prim[FillV2]} [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:04.475.964 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:739] Initialize] The actor thread number: 5, the kernel thread number: 25 [INFO] DEBUG(187789,ffffaa419c10,python):2025-02-07-15:58:04.476.266 [mindspore/ccsrc/common/debug/env_config_parser.cc:152] ParseFromFile] The 'env_config_path' in 'mindspore.context.set_context(env_config_path={path})' is empty. [INFO] SYMBOLIC_SHAPE(187789,ffffaa419c10,python):2025-02-07-15:58:04.476.824 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187789,ffffaa419c10,python):2025-02-07-15:58:04.476.858 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187789,ffffaa419c10,python):2025-02-07-15:58:04.476.880 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187789,ffffaa419c10,python):2025-02-07-15:58:04.476.920 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187789,ffffaa419c10,python):2025-02-07-15:58:04.476.938 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187789,ffffaa419c10,python):2025-02-07-15:58:04.476.954 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.476.993 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1682] Run] Pipeline run [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.477.035 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start bootstrap action. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.179 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.expand_dump_flag [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.281 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.expand_dump_flag.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.478.287 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end bootstrap action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.478.326 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 9 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.343 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.370 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_switch_simplify [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.478.379 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start type_inference action. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.429 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.451 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.loop_unroll [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.469 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_loop_unroll [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.499 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.517 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.532 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_a_1 [INFO] ANALYZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.478.555 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.896 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_1.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.934 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.recompute_prepare [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.955 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_recompute_prepare [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.478.983 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.003 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.037 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.075 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.100 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.118 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.140 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.158 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parameter_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.189 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.207 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.223 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r1_a_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.239 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.266 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.286 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.308 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.323 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.343 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.356 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.374 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.389 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.407 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.421 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.440 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.454 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.474 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.497 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.517 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.533 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.553 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.567 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.597 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.614 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.634 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.649 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.669 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.683 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.702 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.717 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.736 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.752 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.772 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.786 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.805 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.819 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.839 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.859 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_2.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.894 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parallel_inline_pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.913 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_parallel_inline_pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.944 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parallel_inline_pass.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.974 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.expand_dump_flag [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.479.996 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.015 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.031 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.055 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.075 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.loop_unroll [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.090 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_loop_unroll [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.114 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.133 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.147 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_a_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.304 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_1.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.332 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.recompute_prepare [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.349 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_recompute_prepare [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.375 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.394 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.423 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.444 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_assign_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.466 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.487 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.508 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.528 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parameter_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.563 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.584 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.601 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r2_a_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.618 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.701 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.729 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.752 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.768 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.788 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.803 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.823 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.838 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.857 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.871 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.891 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.905 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.924 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.939 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.958 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.973 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.480.993 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.007 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.053 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.071 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.092 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.109 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.128 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.143 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.162 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.177 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.195 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.209 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.228 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.243 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.261 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.276 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.295 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.319 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_2.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.342 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parallel_inline_pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.361 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_parallel_inline_pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.390 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parallel_inline_pass.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.414 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass a1a2 end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.443 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end inline action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.461 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.509 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol action. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.548 [mindspore/ccsrc/frontend/parallel/dynamic_shape/dynamic_shape.cc:223] ForwardHasDynamicShape] Can not find the forward graph, so find the ops in root graph [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.613 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.632 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.658 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pre_auto_parallel action. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.719 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 37 us [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.741 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pre_auto_parallel action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.755 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.481.781 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start insert-virtual-dataset action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.040 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end insert-virtual-dataset action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.113 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.164 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol-second action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.186 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol-second action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.200 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.222 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start dataset_repeat_opt action. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.341 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.366 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2248] GetCommInfo] Get global rank from communication model, the global rank is 3 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.435 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 3, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 3 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.452 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.466 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.481 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 3, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [WARNING] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.536 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:305] BroadcastDataset] For now on, only dataset sink mode support dataset reader optimizer. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.556 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end dataset_repeat_opt action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.571 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.602 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_split action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.630 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:239] PipelineSplit] Get device num from communication model, the device num is 8 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.650 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:256] PipelineSplit] The parameter 'stage_num' is: 1. No need Pipeline split. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.696 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_split action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.713 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.738 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start optimize action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.793 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.836 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.863 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.925 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.483.985 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_a start ... [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.018 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.expand_dump_flag [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.042 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.064 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.081 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.124 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.144 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.loop_unroll [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.161 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_loop_unroll [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.191 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.220 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.237 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.583 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_1.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.623 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.recompute_prepare [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.664 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_recompute_prepare [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.699 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.719 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.754 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.772 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.797 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.816 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.840 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.858 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parameter_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.885 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.903 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.919 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.936 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.965 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.484.983 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.007 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.023 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.046 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.063 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.099 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.114 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.137 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.152 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.188 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.205 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.227 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.242 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.264 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.281 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.303 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.319 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.354 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.370 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.393 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.408 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.445 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.464 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.488 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.503 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.524 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.549 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.574 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.589 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.613 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.628 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.650 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.674 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_2.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.700 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.accelerated_algorithm [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.719 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_accelerated_algorithm [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.758 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.779 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.797 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.816 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.835 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.855 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_shard_fg_expand [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.884 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.918 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard_inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.937 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_shard_inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.971 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard_inline.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.485.990 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_parallel [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.486.043 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 31 us [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.486.065 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_parallel.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.486.089 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.486.139 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.486.200 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 3, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 3 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.486.220 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.486.235 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.486.252 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 3, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.486.392 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3587] MarkForwardCNode] Can not find the forward graph, so mark the ops in root graph [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.486.496 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.486.519 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3182] IsInsertVirtualOutput] The current stage is: 0 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.486.577 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=_VirtualOutput [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.487.802 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualDatasetInfo0 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.487.876 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.487.988 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1), (1, 1, 1)) [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.013 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.145 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualDatasetInfo00: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.348 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator MulInfo1 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.372 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.420 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((2, 2, 2), (2, 2, 2)) [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.438 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.504 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.525 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 0 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.566 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.582 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 1 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.597 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:503] InferMirrorOps] MulInfo11: No need to insert mirror ops [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.621 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2631] InferAsLossDivisor] MulInfo11: the dev matrix shape is [2, 2, 2], the output tensor map is [2, 1, 0], loss divisor is 1 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.744 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] MulInfo11: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.760 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1334] Init] MulInfo11 : Init success. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.858 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualOutputInfo2 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.880 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.916 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1)) [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.935 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.488.977 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualOutputInfo22: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.489.153 [mindspore/ccsrc/frontend/parallel/parameter_manager.cc:1445] HandleCameAndAdaFactorOpt] Adafactor or Came optimizer process start [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.489.313 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1150] MergeEntireShapeForDynamic] Into MergeEntireShapeForDynamic [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.489.353 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1163] MergeEntireShapeForDynamic] Can not find the forward graph, so mark the ops in root graph [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.489.453 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(1) [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.489.535 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.489.664 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 3-7 and group name is 2-5488101015797526856 [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.489.727 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5488101015797526856 [const vector]{3, 7}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.489.818 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.021 msec. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.489.897 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-5488101015797526856 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.489.929 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.066 msec. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.489.968 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.25 msec. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.489.988 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-5488101015797526856 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.081 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 1-3 and group name is 2-4190060298023907007 [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.125 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-4190060298023907007 [const vector]{1, 3}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.164 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.004 msec. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.195 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-4190060298023907007 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.217 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.032 msec. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.241 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.118 msec. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.259 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-4190060298023907007 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.317 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 2-3 and group name is 2-3358271254418797552 [WARNING] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.349 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-3358271254418797552 [const vector]{2, 3}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.380 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.003 msec. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.409 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-3358271254418797552 [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.432 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.029 msec. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.454 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.103 msec. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.481 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-3358271254418797552 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.557 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.490.764 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op0, op=StridedSlice [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.491.045 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.491.094 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op0, op=StridedSlice [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.491.184 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.491.221 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op1, op=StridedSlice [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.491.308 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.491.335 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.491.354 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(2) [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.491.445 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.491.647 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.491.792 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op1, op=StridedSlice [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.491.920 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.491.962 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op2, op=StridedSlice [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.046 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.079 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op3, op=StridedSlice [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.151 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.196 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] ME(187789:281473538169872,MainProcess):2025-02-07-15:58:04.491.926 [mindspore/_extends/parse/namespace.py:132] 'Net' object has no attribute or method: '__is_tensors_queue__', so will return None. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.321 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_6{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 1} [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.385 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_8{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_6, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.436 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_9{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_10, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.473 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_11{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 0} [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.519 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_12{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_11, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.568 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_13{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_14, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.720 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/Mul-op0->Default/_VirtualOutput-op0(1) [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.804 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.492.998 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 7, operator_vector: AllGather, AllGather, Split, Concat, AllGather, Split, Concat [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.493.175 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=AllGather [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.493.529 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.493.652 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: __main___Net_construct_2 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xd1d49c0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xd1d49c0, value: ValueAny), Parent: } [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.494.048 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end type_inference action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.494.084 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.494.126 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.494.329 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.494.352 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.494.382 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start graph_reusing action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.494.403 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end graph_reusing action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.494.437 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.494.464 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start inline action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.494.497 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.494.555 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.494.582 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass a1a2 start ... [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.494.799 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_7692796245619514736AllGather_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.494.914 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op0, op=Split [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.495.078 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_12015561575443432111Split_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.495.156 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op0, op=Concat [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.495.237 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8051664706019937323Concat_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.495.267 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op0, op=AllGather [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.496.200 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_5140002550487651858AllGather_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.496.291 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op1, op=Split [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.496.375 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8691182465882856301Split_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.496.470 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op1, op=Concat [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.496.517 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_6614310911506831424Concat_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.496.545 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op1, op=AllGather [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.497.498 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_1898494724763908338AllGather_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.497.578 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.497.647 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_15{[0]: ValueNode PrimFunc_Mul, [1]: CNode_13, [2]: CNode_9} [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.497.679 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_16{[0]: ValueNode AllGather, [1]: CNode_15} [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.497.709 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_17{[0]: ValueNode AllGather, [1]: CNode_18} [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.170 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:918] FindCommonMirrorGroup] The common mirror group is:[const vector]{} [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.262 [mindspore/ccsrc/frontend/parallel/parallel_postprocessor.cc:352] HandleGlobalNormScale] Start to process the global norm [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.342 [mindspore/ccsrc/frontend/parallel/step_parallel.cc:171] StepParallel] Now leaving step parallel, used time: 12223 us [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.384 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.425 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.504 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.531 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_comm [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.592 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_comm.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.613 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_fusion [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.663 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_fusion.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.685 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.matmul_add_comm_reduction [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.741 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.763 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.780 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.801 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_shard_identity [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.822 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_shard_identity [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.911 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.933 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_dataset [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.498.950 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_dataset [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.107 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_dataset.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.143 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.get_grad_eliminate_ [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.162 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_get_grad_eliminate_ [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.234 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.260 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_output [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.291 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_output [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.392 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_output.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.419 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_forward [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.476 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_forward.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.498 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_recompute_pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.524 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.544 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.562 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.580 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.679 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.708 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.729 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.before_grad [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.746 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_before_grad [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.836 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.before_grad.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.858 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.910 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.499.933 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel_renormalize [INFO] ANALYZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.500.191 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] PARSER(187789,ffffaa419c10,python):2025-02-07-15:58:04.500.866 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {cast_ : Prim[Cast]} [INFO] PARSER(187789,ffffaa419c10,python):2025-02-07-15:58:04.501.151 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:151] WriteVariable] fill_3 update var `value` with node @fill_3:value{[0]: CNode_4, [1]: param_value, [2]: param_type} [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:04.501.268 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:04.501.339 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PARSER(187789,ffffaa419c10,python):2025-02-07-15:58:04.501.446 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {fillv2_ : Prim[FillV2]} [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:04.501.999 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:04.502.047 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:04.502.336 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:04.502.375 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.502.498 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.502.585 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 1___main___Net_construct_5 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1220f510, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1220f510, value: ValueAny), Parent: } [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.503.659 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel_renormalize.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.503.740 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.update_top_fg [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.503.771 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.update_top_fg.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.503.791 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cast_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.503.810 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_cast_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.503.901 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cast_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.503.922 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_fg_expand [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.503.970 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_fg_expand.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.503.989 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation_after_expand [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.056 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.076 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp_send_recv_attached [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.132 [mindspore/ccsrc/frontend/parallel/pass/flash_sp.cc:2977] FlashSPSendRecvNodeAttach] No RA/FlashSP Send/Recv grad is found to be attached. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.156 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp_send_recv_attached.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.176 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.receive_attached [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.219 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.receive_attached.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.239 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.after_resolve [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.257 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_after_resolve [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.after_resolve.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.379 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_after_grad [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.396 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_after_grad [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.493 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_after_grad.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.519 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.special_op_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.535 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_special_op_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.601 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.special_op_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.504.572 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.expand_dump_flag [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.626 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.renormalize [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.673 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.renormalize.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.504.654 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.695 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.add_forward_monad_depend [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.729 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.750 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_grad [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.773 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_grad.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.791 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.504.719 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.504.823 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.882 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.504.882 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.504.908 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cse [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.504.905 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.504.927 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.504.957 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.504.978 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.504.996 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_a_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.168 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cse.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.205 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_3 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.227 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_3 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.245 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.320 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.323 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_1.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.338 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.353 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.recompute_prepare [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.372 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_recompute_prepare [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.399 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.400 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.420 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.421 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.454 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.487 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.475 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.514 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.513 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.534 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.555 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.575 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parameter_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.576 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.597 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.602 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.625 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_2 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.643 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r1_a_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.652 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.662 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.675 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.688 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.711 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.734 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.735 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.754 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.755 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.777 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.794 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.815 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.816 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.836 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.831 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.853 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.870 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.891 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.893 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.908 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.923 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_3.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.929 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.960 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.expand_dump_flag [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.959 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.505.986 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.982 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.505.999 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.506.007 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.020 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.506.027 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.038 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.070 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.087 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.506.100 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.108 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.506.127 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.126 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.506.146 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.148 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.166 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.188 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.205 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.506.211 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.226 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.506.235 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.244 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.506.254 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.266 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.281 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.300 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.318 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.339 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.361 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_2.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.391 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parallel_inline_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.410 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_parallel_inline_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.440 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parallel_inline_pass.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.464 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.expand_dump_flag [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.486 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.506 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.524 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.551 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.572 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.589 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.613 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.633 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.652 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_a_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.793 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_1.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.815 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.recompute_prepare [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.833 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_recompute_prepare [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.858 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.878 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.900 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.919 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_assign_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.942 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.962 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.506.985 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.005 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parameter_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.035 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.056 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_2 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.075 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r2_a_2 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.094 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.119 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.135 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.157 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.175 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.196 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.212 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.233 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.250 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.271 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.287 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.309 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.325 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.344 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.361 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.382 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.399 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.420 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.437 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.478 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.497 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.519 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.536 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.556 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.572 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.593 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.610 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.631 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.648 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.668 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.686 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.706 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.723 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.744 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.767 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_2.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.791 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parallel_inline_pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.507.726 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_1.changed [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.809 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_parallel_inline_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.837 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parallel_inline_pass.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.507.840 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.recompute_prepare [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.863 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass a1a2 end. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.507.872 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_recompute_prepare [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.892 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end inline action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.911 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.507.950 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.recompute_prepare.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.956 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol action. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.507.974 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_depend_eliminate [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.507.989 [mindspore/ccsrc/frontend/parallel/dynamic_shape/dynamic_shape.cc:223] ForwardHasDynamicShape] Can not find the forward graph, so find the ops in root graph [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.040 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_depend_eliminate.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.508.056 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol action. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.064 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_assign_eliminate [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.508.077 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.508.106 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pre_auto_parallel action. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.110 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.133 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_loads_eliminate [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.508.158 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 28 us [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.177 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_loads_eliminate.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.508.179 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pre_auto_parallel action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.508.196 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.199 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parameter_eliminate [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.508.222 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start insert-virtual-dataset action. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.232 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.255 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.273 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.291 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.356 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.374 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.431 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.450 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.503 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.524 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.576 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.593 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.696 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.739 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.819 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.840 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.895 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.911 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.966 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.508.982 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.037 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.057 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.129 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.148 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.203 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.221 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.299 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.320 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.378 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.398 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.387 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end insert-virtual-dataset action. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.453 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.448 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.475 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.494 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol-second action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.518 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol-second action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.534 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.533 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.557 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.561 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start dataset_repeat_opt action. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.626 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.649 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.684 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.706 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.712 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2248] GetCommInfo] Get global rank from communication model, the global rank is 4 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.743 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_2.unchanged [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.758 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 4, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 4 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.777 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.782 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.accelerated_algorithm [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.792 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.808 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_accelerated_algorithm [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.810 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 4, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [WARNING] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.858 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:305] BroadcastDataset] For now on, only dataset sink mode support dataset reader optimizer. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.879 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end dataset_repeat_opt action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.896 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.899 [mindspore/ccsrc/frontend/optimizer/opt.cc:232] ApplyIRToSubstitutions] There may be a problem. Substitution: opt_a.r2.accelerated_algorithm.less_batch_normalization [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.926 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_split action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.954 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:239] PipelineSplit] Get device num from communication model, the device num is 8 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.509.975 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:256] PipelineSplit] The parameter 'stage_num' is: 1. No need Pipeline split. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.509.980 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.accelerated_algorithm.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.010 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.017 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_split action. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.033 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.035 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.052 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.062 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start optimize action. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.074 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.097 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_shard_fg_expand [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.095 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.132 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute end. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.139 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_shard_fg_expand.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.156 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.164 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard_inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.185 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_shard_inline [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.214 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.253 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_a start ... [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.254 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard_inline.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.280 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_parallel [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.283 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.expand_dump_flag [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.309 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.330 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.349 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_switch_simplify [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.356 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 48 us [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.385 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_parallel.changed [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.390 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.412 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.413 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.432 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_loop_unroll [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.454 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.462 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.479 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.496 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.514 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.566 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.598 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_comm [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.656 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_comm.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.679 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_fusion [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.726 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_fusion.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.751 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.matmul_add_comm_reduction [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.800 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.823 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.842 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.841 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_1.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.863 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_shard_identity [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.869 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.recompute_prepare [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.883 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_shard_identity [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.890 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_recompute_prepare [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.920 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.940 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.959 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.970 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.510.987 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_dataset [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.510.990 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.007 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_dataset [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.018 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.038 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.064 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.076 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_dataset.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.083 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parameter_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.103 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.get_grad_eliminate_ [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.108 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.124 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_get_grad_eliminate_ [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.128 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_2 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.145 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_2 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.164 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.188 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.192 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.210 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.213 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_output [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.235 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_output [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.236 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.253 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.275 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.290 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.301 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_output.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.322 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.328 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_forward [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.340 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.363 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.379 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.385 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_forward.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.410 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.413 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_recompute_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.427 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.452 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.461 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.469 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.485 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.493 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.505 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.510 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.525 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.534 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.551 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.580 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.597 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.622 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.626 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.638 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.656 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.670 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.681 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.before_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.688 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.702 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_before_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.714 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.731 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.754 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.778 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.798 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.before_grad.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.801 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.819 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.826 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.842 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.859 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.880 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.881 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.907 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel_renormalize [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.905 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_2.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.931 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel_renormalize.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.930 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.accelerated_algorithm [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.949 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_accelerated_algorithm [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.953 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.update_top_fg [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.976 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.update_top_fg.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.511.979 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.511.995 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cast_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.000 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.017 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_cast_eliminate [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.019 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.039 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.058 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.078 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_shard_fg_expand [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.087 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cast_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.102 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.111 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_fg_expand [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.123 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard_inline [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.141 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_shard_inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.165 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_fg_expand.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.171 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard_inline.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.193 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation_after_expand [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.191 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_parallel [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.228 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 19 us [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.249 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_parallel.changed [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.272 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.259 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.301 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp_send_recv_attached [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.309 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.331 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.352 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.receive_attached [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.363 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 4, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 4 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.377 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.receive_attached.unchanged [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.385 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.397 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.after_resolve [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.403 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.416 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_after_resolve [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.421 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 4, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.496 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.after_resolve.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.523 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_after_grad [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.545 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_after_grad [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.547 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3587] MarkForwardCNode] Can not find the forward graph, so mark the ops in root graph [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.604 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.625 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3182] IsInsertVirtualOutput] The current stage is: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.719 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_after_grad.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.755 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.special_op_eliminate [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.512.763 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=_VirtualOutput [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.773 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_special_op_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.840 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.special_op_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.512.863 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.renormalize [INFO] ANALYZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.513.199 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.513.662 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualDatasetInfo0 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.513.715 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.513.802 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1), (1, 1, 1)) [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.513.828 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:04.513.843 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:04.513.900 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.513.931 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualDatasetInfo00: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.022 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator MulInfo1 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.044 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.085 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((2, 2, 2), (2, 2, 2)) [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.106 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.173 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.192 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 0 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.227 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.244 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 1 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.262 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:503] InferMirrorOps] MulInfo11: No need to insert mirror ops [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:04.514.261 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.287 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2631] InferAsLossDivisor] MulInfo11: the dev matrix shape is [2, 2, 2], the output tensor map is [2, 1, 0], loss divisor is 1 [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:04.514.305 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.304 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] MulInfo11: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.319 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1334] Init] MulInfo11 : Init success. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.396 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualOutputInfo2 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.416 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.451 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1)) [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.470 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.507 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualOutputInfo22: The loss divisor is 1, no need to create virtual div op. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:04.514.555 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:04.514.589 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.647 [mindspore/ccsrc/frontend/parallel/parameter_manager.cc:1445] HandleCameAndAdaFactorOpt] Adafactor or Came optimizer process start [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.514.682 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.780 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1150] MergeEntireShapeForDynamic] Into MergeEntireShapeForDynamic [INFO] ANALYZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.514.769 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 3_1___main___Net_construct_19 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1220f510, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x1220f510, value: ValueAny), Parent: } [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.813 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1163] MergeEntireShapeForDynamic] Can not find the forward graph, so mark the ops in root graph [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.901 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(1) [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.514.974 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.096 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 0-4 and group name is 2-16453000547691086251 [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.147 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-16453000547691086251 [const vector]{0, 4}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.222 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.019 msec. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.286 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-16453000547691086251 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.322 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.055 msec. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.357 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.216 msec. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.376 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-16453000547691086251 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.440 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 4-6 and group name is 2-5435772415009061329 [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.469 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5435772415009061329 [const vector]{4, 6}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.507 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.003 msec. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.534 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-5435772415009061329 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.558 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.029 msec. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.581 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.11 msec. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.600 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-5435772415009061329 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.649 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 4-5 and group name is 2-6541264347459079684 [WARNING] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.677 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-6541264347459079684 [const vector]{4, 5}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.705 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.003 msec. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.732 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-6541264347459079684 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.755 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.028 msec. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.776 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.099 msec. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.803 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-6541264347459079684 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.515.788 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.renormalize.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.515.871 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.add_forward_monad_depend [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.515.873 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.515.916 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.515.938 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_grad [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.515.963 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_grad.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.515.981 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.063 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_eliminator.unchanged [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.516.066 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op0, op=StridedSlice [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.085 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cse [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.516.256 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.280 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cse.unchanged [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.516.293 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op0, op=StridedSlice [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.309 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_3 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.331 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_3 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.352 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.516.376 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.516.408 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op1, op=StridedSlice [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.422 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.446 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.498 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.516.492 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.516 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.516.519 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.516.540 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(2) [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.568 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.584 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.516.613 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.720 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.742 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.795 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.516.792 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.813 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.888 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.905 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.516.908 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op1, op=StridedSlice [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.957 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.516.975 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.517.028 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.020 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.517.054 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_3.unchanged [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.057 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op2, op=StridedSlice [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.517.083 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.expand_dump_flag [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.517.108 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.expand_dump_flag.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.517.128 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.switch_simplify [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.137 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.517.146 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_switch_simplify [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.168 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op3, op=StridedSlice [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.517.211 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.switch_simplify.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.517.232 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.loop_unroll [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.517.248 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_loop_unroll [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.236 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.276 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.517.309 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.loop_unroll.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.517.331 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.517.348 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_1 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.372 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_6{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 1} [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.428 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_8{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_6, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.482 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_9{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_10, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.519 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_11{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 0} [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.566 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_12{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_11, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.614 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_13{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_14, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.687 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/Mul-op0->Default/_VirtualOutput-op0(1) [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.764 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.517.947 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 7, operator_vector: AllGather, AllGather, Split, Concat, AllGather, Split, Concat [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.518.093 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=AllGather [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.518.636 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_1.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.518.708 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.recompute_prepare [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.518.732 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_recompute_prepare [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.518.803 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.recompute_prepare.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.518.826 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_depend_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.518.886 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.518.927 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_assign_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.518.973 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.518.994 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_loads_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.039 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.059 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parameter_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.086 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parameter_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.105 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.123 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.141 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.202 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.219 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.274 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.291 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.345 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.361 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.412 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.428 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.476 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.493 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.519.507 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_7692796245619514736AllGather_ success [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.561 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.583 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.519.602 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op0, op=Split [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.634 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.652 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.715 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.734 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.519.729 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_12015561575443432111Split_ success [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.791 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.810 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.519.807 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op0, op=Concat [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.884 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.519.885 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8051664706019937323Concat_ success [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.903 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.519.919 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op0, op=AllGather [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.957 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.519.976 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.048 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.068 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.120 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.137 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.190 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.207 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.260 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.278 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.331 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.349 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.402 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.434 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_2.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.468 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.accelerated_algorithm [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.501 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_accelerated_algorithm [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.583 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.607 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.626 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.770 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.790 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.813 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_shard_fg_expand [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.858 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.879 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard_inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.897 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_shard_inline [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.520.846 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_5140002550487651858AllGather_ success [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.520.935 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op1, op=Split [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.967 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard_inline.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.520.991 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_parallel [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.521.014 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8691182465882856301Split_ success [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.068 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 55 us [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.092 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_parallel.changed [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.521.103 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op1, op=Concat [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.118 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.521.152 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_6614310911506831424Concat_ success [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.160 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.184 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.521.183 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op1, op=AllGather [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.206 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.227 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_comm [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.282 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_comm.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.309 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_fusion [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.359 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_fusion.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.381 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.matmul_add_comm_reduction [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.440 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.480 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.501 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.519 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_shard_identity [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.538 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_shard_identity [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.609 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.688 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_dataset [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.765 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_dataset [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.890 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_dataset.unchanged [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.521.907 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_1898494724763908338AllGather_ success [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.915 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.get_grad_eliminate_ [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.521.972 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.521.990 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_get_grad_eliminate_ [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.522.042 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_15{[0]: ValueNode PrimFunc_Mul, [1]: CNode_13, [2]: CNode_9} [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.522.080 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_16{[0]: ValueNode AllGather, [1]: CNode_15} [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.522.113 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_17{[0]: ValueNode AllGather, [1]: CNode_18} [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.116 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.140 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_output [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.156 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_output [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.217 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_output.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.235 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_forward [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.285 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_forward.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.306 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_recompute_pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.330 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.347 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.365 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.382 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.473 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.498 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.531 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.before_grad [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.551 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_before_grad [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.522.546 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:918] FindCommonMirrorGroup] The common mirror group is:[const vector]{} [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.522.633 [mindspore/ccsrc/frontend/parallel/parallel_postprocessor.cc:352] HandleGlobalNormScale] Start to process the global norm [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.642 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.before_grad.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.664 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.712 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation.unchanged [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.522.711 [mindspore/ccsrc/frontend/parallel/step_parallel.cc:171] StepParallel] Now leaving step parallel, used time: 10411 us [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.734 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel_renormalize [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.757 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel_renormalize.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.522.752 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.777 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.update_top_fg [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.799 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.update_top_fg.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.522.793 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.818 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cast_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.836 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_cast_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.522.869 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp.changed [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.522.896 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_comm [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.903 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cast_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.926 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_fg_expand [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.522.955 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_comm.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.522.978 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_fg_expand.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.522.978 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_fusion [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.002 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation_after_expand [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.029 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_fusion.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.051 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.matmul_add_comm_reduction [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.059 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.082 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp_send_recv_attached [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.105 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.109 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.124 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.receive_attached [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.131 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.147 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.receive_attached.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.154 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.165 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.after_resolve [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.175 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_shard_identity [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.183 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_after_resolve [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.198 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_shard_identity [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.255 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.after_resolve.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.287 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.295 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_after_grad [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.313 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_after_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.314 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_dataset [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.334 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_dataset [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.406 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_after_grad.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.430 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.special_op_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.447 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_special_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.466 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_dataset.changed [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.496 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.get_grad_eliminate_ [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.509 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.special_op_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.518 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_get_grad_eliminate_ [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.530 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.renormalize [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.549 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.renormalize.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.567 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.add_forward_monad_depend [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.588 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.593 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.616 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.611 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_output [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.638 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_grad.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.651 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_output [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.658 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.737 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.747 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_output.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.760 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cse [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.775 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_forward [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.830 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_forward.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.854 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_recompute_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.883 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.903 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.923 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.523.942 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.956 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cse.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.523.987 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_3 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.007 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_3 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.025 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.524.036 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.524.063 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.085 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.524.086 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.before_grad [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.107 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.524.105 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_before_grad [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.161 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.179 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.524.204 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.before_grad.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.524.227 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.240 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.260 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.524.278 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.524.299 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel_renormalize [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.310 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.328 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.379 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.395 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.445 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.462 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.512 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.526 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] ANALYZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.524.529 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.576 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.598 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_3.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.626 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_a end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.727 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute_after_opt_a start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.795 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute_after_opt_a end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.821 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_cell_reuse_recomputed_activation start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.839 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_cell_reuse_recomputed_activation end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.524.858 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_after_opt_a start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.277 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_after_opt_a end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.322 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass convert_after_rewriter start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.375 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass convert_after_rewriter end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.400 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass order_py_execute_after_rewriter start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.453 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass order_py_execute_after_rewriter end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.477 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_b start ... [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.504 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.524 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_b_r1_b_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.542 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: zero_like_fill_zero [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:04.525.508 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:04.525.567 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.605 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: zero_like_fill_zero, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.622 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: list_to_tuple_eliminator_ [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.673 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: list_to_tuple_eliminator_, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.689 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_to_list_eliminator_ [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.742 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_to_list_eliminator_, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.757 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.831 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.848 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_const_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.910 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_const_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.927 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.978 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.525.993 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_set_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.054 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.069 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_depend_reorder [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:04.526.082 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:04.526.124 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.131 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_depend_reorder, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.149 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_convert_item_index_to_positive [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.217 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_convert_item_index_to_positive, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.247 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: make_slice_get_slice_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.299 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: make_slice_get_slice_eliminator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.315 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.375 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.390 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reset_defer_inline [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:04.526.397 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.440 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reset_defer_inline, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.457 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:04.526.433 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.509 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.525 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.526.546 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.576 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.594 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_pure_node_eliminater [INFO] ANALYZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.526.633 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 1___main___Net_construct_5 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xd1d49c0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xd1d49c0, value: ValueAny), Parent: } [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.643 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_pure_node_eliminater, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.662 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: load_eliminater [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.709 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: load_eliminater, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.724 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: stopgrad_eliminater [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.773 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: stopgrad_eliminater, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.787 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: special_op_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.839 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: special_op_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.854 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.905 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.921 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_add_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.980 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_add_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.526.997 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_set_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.047 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_set_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.062 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_depend_swap [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.111 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_depend_swap, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.125 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_add_const_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.175 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_add_const_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.189 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: value_based_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.241 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: value_based_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.256 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: parallel_virtual_node [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.304 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: parallel_virtual_node, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.318 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: const_output_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.367 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: const_output_eliminate, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.390 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_1.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.415 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.435 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_b_r1_b_2 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.500 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_2.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.520 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.571 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.591 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.633 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.652 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.527.658 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel_renormalize.changed [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.705 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.728 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.renormalize [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.747 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.renormalize.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.527.739 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.update_top_fg [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.766 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.cse [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.527.771 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.update_top_fg.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.527.796 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cast_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.527.819 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_cast_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.527.909 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cast_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.527.933 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_fg_expand [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.932 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.cse.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.964 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_b end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.527.984 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_fg_expand.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.527.990 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass optimize_parallel_all_gather_comm start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.005 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation_after_expand [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.528.048 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass optimize_parallel_all_gather_comm end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.528.073 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_param_gather start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.072 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation_after_expand.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.528.091 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_param_gather end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.097 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp_send_recv_attached [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.528.110 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cconv start ... [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.152 [mindspore/ccsrc/frontend/parallel/pass/flash_sp.cc:2977] FlashSPSendRecvNodeAttach] No RA/FlashSP Send/Recv grad is found to be attached. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.177 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp_send_recv_attached.changed [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.528.178 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cconv end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.528.203 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass loop_unroll start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.199 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.receive_attached [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.242 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.receive_attached.changed [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.266 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.after_resolve [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.286 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_after_resolve [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.371 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.after_resolve.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.397 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_after_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.437 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_after_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.537 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_after_grad.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.562 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.special_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.584 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_special_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.722 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.special_op_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.751 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.renormalize [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.775 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.renormalize.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.796 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.add_forward_monad_depend [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.829 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.853 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.877 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_grad.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.898 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.528.990 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.020 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cse [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.030 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start loop_unroll_optimizer.r1.loop_unroll [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.079 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, loop_unroll_optimizer_r1_loop_unroll [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.161 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End loop_unroll_optimizer.r1.loop_unroll.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.192 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass loop_unroll end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.217 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_after_cconv start ... [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.238 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.c_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.254 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_after_cconv_r1_c_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.245 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cse.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.289 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_3 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.312 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_3 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.333 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.401 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.423 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.481 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.c_1.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.481 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.501 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.503 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.parameter_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.527 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.545 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.559 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.596 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.607 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.629 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.653 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.671 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.674 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.693 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.732 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.734 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.750 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.cse [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.757 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.817 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.838 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.898 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.913 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.cse.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.921 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.939 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.renormalize [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.960 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.renormalize.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.529.980 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_after_cconv end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.529.980 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.001 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_dup_value start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.530.012 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_3.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.530.047 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.expand_dump_flag [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.530.074 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.530.095 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.530.115 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.530.188 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.530.213 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.530.232 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.530.300 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.loop_unroll.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.315 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_dup_value end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.530.326 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_1 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.349 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass tuple_transform start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.530.350 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.374 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.d_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.393 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_trans_graph_r1_d_1 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.695 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.d_1.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.721 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.renormalize [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.740 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.renormalize.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.759 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass tuple_transform end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.779 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass partial_unused_args_eliminate start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.799 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass partial_unused_args_eliminate end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.820 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_cache_embedding start ... [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.530.763 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group hccl_world_group [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.530.837 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 500.987 msec. [WARNING] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.530.858 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.868 [mindspore/ccsrc/frontend/parallel/cache_embedding/cache_embedding.cc:706] AddCacheEmbedding] Parameters are all not cache enable. [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.530.885 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.901 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_cache_embedding end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.530.923 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_recomputation start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.134 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_recomputation end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.162 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cse_after_recomputation start ... [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.184 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_recompute.r1.cse [WARNING] DEVICE(187764,fffe057fa0f0,python):2025-02-07-15:58:04.531.260 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.286 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_recompute.r1.cse.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.316 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cse_after_recomputation end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.339 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass environ_conv start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.406 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass environ_conv end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.432 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass swap_dp_allreduce_reducescatter start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.473 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass swap_dp_allreduce_reducescatter end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.493 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass bias_add_comm_swap start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.512 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass bias_add_comm_swap end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.530 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_micro_interleaved_index start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.546 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_micro_interleaved_index end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.564 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_fine_grained_interleaved_index start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.583 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_fine_grained_interleaved_index end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.601 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass merge_cast_opt start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.615 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass merge_cast_opt end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.633 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_recompute_activation start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.676 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_recompute_activation end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.697 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass micro_interleaved_order_control start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.713 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass micro_interleaved_order_control end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.731 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass assign_add_opt start ... [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.838 [mindspore/ccsrc/frontend/parallel/pass/assign_add_opt.cc:466] AssignAddOpt] Merge multi matmul assign add begin and concat eliminate enable flag is:0 [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.881 [mindspore/ccsrc/frontend/parallel/pass/pass_utils.cc:122] ExtractBackwardMatMul] backward_matmul_dx_dw_map size:0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.531.834 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_1.changed [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.927 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass assign_add_opt end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.531.933 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.recompute_prepare [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.952 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass ForceFp32Comm start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.531.963 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_recompute_prepare [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.970 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass ForceFp32Comm end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.531.990 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_cast_before_assign_add start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.028 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_cast_before_assign_add end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.040 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.recompute_prepare.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.048 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass full_micro_interleaved_order_control start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.068 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass full_micro_interleaved_order_control end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.066 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_depend_eliminate [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.087 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass reorder_send_recv_between_fp_bp start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.106 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass reorder_send_recv_between_fp_bp end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.125 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass comm_op_add_attrs start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.131 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.158 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_assign_eliminate [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.200 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass comm_op_add_attrs end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.207 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_assign_eliminate.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.224 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_comm_op_reuse_tag start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.232 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.284 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_loads_eliminate.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.304 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_comm_op_reuse_tag end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.307 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parameter_eliminate [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.330 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_split_concat_branches start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parameter_eliminate.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.348 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_split_concat_branches end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.360 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_2 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.367 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_parallel_branches start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.384 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_parallel_branches end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.382 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_2 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.403 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_in_pipeline start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.407 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.460 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_in_pipeline end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.473 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.482 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_grad_in_pipeline start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.494 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.503 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_grad_in_pipeline end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.532 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass control_data_broadcast_order start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.549 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass control_data_broadcast_order end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.554 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.567 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass grouped_pairwise_exchange_alltoall start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.578 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.596 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass grouped_pairwise_exchange_alltoall end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.617 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass offloading_packed_experts start ... [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.690 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:746] SetOffloadingPackedExpert] pass if (parallel::g_device_manager == nullptr) [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.717 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:707] CheckUserSettings] To activate the pass, set_auto_parallel_context 'enable_alltoall' should be true [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.731 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:751] SetOffloadingPackedExpert] CheckUserSettings_not_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.718 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.746 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass offloading_packed_experts end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.747 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.767 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_and_grad_model_parallel start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.784 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_and_grad_model_parallel end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.803 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_matmul_and_grad_allreduce start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.805 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.817 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_matmul_and_grad_allreduce end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.826 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.836 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_allgather_and_fa_grad start ... [WARNING] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.854 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.869 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_allgather_and_fa_grad end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.887 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_ring_attention start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.881 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.532.924 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.943 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_ring_attention end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.532.966 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_flash_sp start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.010 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_flash_sp end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.003 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.025 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.033 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass begin_end_overlap_inline start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.050 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass begin_end_overlap_inline end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.081 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_matmul_comm_elemetwise start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.087 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.098 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_matmul_comm_elemetwise end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.111 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.119 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_layernorm_comm start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.136 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_layernorm_comm end. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.153 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass handle_group_info start ... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.173 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass handle_group_info end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.169 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.192 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass symbol_engine_optimizer start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.193 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.216 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.build [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.256 [mindspore/ccsrc/frontend/optimizer/irpass/symbol_engine_optimizer.cc:39] operator()] There is no dynamic shape node, the SymbolEngineBuilder is disabled. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.251 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.272 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.277 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.build.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.297 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_shapecalc [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.316 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_shapecalc [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.346 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.368 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.379 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_shapecalc.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.400 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_not_effective [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.416 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_not_effective [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.427 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.449 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.490 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_not_effective.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.512 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.opt_reshape [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.527 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_opt_reshape [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.524 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.548 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.581 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.opt_reshape.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.601 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.fold_const_symbol [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.605 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.618 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_fold_const_symbol [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.629 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.688 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.689 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.fold_const_symbol.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.713 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.721 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.renormalize [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.741 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.renormalize.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.761 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass symbol_engine_optimizer end. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.770 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.785 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end optimize action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.804 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.792 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.835 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_parallel_scheduler action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.856 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_parallel_scheduler action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.872 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.871 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.893 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad_reorder action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.895 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.953 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.533.983 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad_reorder action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.533.987 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_2.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.005 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.020 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.accelerated_algorithm [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.032 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start get_jit_bprop_graph action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.050 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end get_jit_bprop_graph action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.046 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_accelerated_algorithm [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.067 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.088 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.103 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.117 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.138 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start eliminate_special_op_node action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.132 [mindspore/ccsrc/frontend/optimizer/opt.cc:232] ApplyIRToSubstitutions] There may be a problem. Substitution: opt_a.r2.accelerated_algorithm.less_batch_normalization [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.205 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.accelerated_algorithm.changed [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.234 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.258 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.279 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.300 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.323 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_shard_fg_expand [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.363 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.384 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard_inline [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.405 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_shard_inline [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.473 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard_inline.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.496 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_parallel [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.568 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 47 us [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.597 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_parallel.changed [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.620 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.662 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.687 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.780 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp.changed [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.812 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_comm [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.814 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.ad_related_special_op_eliminate [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.865 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_ad_related_special_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.872 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_comm.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.897 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_fusion [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.935 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.ad_related_special_op_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.949 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_fusion.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.965 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.mutable_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.534.974 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.matmul_add_comm_reduction [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.534.995 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_mutable_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.028 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.048 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.mutable_op_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.053 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.070 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.convert_tensor_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.077 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.086 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_convert_tensor_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.098 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_shard_identity [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.121 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_shard_identity [INFO] OPTIMIZER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.133 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.convert_tensor_op_eliminate.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.161 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end eliminate_special_op_node action. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.179 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.197 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_shard_identity.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.209 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start distribtued_split action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.226 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_dataset [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.249 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_dataset [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.258 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:378] GenerateStrategy] Current parallel mode is semi_auto_parallel [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.275 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:390] GenerateStrategy] Generated distributed strategy is 1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.315 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_dataset.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.341 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.get_grad_eliminate_ [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.361 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_get_grad_eliminate_ [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.424 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.get_grad_eliminate_.unchanged [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.434 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:1277] Run] All nodes are on this process so there's no need to build and split distributed graph. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.450 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_output [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.460 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end distribtued_split action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.473 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_output [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.477 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.506 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start validate action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.535 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_output.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.558 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_forward [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.611 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_forward.unchanged [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.641 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end validate action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.637 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_recompute_pass [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.662 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.688 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.710 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.730 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.752 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] PROFILER(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.770 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc:49] IsProfilingParallelStrategyEnabled] Profiling parallel strategy is disabled. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.848 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.535.857 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start task_emit action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.875 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.901 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.before_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.535.922 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_before_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.012 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.before_grad.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.038 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.536.086 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1679] SetRunMode] Run graph mode with kernel by kernel by configuration. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.091 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.115 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel_renormalize [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.138 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel_renormalize.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.158 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.update_top_fg [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.181 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.update_top_fg.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.201 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cast_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.221 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_cast_eliminate [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:04.536.273 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1057] CompileGraphs] Status record: start compile function graph: 4_3_1___main___Net_construct_20 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.291 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cast_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.317 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_fg_expand [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.370 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_fg_expand.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.394 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation_after_expand [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.458 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.499 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp_send_recv_attached [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.526 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.547 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.receive_attached [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.572 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.receive_attached.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.593 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.after_resolve [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:04.536.592 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.615 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_after_resolve [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.800 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.after_resolve.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.831 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_after_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.853 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_after_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.946 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_after_grad.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.974 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.special_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.536.996 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_special_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.537.063 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.special_op_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.537.090 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.renormalize [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:04.537.196 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: 4_3_1___main___Net_construct_20 [INFO] ANALYZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.537.430 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:04.537.458 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-5488101015797526856, the max communication size is 1 MB. [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:04.537.488 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-5488101015797526856, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.537.515 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-5488101015797526856. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:04.538.063 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:04.538.122 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:04.538.507 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:04.538.547 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:04.538.798 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:04.538.833 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.538.930 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.539.018 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 3_1___main___Net_construct_19 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xd1d49c0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xd1d49c0, value: ValueAny), Parent: } [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.078 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.renormalize.changed [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.162 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.add_forward_monad_depend [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.206 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.229 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.255 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_grad.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.276 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.360 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.382 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cse [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.581 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cse.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.613 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_3 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.687 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_3 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.724 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.799 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.820 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.874 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.895 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.949 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.540.969 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.022 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.041 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.096 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.115 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.193 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.212 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.267 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.288 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.345 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.373 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_3.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.408 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.expand_dump_flag [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.435 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.expand_dump_flag.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.458 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.480 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.549 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.switch_simplify.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.576 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.598 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.662 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.loop_unroll.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.687 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.541.706 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.542.992 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_1.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.058 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.recompute_prepare [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.085 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_recompute_prepare [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.160 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.recompute_prepare.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.185 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_depend_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.253 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.299 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_assign_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.351 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.376 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_loads_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.425 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.449 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parameter_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.483 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parameter_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.506 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_2 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.527 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_2 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.548 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.614 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.636 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.695 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.717 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.773 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.795 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.852 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.873 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.929 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.543.949 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.024 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.048 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.106 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.126 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.199 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.222 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.279 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.301 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.375 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.396 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.454 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.476 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.550 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.573 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.664 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.694 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.753 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.773 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.829 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.850 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.906 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.927 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.544.984 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.024 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_2.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.060 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.accelerated_algorithm [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.103 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_accelerated_algorithm [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:04.545.073 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:739] Initialize] The actor thread number: 5, the kernel thread number: 25 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.192 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.225 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.245 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.266 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.288 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.312 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_shard_fg_expand [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.361 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.385 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard_inline [INFO] DEBUG(187753,ffff8292dc10,python):2025-02-07-15:58:04.545.384 [mindspore/ccsrc/common/debug/env_config_parser.cc:152] ParseFromFile] The 'env_config_path' in 'mindspore.context.set_context(env_config_path={path})' is empty. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.409 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_shard_inline [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.477 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard_inline.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.506 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_parallel [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.593 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 61 us [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.622 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_parallel.changed [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.650 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.692 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.716 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.742 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.766 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_comm [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.826 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_comm.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.854 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_fusion [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.906 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_fusion.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.932 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.matmul_add_comm_reduction [INFO] SYMBOLIC_SHAPE(187753,ffff8292dc10,python):2025-02-07-15:58:04.545.927 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187753,ffff8292dc10,python):2025-02-07-15:58:04.545.960 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187753,ffff8292dc10,python):2025-02-07-15:58:04.545.988 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187753,ffff8292dc10,python):2025-02-07-15:58:04.546.005 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187753,ffff8292dc10,python):2025-02-07-15:58:04.546.021 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.545.997 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.matmul_add_comm_reduction.unchanged [INFO] SYMBOLIC_SHAPE(187753,ffff8292dc10,python):2025-02-07-15:58:04.546.037 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.043 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.069 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_slice_to_reducescatter.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.546.077 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1682] Run] Pipeline run [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.092 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_shard_identity [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.546.119 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start bootstrap action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.118 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_shard_identity [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.192 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.219 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_dataset [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.242 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_dataset [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.309 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_dataset.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.get_grad_eliminate_ [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.360 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_get_grad_eliminate_ [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.426 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.452 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_output [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.471 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_output [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.537 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_output.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.565 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_forward [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.623 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_forward.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.650 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_recompute_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.682 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.708 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.730 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.752 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.853 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.884 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.926 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.before_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.546.950 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_before_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.044 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.before_grad.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.071 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.126 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.151 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel_renormalize [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.174 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel_renormalize.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.197 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.update_top_fg [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.221 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.update_top_fg.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.243 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cast_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.263 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_cast_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.331 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cast_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.358 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_fg_expand [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.416 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_fg_expand.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.547.422 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end bootstrap action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.442 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation_after_expand [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.547.461 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 9 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.508 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation_after_expand.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.547.521 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start type_inference action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.534 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp_send_recv_attached [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.563 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.586 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.receive_attached [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.611 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.receive_attached.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.634 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.after_resolve [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.655 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_after_resolve [INFO] ANALYZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.547.726 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.735 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.after_resolve.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.790 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_after_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.811 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_after_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.907 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_after_grad.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.934 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.special_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.547.952 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_special_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.018 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.special_op_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.045 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.renormalize [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.067 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.renormalize.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.090 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.add_forward_monad_depend [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.121 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.143 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_grad [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.174 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_grad.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.195 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.279 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.308 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cse [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.522 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cse.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.558 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_3 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.583 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_3 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.607 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.699 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.730 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.790 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.810 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.890 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.911 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.967 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.548.990 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.047 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.068 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.126 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.148 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.204 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.225 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.280 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.314 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_3.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.351 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_a end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.386 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute_after_opt_a start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.469 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute_after_opt_a end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.506 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_cell_reuse_recomputed_activation start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.531 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_cell_reuse_recomputed_activation end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.556 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_after_opt_a start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.911 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_after_opt_a end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.549.972 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass convert_after_rewriter start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.031 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass convert_after_rewriter end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.060 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass order_py_execute_after_rewriter start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.124 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass order_py_execute_after_rewriter end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.151 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_b start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.187 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.212 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_b_r1_b_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.231 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: zero_like_fill_zero [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.298 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: zero_like_fill_zero, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.320 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: list_to_tuple_eliminator_ [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.378 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: list_to_tuple_eliminator_, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.399 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_to_list_eliminator_ [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.455 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_to_list_eliminator_, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.477 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.558 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.583 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_const_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.654 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_const_eliminator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.676 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.731 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.755 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_set_item_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.826 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.852 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_depend_reorder [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.922 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_depend_reorder, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.550.946 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_convert_item_index_to_positive [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.020 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_convert_item_index_to_positive, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.060 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: make_slice_get_slice_eliminator [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.119 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: make_slice_get_slice_eliminator, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.141 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.211 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.234 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reset_defer_inline [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.291 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reset_defer_inline, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.314 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:04.551.267 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:739] Initialize] The actor thread number: 5, the kernel thread number: 25 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.370 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.392 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.448 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.473 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_pure_node_eliminater [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.528 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_pure_node_eliminater, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.549 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: load_eliminater [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.606 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: load_eliminater, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.627 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: stopgrad_eliminater [INFO] DEBUG(187803,ffff93d7bc10,python):2025-02-07-15:58:04.551.610 [mindspore/ccsrc/common/debug/env_config_parser.cc:152] ParseFromFile] The 'env_config_path' in 'mindspore.context.set_context(env_config_path={path})' is empty. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.684 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: stopgrad_eliminater, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.706 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: special_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.761 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: special_op_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.783 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.838 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.861 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_add_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.934 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_add_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.551.958 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_set_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.015 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_set_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.037 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_depend_swap [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.093 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_depend_swap, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.113 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_add_const_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.169 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_add_const_eliminate, change: 0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.192 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: value_based_eliminate [INFO] SYMBOLIC_SHAPE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.552.204 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.552.242 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.249 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: value_based_eliminate, change: 0 [INFO] SYMBOLIC_SHAPE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.552.261 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[0].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.271 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: parallel_virtual_node [INFO] SYMBOLIC_SHAPE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.552.282 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[0]: max=-1, min=1, divisor=1, remainder=0 [INFO] SYMBOLIC_SHAPE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.552.324 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[1]: max=-1, min=1, divisor=1, remainder=0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.328 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: parallel_virtual_node, change: 0 [INFO] SYMBOLIC_SHAPE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.552.343 [mindspore/core/symbolic_shape/utils.cc:325] BuildSymbolicShapeBySymbolInfo] SymbolInfo for input[1].shape[2]: max=-1, min=1, divisor=1, remainder=0 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.349 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: const_output_eliminate [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.552.384 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1682] Run] Pipeline run [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.409 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: const_output_eliminate, change: 0 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.552.433 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start bootstrap action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.443 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_1.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.481 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_2 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.508 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_b_r1_b_2 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.582 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_2.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.608 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.688 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.722 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.776 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.803 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.869 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.895 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.renormalize [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.919 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.renormalize.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.552.939 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.cse [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.553.164 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.cse.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.553.211 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_b end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.553.246 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass optimize_parallel_all_gather_comm start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.553.310 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass optimize_parallel_all_gather_comm end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.553.341 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_param_gather start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.553.364 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_param_gather end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.553.389 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cconv start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.553.470 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cconv end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.553.506 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass loop_unroll start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.553.780 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end bootstrap action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.553.826 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 9 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.553.894 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start type_inference action. [INFO] ANALYZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.554.136 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.554.377 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start loop_unroll_optimizer.r1.loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.554.447 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, loop_unroll_optimizer_r1_loop_unroll [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.554.552 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End loop_unroll_optimizer.r1.loop_unroll.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.554.595 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass loop_unroll end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.554.631 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_after_cconv start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.554.659 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.c_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.554.681 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_after_cconv_r1_c_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.554.922 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.c_1.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.554.955 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.parameter_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.554.987 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.009 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.100 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.126 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.173 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.197 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.243 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.266 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.cse [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.463 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.cse.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.497 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.renormalize [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.521 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.renormalize.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.547 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_after_cconv end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.575 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_dup_value start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.922 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_dup_value end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.555.973 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass tuple_transform start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.003 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.d_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.027 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_trans_graph_r1_d_1 [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.367 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.d_1.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.407 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.renormalize [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.432 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.renormalize.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.458 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass tuple_transform end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.484 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass partial_unused_args_eliminate start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.508 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass partial_unused_args_eliminate end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.533 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_cache_embedding start ... [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.594 [mindspore/ccsrc/frontend/parallel/cache_embedding/cache_embedding.cc:706] AddCacheEmbedding] Parameters are all not cache enable. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.658 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_cache_embedding end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.695 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_recomputation start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.952 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_recomputation end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.556.996 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cse_after_recomputation start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.024 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_recompute.r1.cse [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.167 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_recompute.r1.cse.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.206 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cse_after_recomputation end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.232 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass environ_conv start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.312 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass environ_conv end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.343 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass swap_dp_allreduce_reducescatter start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.394 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass swap_dp_allreduce_reducescatter end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.423 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass bias_add_comm_swap start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.447 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass bias_add_comm_swap end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.473 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_micro_interleaved_index start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.495 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_micro_interleaved_index end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.520 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_fine_grained_interleaved_index start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.542 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_fine_grained_interleaved_index end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.567 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass merge_cast_opt start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.586 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass merge_cast_opt end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.607 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_recompute_activation start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.659 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_recompute_activation end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.685 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass micro_interleaved_order_control start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.706 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass micro_interleaved_order_control end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.727 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass assign_add_opt start ... [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.849 [mindspore/ccsrc/frontend/parallel/pass/assign_add_opt.cc:466] AssignAddOpt] Merge multi matmul assign add begin and concat eliminate enable flag is:0 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.906 [mindspore/ccsrc/frontend/parallel/pass/pass_utils.cc:122] ExtractBackwardMatMul] backward_matmul_dx_dw_map size:0 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.958 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass assign_add_opt end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.557.992 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass ForceFp32Comm start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.012 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass ForceFp32Comm end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.035 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_cast_before_assign_add start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.078 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_cast_before_assign_add end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.105 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass full_micro_interleaved_order_control start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.126 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass full_micro_interleaved_order_control end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.153 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass reorder_send_recv_between_fp_bp start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.174 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass reorder_send_recv_between_fp_bp end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.198 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass comm_op_add_attrs start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.291 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass comm_op_add_attrs end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.324 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_comm_op_reuse_tag start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.413 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_comm_op_reuse_tag end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.448 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_split_concat_branches start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.470 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_split_concat_branches end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.495 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_parallel_branches start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.515 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_parallel_branches end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.538 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_in_pipeline start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.573 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_in_pipeline end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.598 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_grad_in_pipeline start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.622 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_grad_in_pipeline end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.667 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass control_data_broadcast_order start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.688 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass control_data_broadcast_order end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.711 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass grouped_pairwise_exchange_alltoall start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.745 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass grouped_pairwise_exchange_alltoall end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.773 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass offloading_packed_experts start ... [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.794 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:746] SetOffloadingPackedExpert] pass if (parallel::g_device_manager == nullptr) [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.814 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:707] CheckUserSettings] To activate the pass, set_auto_parallel_context 'enable_alltoall' should be true [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.832 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:751] SetOffloadingPackedExpert] CheckUserSettings_not_pass [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.851 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass offloading_packed_experts end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.875 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_and_grad_model_parallel start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.898 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_and_grad_model_parallel end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.923 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_matmul_and_grad_allreduce start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.942 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_matmul_and_grad_allreduce end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.966 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_allgather_and_fa_grad start ... [WARNING] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.558.986 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.006 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_allgather_and_fa_grad end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.027 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_ring_attention start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.092 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_ring_attention end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.123 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_flash_sp start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.171 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_flash_sp end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.198 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass begin_end_overlap_inline start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.217 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass begin_end_overlap_inline end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.255 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_matmul_comm_elemetwise start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.279 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_matmul_comm_elemetwise end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.306 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_layernorm_comm start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.328 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_layernorm_comm end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.351 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass handle_group_info start ... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.375 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass handle_group_info end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.400 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass symbol_engine_optimizer start ... [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.435 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.build [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.489 [mindspore/ccsrc/frontend/optimizer/irpass/symbol_engine_optimizer.cc:39] operator()] There is no dynamic shape node, the SymbolEngineBuilder is disabled. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.518 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.build.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.544 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_shapecalc [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.568 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_shapecalc [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.643 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_shapecalc.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.671 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_not_effective [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.692 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_not_effective [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.782 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_not_effective.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.808 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.opt_reshape [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.831 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_opt_reshape [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.890 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.opt_reshape.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.913 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.fold_const_symbol [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.559.932 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_fold_const_symbol [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.012 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.fold_const_symbol.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.056 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.renormalize [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.082 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.renormalize.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.108 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass symbol_engine_optimizer end. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.137 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end optimize action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.157 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.197 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_parallel_scheduler action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.222 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_parallel_scheduler action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.241 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.268 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad_reorder action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.384 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad_reorder action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.413 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.446 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start get_jit_bprop_graph action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.465 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end get_jit_bprop_graph action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.484 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.510 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.529 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.547 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.560.574 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start eliminate_special_op_node action. [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.561.505 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.ad_related_special_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.561.570 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_ad_related_special_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.561.653 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.ad_related_special_op_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.561.684 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.mutable_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.561.722 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_mutable_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.561.779 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.mutable_op_eliminate.unchanged [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.561.805 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.convert_tensor_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.561.825 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_convert_tensor_op_eliminate [INFO] OPTIMIZER(187789,ffffaa419c10,python):2025-02-07-15:58:04.561.877 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.convert_tensor_op_eliminate.unchanged [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.561.912 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end eliminate_special_op_node action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.561.933 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.561.974 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start distribtued_split action. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.562.014 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:378] GenerateStrategy] Current parallel mode is semi_auto_parallel [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.562.034 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:390] GenerateStrategy] Generated distributed strategy is 1 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:04.562.182 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:1277] Run] All nodes are on this process so there's no need to build and split distributed graph. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.562.216 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end distribtued_split action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.562.236 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.562.270 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start validate action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.562.436 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end validate action. [INFO] ME(187753:281472872406032,MainProcess):2025-02-07-15:58:04.561.985 [mindspore/_extends/parse/namespace.py:132] 'Net' object has no attribute or method: '__is_tensors_queue__', so will return None. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.562.463 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PROFILER(187789,ffffaa419c10,python):2025-02-07-15:58:04.562.528 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc:49] IsProfilingParallelStrategyEnabled] Profiling parallel strategy is disabled. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.562.628 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start task_emit action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:04.562.875 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1679] SetRunMode] Run graph mode with kernel by kernel by configuration. [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:04.563.095 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1057] CompileGraphs] Status record: start compile function graph: 4_3_1___main___Net_construct_20 [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:04.563.397 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.563.778 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.563.898 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: __main___Net_construct_2 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xb62aed0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xb62aed0, value: ValueAny), Parent: } [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:04.563.928 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: 4_3_1___main___Net_construct_20 [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:04.564.200 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-16453000547691086251, the max communication size is 1 MB. [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:04.564.233 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-16453000547691086251, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:04.564.261 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-16453000547691086251. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.564.390 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end type_inference action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.564.427 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.564.474 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.564.790 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.564.815 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.564.848 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start graph_reusing action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.564.877 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end graph_reusing action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.564.893 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.564.917 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start inline action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.564.979 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.565.070 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.565.096 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass a1a2 start ... [INFO] ME(187803:281473162132496,MainProcess):2025-02-07-15:58:04.568.950 [mindspore/_extends/parse/namespace.py:132] 'Net' object has no attribute or method: '__is_tensors_queue__', so will return None. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.570.871 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.571.019 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: __main___Net_construct_2 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x227c3ed0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x227c3ed0, value: ValueAny), Parent: } [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.571.579 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end type_inference action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.571.623 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.571.682 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad action. [INFO] PARSER(187753,ffff8292dc10,python):2025-02-07-15:58:04.571.648 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {cast_ : Prim[Cast]} [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.571.934 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.571.962 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PARSER(187753,ffff8292dc10,python):2025-02-07-15:58:04.571.986 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:151] WriteVariable] fill_3 update var `value` with node @fill_3:value{[0]: CNode_4, [1]: param_value, [2]: param_type} [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.572.001 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start graph_reusing action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.572.025 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end graph_reusing action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.572.045 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 2, total nodes: 12 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.572.104 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start inline action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.572.182 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.572.293 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PARSER(187753,ffff8292dc10,python):2025-02-07-15:58:04.572.315 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {fillv2_ : Prim[FillV2]} [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.572.331 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass a1a2 start ... [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.575.694 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.expand_dump_flag [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.575.763 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.575.823 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.575.847 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.575.904 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.575.926 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.loop_unroll [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.575.942 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_loop_unroll [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.575.975 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.575.993 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.007 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_a_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.338 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_1.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.370 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.recompute_prepare [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.386 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_recompute_prepare [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.413 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.454 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.485 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.503 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.524 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.541 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.560 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.579 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parameter_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.604 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.620 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.694 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r1_a_2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.736 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.759 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.777 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.796 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.810 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.828 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.841 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.859 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.872 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.890 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.903 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.921 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.934 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.961 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.975 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.576.993 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.006 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.024 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.038 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.065 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.080 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.097 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.111 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.128 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.141 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.159 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.172 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.189 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.203 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.220 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.233 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.251 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.264 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.283 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.307 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_2.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.326 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parallel_inline_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.340 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_parallel_inline_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.363 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parallel_inline_pass.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.385 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.expand_dump_flag [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.403 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.419 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.434 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.455 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.472 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.loop_unroll [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.485 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_loop_unroll [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.507 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.523 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.538 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_a_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.675 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_1.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.694 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.recompute_prepare [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.710 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_recompute_prepare [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.731 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.747 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.770 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.786 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_assign_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.805 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.824 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.843 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.867 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parameter_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.886 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.902 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.916 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r2_a_2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.930 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.950 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.964 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.981 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.577.994 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.012 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.025 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.043 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.056 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.073 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.087 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.105 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.118 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.135 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.148 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.166 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.179 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.197 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.216 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.241 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.254 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.272 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.285 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.303 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.318 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.336 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.349 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.366 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.379 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.396 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.409 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.427 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.440 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.457 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.473 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_2.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.492 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parallel_inline_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.506 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_parallel_inline_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.528 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parallel_inline_pass.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.549 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass a1a2 end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.572 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end inline action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.597 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.627 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol action. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.652 [mindspore/ccsrc/frontend/parallel/dynamic_shape/dynamic_shape.cc:223] ForwardHasDynamicShape] Can not find the forward graph, so find the ops in root graph [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.720 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.737 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.762 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pre_auto_parallel action. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.812 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 30 us [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.829 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pre_auto_parallel action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.843 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.578.865 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start insert-virtual-dataset action. [INFO] PARSER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.579.528 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {cast_ : Prim[Cast]} [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.579.858 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end insert-virtual-dataset action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.579.905 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PARSER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.579.890 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:151] WriteVariable] fill_3 update var `value` with node @fill_3:value{[0]: CNode_4, [1]: param_value, [2]: param_type} [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.579.945 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol-second action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.579.966 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol-second action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.579.980 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.002 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start dataset_repeat_opt action. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.100 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.129 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2248] GetCommInfo] Get global rank from communication model, the global rank is 1 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.192 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 1, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 1 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.212 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PARSER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.580.211 [mindspore/ccsrc/pipeline/jit/ps/parse/function_block.cc:416] HandleNamespaceSymbol] [fill_3] Added global python symbol: {fillv2_ : Prim[FillV2]} [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.226 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.256 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 1, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [WARNING] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.283 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:305] BroadcastDataset] For now on, only dataset sink mode support dataset reader optimizer. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.303 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end dataset_repeat_opt action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.318 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.344 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_split action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.370 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:239] PipelineSplit] Get device num from communication model, the device num is 8 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.387 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:256] PipelineSplit] The parameter 'stage_num' is: 1. No need Pipeline split. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.427 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_split action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.443 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.466 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start optimize action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.513 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.548 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.570 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.624 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.750 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_a start ... [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.777 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.expand_dump_flag [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.798 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.817 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.833 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.867 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.885 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.loop_unroll [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.903 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_loop_unroll [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.941 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.loop_unroll.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.959 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.580.974 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.285 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_1.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.308 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.recompute_prepare [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.324 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_recompute_prepare [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.350 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.368 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.395 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.417 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.439 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.459 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.481 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.498 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parameter_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.518 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.534 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.549 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.564 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.587 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.601 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.622 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.635 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.655 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.679 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.700 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.714 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.734 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.748 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.777 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.792 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.812 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.826 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.847 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.860 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.881 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.894 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.921 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.938 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.958 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.581.972 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.001 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.016 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.037 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.050 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.070 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.088 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.108 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.122 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.142 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.156 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.176 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.193 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_2.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.212 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.accelerated_algorithm [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.227 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_accelerated_algorithm [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.255 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.272 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.288 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.304 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.319 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.335 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_shard_fg_expand [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.356 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.374 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard_inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.388 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_shard_inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.414 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard_inline.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.430 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_parallel [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.461 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 15 us [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.482 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_parallel.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.510 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.541 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.573 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 1, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 1 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.588 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.601 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.615 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 1, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.729 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3587] MarkForwardCNode] Can not find the forward graph, so mark the ops in root graph [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.812 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.831 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3182] IsInsertVirtualOutput] The current stage is: 0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.582.877 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=_VirtualOutput [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.583.745 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualDatasetInfo0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.583.792 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.583.879 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1), (1, 1, 1)) [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.583.901 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.024 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualDatasetInfo00: The loss divisor is 1, no need to create virtual div op. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.034 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.expand_dump_flag [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.144 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.expand_dump_flag.unchanged [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.188 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator MulInfo1 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.209 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.224 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.switch_simplify [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.249 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((2, 2, 2), (2, 2, 2)) [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.255 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_switch_simplify [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.267 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.326 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.switch_simplify.unchanged [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.326 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.356 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.353 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.loop_unroll [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.378 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.375 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_loop_unroll [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.394 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 1 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.409 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:503] InferMirrorOps] MulInfo11: No need to insert mirror ops [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.412 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.loop_unroll.unchanged [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.431 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2631] InferAsLossDivisor] MulInfo11: the dev matrix shape is [2, 2, 2], the output tensor map is [2, 1, 0], loss divisor is 1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.435 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_1 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.445 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] MulInfo11: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.459 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1334] Init] MulInfo11 : Init success. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.455 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_a_1 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.537 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualOutputInfo2 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.558 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.590 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1)) [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.607 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.652 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualOutputInfo22: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.800 [mindspore/ccsrc/frontend/parallel/parameter_manager.cc:1445] HandleCameAndAdaFactorOpt] Adafactor or Came optimizer process start [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.915 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1150] MergeEntireShapeForDynamic] Into MergeEntireShapeForDynamic [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.904 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_1.changed [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.584.948 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1163] MergeEntireShapeForDynamic] Can not find the forward graph, so mark the ops in root graph [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.953 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.recompute_prepare [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.584.980 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_recompute_prepare [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.015 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.recompute_prepare.unchanged [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.038 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(1) [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.042 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.083 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_depend_eliminate.unchanged [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.111 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.110 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.165 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.188 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.215 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.updatestate_loads_eliminate.unchanged [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.227 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 1-5 and group name is 2-12944936785892925600 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.238 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parameter_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.272 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parameter_eliminate.unchanged [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.276 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-12944936785892925600 [const vector]{1, 5}, async: 0, submit_now: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.296 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.a_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.318 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r1_a_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.342 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.362 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.02 msec. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.377 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.406 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.417 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-12944936785892925600 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.433 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.444 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.058 msec. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.451 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.480 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.21 msec. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.478 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.499 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-12944936785892925600 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.498 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.523 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.542 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.563 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 1-3 and group name is 2-4190060298023907007 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.565 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.592 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-4190060298023907007 [const vector]{1, 3}, async: 0, submit_now: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.586 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.614 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.626 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.004 msec. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.632 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.659 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-4190060298023907007 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.656 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.681 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.034 msec. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.676 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.704 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.111 msec. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.718 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-4190060298023907007 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.718 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.739 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.765 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 0-1 and group name is 2-5004544844489628105 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.766 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [WARNING] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.793 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-5004544844489628105 [const vector]{0, 1}, async: 0, submit_now: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.788 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.821 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.003 msec. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.846 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-5004544844489628105 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.838 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.861 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.867 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.026 msec. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.890 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.897 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.102 msec. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.909 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.914 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-5004544844489628105 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.933 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.954 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.585.978 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.585.982 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.002 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.031 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.051 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.078 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.099 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.128 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.586.134 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op0, op=StridedSlice [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.148 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.174 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.206 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.a_2.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.241 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r1.parallel_inline_pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.280 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r1_parallel_inline_pass [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.586.312 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.318 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r1.parallel_inline_pass.unchanged [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.586.347 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op0, op=StridedSlice [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.353 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.expand_dump_flag [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.386 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.412 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.switch_simplify [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.586.425 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.436 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_switch_simplify [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.586.456 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op1, op=StridedSlice [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.470 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.495 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.loop_unroll [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.518 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_loop_unroll [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.586.532 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.551 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.loop_unroll.unchanged [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.586.557 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.586.575 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(2) [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.575 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.599 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_a_1 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.586.647 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.586.797 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.804 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_1.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.845 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.recompute_prepare [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.870 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_recompute_prepare [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.586.903 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op1, op=StridedSlice [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.904 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.930 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.968 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.586.992 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_assign_eliminate [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.586.999 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.022 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_assign_eliminate.unchanged [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.031 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op2, op=StridedSlice [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.045 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.072 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.098 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parameter_eliminate [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.104 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.133 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op3, op=StridedSlice [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.129 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.180 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.a_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.205 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, a1a2_r2_a_2 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.215 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.227 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.238 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.262 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.285 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.313 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.336 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.332 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_6{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 1} [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.359 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.382 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_8{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_6, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.379 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.406 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.431 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_9{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_10, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.427 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.453 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.463 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_11{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 0} [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.471 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.496 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.505 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_12{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_11, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.516 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.542 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.548 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_13{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_14, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.560 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.585 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.598 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/Mul-op0->Default/_VirtualOutput-op0(1) [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.604 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.629 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.649 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.658 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.695 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.732 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.761 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.782 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.808 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.811 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 7, operator_vector: AllGather, AllGather, Split, Concat, AllGather, Split, Concat [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.829 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.856 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.875 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.900 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.919 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.946 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.587.955 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=AllGather [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.964 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.587.990 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.011 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.039 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.072 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.a_2.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.105 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start a1a2.r2.parallel_inline_pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.133 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, a1a2_r2_parallel_inline_pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.175 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End a1a2.r2.parallel_inline_pass.unchanged [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.212 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass a1a2 end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.249 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end inline action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.273 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.321 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol action. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.390 [mindspore/ccsrc/frontend/parallel/dynamic_shape/dynamic_shape.cc:223] ForwardHasDynamicShape] Can not find the forward graph, so find the ops in root graph [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.494 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.523 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.567 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pre_auto_parallel action. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.672 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 67 us [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.706 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pre_auto_parallel action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.736 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 7 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.588.780 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start insert-virtual-dataset action. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.589.057 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_7692796245619514736AllGather_ success [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.589.119 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op0, op=Split [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.589.237 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_12015561575443432111Split_ success [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.589.302 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op0, op=Concat [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.589.371 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8051664706019937323Concat_ success [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.589.401 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op0, op=AllGather [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.590.026 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_5140002550487651858AllGather_ success [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.590.081 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op1, op=Split [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.060 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end insert-virtual-dataset action. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.590.147 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8691182465882856301Split_ success [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.147 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.216 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start parallel-infer-symbol-second action. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.590.229 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op1, op=Concat [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.245 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end parallel-infer-symbol-second action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.266 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.590.273 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_6614310911506831424Concat_ success [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.590.303 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op1, op=AllGather [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.297 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start dataset_repeat_opt action. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.419 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.451 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2248] GetCommInfo] Get global rank from communication model, the global rank is 5 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.530 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 5, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 5 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.553 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.571 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.592 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 5, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [WARNING] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.659 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:305] BroadcastDataset] For now on, only dataset sink mode support dataset reader optimizer. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.685 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end dataset_repeat_opt action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.703 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.741 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_split action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.778 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:239] PipelineSplit] Get device num from communication model, the device num is 8 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.803 [mindspore/ccsrc/pipeline/jit/ps/pipeline_split.cc:256] PipelineSplit] The parameter 'stage_num' is: 1. No need Pipeline split. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.865 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_split action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.887 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 15 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.919 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start optimize action. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.590.916 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_1898494724763908338AllGather_ success [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.590.964 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.590.977 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute start ... [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.013 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_15{[0]: ValueNode PrimFunc_Mul, [1]: CNode_13, [2]: CNode_9} [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.028 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute end. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.045 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_16{[0]: ValueNode AllGather, [1]: CNode_15} [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.063 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_before_opt_a start ... [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.078 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_17{[0]: ValueNode AllGather, [1]: CNode_18} [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.143 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_before_opt_a end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.207 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_a start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.252 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.expand_dump_flag [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.284 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.expand_dump_flag.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.312 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.339 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.395 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.switch_simplify.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.425 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.loop_unroll [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.445 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_loop_unroll [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.462 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:918] FindCommonMirrorGroup] The common mirror group is:[const vector]{} [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.483 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.loop_unroll.unchanged [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.518 [mindspore/ccsrc/frontend/parallel/parallel_postprocessor.cc:352] HandleGlobalNormScale] Start to process the global norm [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.509 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.591.549 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_1 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.591 [mindspore/ccsrc/frontend/parallel/step_parallel.cc:171] StepParallel] Now leaving step parallel, used time: 9056 us [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.628 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.657 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.722 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.748 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_comm [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.801 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_comm.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.824 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_fusion [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.872 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_fusion.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.893 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.matmul_add_comm_reduction [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.946 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.968 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.591.987 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.004 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_shard_identity [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.021 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_shard_identity [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.006 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_1.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.067 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.recompute_prepare [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.095 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_recompute_prepare [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.098 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.124 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_dataset [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.140 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_dataset [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.138 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.recompute_prepare.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.165 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.210 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.239 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.254 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_dataset.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.281 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.get_grad_eliminate_ [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.275 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.301 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_get_grad_eliminate_ [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.302 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.333 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.359 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parameter_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.368 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.392 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.401 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_output [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.419 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_output [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.419 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.443 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.470 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.504 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_output.changed [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.509 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.530 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_forward [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.531 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.570 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.578 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_forward.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.592 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.602 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_recompute_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.625 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.674 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.691 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.624 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.708 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.720 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.755 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.796 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.800 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.820 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.838 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.before_grad [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.831 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.855 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_before_grad [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.855 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.905 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.927 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.944 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.before_grad.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.592.966 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.958 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.592.979 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.006 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.593.012 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.026 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.593.034 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel_renormalize [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.055 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.074 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.120 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.142 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.168 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.188 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] ANALYZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.593.225 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.233 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.259 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.292 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.314 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.341 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.360 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.390 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.422 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.452 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.474 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.507 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.545 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_2.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.585 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.accelerated_algorithm [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.612 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_accelerated_algorithm [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.657 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.684 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.706 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.732 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.756 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.780 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_shard_fg_expand [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.813 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.841 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.shard_inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.863 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_shard_inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.909 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.shard_inline.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.593.937 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_parallel [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.594.002 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 37 us [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:04.594.012 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.594.037 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_parallel.changed [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:04.594.050 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.594.069 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.594.132 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.594.192 [mindspore/ccsrc/frontend/parallel/device_manager.cc:290] Init] The device num: 8, rank id: 5, the backend: hccl, the stage num: 1, the stage id: 0, the rank index in stage is: 5 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.594.243 [mindspore/ccsrc/frontend/parallel/device_manager.cc:117] InitDevice] Device initialization succeeds. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.594.264 [mindspore/ccsrc/frontend/parallel/device_manager.cc:118] InitDevice] g_device_manager: DeviceNum: 8 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.594.287 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3018] ParallelInit] The parallel context: device_num: 8, global_rank: 5, communication_backend: hccl, gradients_mean: 0, gradient_fp32_sync: 1 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.594.464 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3587] MarkForwardCNode] Can not find the forward graph, so mark the ops in root graph [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:04.594.522 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:04.594.559 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.594.593 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2230] GetCommInfo] Get device num from communication model, the device num is 8 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.594.629 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:3182] IsInsertVirtualOutput] The current stage is: 0 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.594.695 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=_VirtualOutput [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:04.594.787 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:04.594.829 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.594.900 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.594.962 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 1___main___Net_construct_5 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xb62aed0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xb62aed0, value: ValueAny), Parent: } [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.595.845 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel_renormalize.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.595.891 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.update_top_fg [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.595.917 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.update_top_fg.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.595.936 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cast_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.595.952 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_cast_eliminate [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.595.949 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualDatasetInfo0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.026 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cast_eliminate.unchanged [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.031 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.049 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_fg_expand [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.095 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_fg_expand.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.114 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation_after_expand [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.151 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1), (1, 1, 1)) [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.170 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation_after_expand.unchanged [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.180 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.191 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp_send_recv_attached [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.233 [mindspore/ccsrc/frontend/parallel/pass/flash_sp.cc:2977] FlashSPSendRecvNodeAttach] No RA/FlashSP Send/Recv grad is found to be attached. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.253 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp_send_recv_attached.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.272 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.receive_attached [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.293 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.receive_attached.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.310 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.after_resolve [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.325 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_after_resolve [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.337 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualDatasetInfo00: The loss divisor is 1, no need to create virtual div op. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.407 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.after_resolve.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.428 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_after_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.443 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_after_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.534 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_after_grad.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.554 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.special_op_eliminate [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.537 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator MulInfo1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.571 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_special_op_eliminate [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.571 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.644 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.special_op_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.667 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.renormalize [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.649 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((2, 2, 2), (2, 2, 2)) [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.684 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.renormalize.unchanged [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.682 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.703 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.add_forward_monad_depend [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.726 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.743 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.763 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_grad.unchanged [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.761 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.779 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_eliminator [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.786 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 0 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.810 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1074] CreateGroupByTensorMap] MulInfo11: The dev size is 1, no need to create group. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.844 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_eliminator.unchanged [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.855 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:491] InferMirrorOps] MulInfo11: The mirror group is empty, the input index is 1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.596.869 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cse [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.874 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:503] InferMirrorOps] MulInfo11: No need to insert mirror ops [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.903 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2631] InferAsLossDivisor] MulInfo11: the dev matrix shape is [2, 2, 2], the output tensor map is [2, 1, 0], loss divisor is 1 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.923 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] MulInfo11: The loss divisor is 1, no need to create virtual div op. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.596.942 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:1334] Init] MulInfo11 : Init success. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.059 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cse.unchanged [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.597.064 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1364] OperatorInstanceByName] Successfully created operator VirtualOutputInfo2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.087 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_3 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.597.094 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2112] CreateOperatorInfo] shape_list.size(): 2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.105 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_3 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.121 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.597.141 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2587] ExtractStrategy] Extract information: strategy ((1, 1, 1)) [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.597.166 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:2536] CheckStrategyWithTupleInTuple] CheckStrategyWithTupleInTuple: has_tuple_in_tuple = 0. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.179 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.196 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.597.213 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:2688] InferVirtualDivOps] VirtualOutputInfo22: The loss divisor is 1, no need to create virtual div op. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.250 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.269 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.334 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.348 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.400 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.413 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.597.435 [mindspore/ccsrc/frontend/parallel/parameter_manager.cc:1445] HandleCameAndAdaFactorOpt] Adafactor or Came optimizer process start [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.466 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.481 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.534 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.548 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.600 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.613 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.597.623 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1150] MergeEntireShapeForDynamic] Into MergeEntireShapeForDynamic [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.666 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.685 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_3.unchanged [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.597.682 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:1163] MergeEntireShapeForDynamic] Can not find the forward graph, so mark the ops in root graph [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.710 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.expand_dump_flag [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.731 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.747 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.761 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_switch_simplify [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.597.805 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(1) [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.822 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.843 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.loop_unroll [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.857 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_loop_unroll [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.917 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.loop_unroll.unchanged [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.597.912 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.935 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.597.950 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_1 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.071 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 1-5 and group name is 2-12944936785892925600 [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.148 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-12944936785892925600 [const vector]{1, 5}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.247 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.021 msec. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.320 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-12944936785892925600 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.381 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.092 msec. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.425 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.286 msec. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.449 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-12944936785892925600 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.565 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 5-7 and group name is 2-16057586909177180503 [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.609 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-16057586909177180503 [const vector]{5, 7}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.657 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.006 msec. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.696 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-16057586909177180503 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.727 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.042 msec. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.756 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.147 msec. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.782 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-16057586909177180503 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.853 [mindspore/ccsrc/frontend/parallel/device_manager.cc:420] GenerateGroupNameByRanks] The rank list name is 4-5 and group name is 2-6541264347459079684 [WARNING] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.894 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: 2-6541264347459079684 [const vector]{4, 5}, async: 0, submit_now: 0 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.929 [mindspore/ccsrc/distributed/collective/collective_manager.cc:361] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnHostSide costs 0.005 msec. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.967 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_collective_comm_lib.cc:141] CreateCommunicationGroup] Successfully create HCCL communication group 2-6541264347459079684 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.598.997 [mindspore/ccsrc/distributed/collective/collective_manager.cc:368] CreateCommunicationGroup] [PROF]CreateCommunicationGroupOnDeviceSide costs 0.04 msec. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.599.024 [mindspore/ccsrc/distributed/collective/collective_manager.cc:398] CreateCommunicationGroup] [PROF]distributed_create_group costs 0.132 msec. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.599.047 [mindspore/ccsrc/frontend/parallel/group_manager.cc:237] CreateGroup] Create group success, group name is 2-6541264347459079684 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.599.145 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.332 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_1.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.366 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.recompute_prepare [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.599.354 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op0, op=StridedSlice [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.385 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_recompute_prepare [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.445 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.466 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.512 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.531 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_assign_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.574 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.596 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_loads_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.637 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.658 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parameter_eliminate [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.599.655 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.679 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.697 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.713 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.727 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.599.716 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op0, op=StridedSlice [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.783 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.802 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.599.817 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.853 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.869 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.599.861 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op1, op=StridedSlice [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.919 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.936 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.599.972 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.599.986 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.002 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.600.006 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.600.033 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/_VirtualDataset-op0->Default/Mul-op0(2) [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.062 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.078 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.139 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.156 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.600.141 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.206 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.222 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.272 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.286 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.335 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.350 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.600.384 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 3, operator_vector: StridedSlice, StridedSlice, StridedSlice [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.409 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.426 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.475 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.489 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.551 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.600.545 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/TupleGetItem-op1, op=StridedSlice [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.570 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.619 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.705 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.770 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.787 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.600.768 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_4478498464723551734StridedSlice_ success [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.600.824 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op2, op=StridedSlice [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.837 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.865 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.916 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.930 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.600.922 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_14528741814808112071StridedSlice_ success [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.600.962 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=0, pre_node=Default/StridedSlice-op3, op=StridedSlice [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.600.981 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.004 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_2.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.024 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.accelerated_algorithm [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.040 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_accelerated_algorithm [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.601.060 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_13841291514553691519StridedSlice_ success [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.098 [mindspore/ccsrc/frontend/optimizer/opt.cc:232] ApplyIRToSubstitutions] There may be a problem. Substitution: opt_a.r2.accelerated_algorithm.less_batch_normalization [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.601.092 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.164 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.accelerated_algorithm.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.189 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.204 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.220 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.236 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.253 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_shard_fg_expand [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.601.243 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_6{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 1} [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.282 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.303 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard_inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.317 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_shard_inline [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.601.310 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_8{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_6, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.601.372 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_9{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_10, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.380 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard_inline.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.403 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_parallel [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.601.414 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_11{[0]: ValueNode TupleGetItem, [1]: CNode_7, [2]: ValueNode 0} [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.459 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 37 us [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.601.466 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_12{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_11, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.483 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_parallel.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.502 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.601.521 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_13{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_14, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.530 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.562 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.616 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.638 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_comm [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.601.627 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1339] StepRedistribution] ===========Do Redistribution start============ Default/Mul-op0->Default/_VirtualOutput-op0(1) [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.688 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_comm.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.712 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_fusion [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.601.732 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:746] InferTensorRedistributionOperatorList] Start to infer tensor redistribution. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.761 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_fusion.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.783 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.matmul_add_comm_reduction [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.827 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.849 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.867 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.884 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_shard_identity [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.900 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_shard_identity [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.961 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.981 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_dataset [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.601.996 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_dataset [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.601.987 [mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc:806] InferTensorRedistributionOperatorList] After InferRedistribution, operator_vector size: 7, operator_vector: AllGather, AllGather, Split, Concat, AllGather, Split, Concat [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.055 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_dataset.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.076 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.get_grad_eliminate_ [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.091 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_get_grad_eliminate_ [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.149 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.170 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_output [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.185 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_output [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.602.197 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Mul-op0, op=AllGather [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.241 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_output.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.263 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_forward [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.305 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_forward.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.333 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_recompute_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.354 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.371 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.385 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.399 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.479 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.500 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.518 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.before_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.532 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_before_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.612 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.before_grad.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.631 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.673 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.694 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel_renormalize [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.712 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel_renormalize.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.729 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.update_top_fg [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.745 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.update_top_fg.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.762 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cast_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.776 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_cast_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.834 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cast_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.851 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_fg_expand [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.892 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_fg_expand.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.910 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation_after_expand [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.968 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.602.989 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp_send_recv_attached [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.007 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.023 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.receive_attached [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.041 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.receive_attached.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.057 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.after_resolve [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.071 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_after_resolve [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.134 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.after_resolve.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.154 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_after_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.169 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_after_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.252 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_after_grad.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.270 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.special_op_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.284 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_special_op_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.340 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.special_op_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.357 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.renormalize [INFO] ANALYZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.541 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.603.802 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_7692796245619514736AllGather_ success [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.603.968 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op0, op=Split [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:04.603.984 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:04.604.020 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.604.152 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_12015561575443432111Split_ success [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.604.248 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op0, op=Concat [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:04.604.264 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:04.604.297 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.604.344 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8051664706019937323Concat_ success [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.604.384 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op0, op=AllGather [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:04.604.494 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:04.604.524 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.604.591 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.604.666 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 3_1___main___Net_construct_19 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xb62aed0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0xb62aed0, value: ValueAny), Parent: } [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.605.416 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_5140002550487651858AllGather_ success [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.489 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.renormalize.changed [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.605.530 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/AllGather-op1, op=Split [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.541 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.add_forward_monad_depend [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.573 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.592 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.611 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_grad.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.628 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_eliminator [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.605.623 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_8691182465882856301Split_ success [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.689 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.712 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cse [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.605.748 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/MakeTuple-op1, op=Concat [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.605.806 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_6614310911506831424Concat_ success [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.605.840 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:762] CreateInput] CreateInput param.empty=1, pre_node=Default/Concat-op1, op=AllGather [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.878 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cse.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.905 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_3 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.923 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_3 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.939 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.605.997 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.013 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.065 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.080 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.130 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.144 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.193 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.206 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.256 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.280 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.331 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.345 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.394 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.407 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.457 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.475 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_3.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.498 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.expand_dump_flag [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.517 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.expand_dump_flag.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.533 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.549 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.607 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.switch_simplify.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.626 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.loop_unroll [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.641 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_loop_unroll [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.698 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.loop_unroll.unchanged [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.606.666 [mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc:912] InsertNode] Insert redistribution_op_1898494724763908338AllGather_ success [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.717 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.606.731 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_1 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.606.749 [mindspore/ccsrc/frontend/parallel/parallel_processer.cc:1343] StepRedistribution] ===========Do Redistribution end ============ [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.606.828 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:88] CollectSuccessorDumpNodes] Node Parent is: @1___main___Net_construct_5:CNode_15{[0]: ValueNode PrimFunc_Mul, [1]: CNode_13, [2]: CNode_9} [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.606.867 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:94] CollectSuccessorDumpNodes] Parent node's successor: @1___main___Net_construct_5:CNode_16{[0]: ValueNode AllGather, [1]: CNode_15} [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.606.904 [mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.cc:67] HandleParallelTensorDump] Last Insert Redistribution: @1___main___Net_construct_5:CNode_17{[0]: ValueNode AllGather, [1]: CNode_18} [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.607.391 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:918] FindCommonMirrorGroup] The common mirror group is:[const vector]{} [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.607.469 [mindspore/ccsrc/frontend/parallel/parallel_postprocessor.cc:352] HandleGlobalNormScale] Start to process the global norm [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.607.591 [mindspore/ccsrc/frontend/parallel/step_parallel.cc:171] StepParallel] Now leaving step parallel, used time: 13481 us [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.607.643 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel.changed [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.607.694 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.607.791 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp.changed [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.607.825 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_comm [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.607.895 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_comm.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.607.925 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_fusion [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.607.955 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_1.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.607.984 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.recompute_prepare [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.607.987 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_fusion.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.002 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_recompute_prepare [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.019 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.matmul_add_comm_reduction [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.062 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.recompute_prepare.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.084 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_depend_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.090 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.121 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.140 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.147 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.164 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_assign_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.171 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_shard_identity [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.198 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_shard_identity [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.206 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.230 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_loads_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.271 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.290 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parameter_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.310 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parameter_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.307 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.326 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.342 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.343 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_dataset [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.356 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.367 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_dataset [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.409 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.426 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.478 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.494 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.543 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.558 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.561 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_dataset.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.607 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.622 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.611 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.get_grad_eliminate_ [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.656 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_get_grad_eliminate_ [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.684 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.704 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.755 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.765 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.787 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.791 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.virtual_output [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.815 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_virtual_output [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.838 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.865 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.917 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.935 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.608.937 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.virtual_output.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.608.986 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.003 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.004 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.merge_forward [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.065 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.086 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.078 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.merge_forward.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.112 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_recompute_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.137 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.152 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.147 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.176 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.199 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.219 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.222 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.237 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.287 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.302 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.351 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.365 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.351 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.387 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.414 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.417 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.before_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.430 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.441 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_before_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.479 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.494 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.542 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.563 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_2.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.567 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.before_grad.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.594 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.accelerated_algorithm [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.601 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.616 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_accelerated_algorithm [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.660 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.684 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.690 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.parallel_renormalize [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.708 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.725 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.740 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.759 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.777 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_shard_fg_expand [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.805 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.826 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard_inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.840 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_shard_inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.899 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard_inline.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.920 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_parallel [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.609.979 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 40 us [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.001 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_parallel.changed [INFO] ANALYZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.609.977 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.024 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.053 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.074 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.093 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.113 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_comm [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.161 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_comm.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.183 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_fusion [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.229 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_fusion.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.250 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.matmul_add_comm_reduction [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.307 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.329 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.348 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.364 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_shard_identity [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.380 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_shard_identity [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.441 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.462 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_dataset [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.478 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_dataset [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.537 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_dataset.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.557 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.get_grad_eliminate_ [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.572 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_get_grad_eliminate_ [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.630 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.650 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_output [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.665 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_output [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.722 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_output.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.740 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_forward [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.782 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_forward.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.803 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_recompute_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.826 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.842 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.857 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.872 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.951 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.610.983 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.001 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.before_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.016 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_before_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.099 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.before_grad.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.120 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:04.611.106 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.163 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.186 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel_renormalize [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:04.611.184 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.204 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel_renormalize.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.221 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.update_top_fg [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.238 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.update_top_fg.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.254 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cast_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.268 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_cast_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.329 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cast_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.351 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_fg_expand [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.394 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_fg_expand.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.412 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation_after_expand [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.461 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.480 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp_send_recv_attached [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.498 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.515 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.receive_attached [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.534 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.receive_attached.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.550 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.after_resolve [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.564 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_after_resolve [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.639 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.after_resolve.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.661 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_after_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.675 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_after_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.759 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_after_grad.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.779 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.special_op_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.796 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_special_op_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.854 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.special_op_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.873 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.renormalize [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.890 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.renormalize.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.907 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.add_forward_monad_depend [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:04.611.875 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.928 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.add_forward_monad_depend.unchanged [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:04.611.928 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.947 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.966 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_grad.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.611.981 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_eliminator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.038 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.061 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cse [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.216 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cse.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.242 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_3 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.258 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_3 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.273 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:04.612.261 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:04.612.303 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.330 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.348 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.399 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.424 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.612.411 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.477 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.495 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.544 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] ANALYZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.612.538 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 1___main___Net_construct_5 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x227c3ed0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x227c3ed0, value: ValueAny), Parent: } [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.559 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.612 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.653 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.736 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.751 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.804 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.818 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.867 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.888 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_3.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.911 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_a end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.937 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute_after_opt_a start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.612.995 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute_after_opt_a end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.020 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_cell_reuse_recomputed_activation start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.038 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_cell_reuse_recomputed_activation end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.059 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_after_opt_a start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.420 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_after_opt_a end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.465 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass convert_after_rewriter start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.516 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass convert_after_rewriter end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.554 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass order_py_execute_after_rewriter start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.596 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass order_py_execute_after_rewriter end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.625 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_b start ... [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.651 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.671 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_b_r1_b_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.689 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: zero_like_fill_zero [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.751 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: zero_like_fill_zero, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.769 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: list_to_tuple_eliminator_ [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.822 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: list_to_tuple_eliminator_, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.839 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_to_list_eliminator_ [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.613.796 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.parallel_renormalize.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.893 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_to_list_eliminator_, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.910 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.613.897 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.update_top_fg [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.613.934 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.update_top_fg.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.613.959 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cast_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.613.982 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.613.987 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_cast_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.002 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_const_eliminator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.067 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_const_eliminator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.090 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.100 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cast_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.128 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.meta_fg_expand [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.143 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.165 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_set_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.190 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.meta_fg_expand.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.217 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.inplace_validation_after_expand [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.229 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.252 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_depend_reorder [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.299 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.318 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_depend_reorder, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.330 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.flash_sp_send_recv_attached [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.340 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_convert_item_index_to_positive [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.393 [mindspore/ccsrc/frontend/parallel/pass/flash_sp.cc:2977] FlashSPSendRecvNodeAttach] No RA/FlashSP Send/Recv grad is found to be attached. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.421 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.flash_sp_send_recv_attached.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.431 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_convert_item_index_to_positive, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.447 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.receive_attached [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.454 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: make_slice_get_slice_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.496 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.receive_attached.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.509 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: make_slice_get_slice_eliminator, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.527 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.524 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.after_resolve [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.546 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_after_resolve [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.591 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.614 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reset_defer_inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.653 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.after_resolve.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.670 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reset_defer_inline, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.689 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.686 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_after_grad [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.709 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_a_after_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.743 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.759 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.811 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.827 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_pure_node_eliminater [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.864 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_after_grad.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.878 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_pure_node_eliminater, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.895 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: load_eliminater [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.900 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.special_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.614.921 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r1_special_op_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.947 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: load_eliminater, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.614.963 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: stopgrad_eliminater [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.015 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: stopgrad_eliminater, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.008 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.special_op_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.035 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: special_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.038 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.renormalize [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.063 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.renormalize.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.086 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: special_op_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.086 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.add_forward_monad_depend [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.105 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.126 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.150 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_grad [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.159 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.179 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_grad.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.190 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_add_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.204 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.auto_monad_eliminator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.246 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_add_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.263 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_set_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.299 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.315 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_set_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.335 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_depend_swap [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.333 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.cse [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.388 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_depend_swap, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.405 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_add_const_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.458 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_add_const_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.476 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: value_based_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.528 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: value_based_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.545 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: parallel_virtual_node [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.597 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: parallel_virtual_node, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.611 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: const_output_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.620 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.cse.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.662 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: const_output_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.673 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r1.a_3 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.692 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_1.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.700 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r1_a_3 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.718 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.725 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.738 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_b_r1_b_2 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.804 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_2.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.814 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.826 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.838 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.876 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.902 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.908 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.615.934 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.946 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.615.986 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.004 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.032 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.027 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.055 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.renormalize [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.076 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.renormalize.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.093 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.cse [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.119 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.146 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.218 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.242 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.258 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.cse.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.292 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_b end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.319 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass optimize_parallel_all_gather_comm start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.313 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.337 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.377 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass optimize_parallel_all_gather_comm end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.406 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_param_gather start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.406 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.425 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_param_gather end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.430 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.447 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cconv start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.500 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.513 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cconv end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.616.543 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass loop_unroll start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.538 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r1.a_3.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.586 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.expand_dump_flag [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.619 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.expand_dump_flag.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.741 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.772 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.865 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.switch_simplify.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.898 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.loop_unroll [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.616.920 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_loop_unroll [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.617.001 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.loop_unroll.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.617.029 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.617.051 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.617.457 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start loop_unroll_optimizer.r1.loop_unroll [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.617.521 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, loop_unroll_optimizer_r1_loop_unroll [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.617.605 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End loop_unroll_optimizer.r1.loop_unroll.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.617.638 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass loop_unroll end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.617.667 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_after_cconv start ... [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.617.689 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.c_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.617.704 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_after_cconv_r1_c_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.617.938 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.c_1.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.617.963 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.parameter_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.617.988 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.022 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.073 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.095 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.137 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.159 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.198 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.216 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.cse [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.387 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.cse.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.418 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.renormalize [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.439 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.renormalize.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.459 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_after_cconv end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.482 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_dup_value start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.798 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_dup_value end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.835 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass tuple_transform start ... [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.861 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.d_1 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.618.879 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_trans_graph_r1_d_1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.618.925 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_1.changed [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.019 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.recompute_prepare [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.077 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_recompute_prepare [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.168 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.recompute_prepare.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.180 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.d_1.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.197 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_depend_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.212 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.renormalize [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.231 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.renormalize.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.250 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass tuple_transform end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.270 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass partial_unused_args_eliminate start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.269 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_depend_eliminate.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.291 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass partial_unused_args_eliminate end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.296 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_assign_eliminate [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.313 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_cache_embedding start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.348 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.375 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.updatestate_loads_eliminate [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.398 [mindspore/ccsrc/frontend/parallel/cache_embedding/cache_embedding.cc:706] AddCacheEmbedding] Parameters are all not cache enable. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.423 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_cache_embedding end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.422 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.updatestate_loads_eliminate.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.448 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_recomputation start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.448 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parameter_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.480 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parameter_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.504 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.526 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.547 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.620 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.641 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.663 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_recomputation end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.698 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cse_after_recomputation start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.710 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.721 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_recompute.r1.cse [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.730 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.796 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.817 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.835 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_recompute.r1.cse.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.866 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cse_after_recomputation end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.882 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.890 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass environ_conv start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.902 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.957 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass environ_conv end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.967 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.619.983 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass swap_dp_allreduce_reducescatter start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.619.987 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.025 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass swap_dp_allreduce_reducescatter end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.050 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass bias_add_comm_swap start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.070 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass bias_add_comm_swap end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.090 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_micro_interleaved_index start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.106 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_micro_interleaved_index end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.097 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.126 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_fine_grained_interleaved_index start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.123 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.148 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_fine_grained_interleaved_index end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.172 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass merge_cast_opt start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.186 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass merge_cast_opt end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.190 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.207 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_recompute_activation start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.212 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.252 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_recompute_activation end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.279 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass micro_interleaved_order_control start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.281 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.301 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.297 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass micro_interleaved_order_control end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.334 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass assign_add_opt start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.369 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.391 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.438 [mindspore/ccsrc/frontend/parallel/pass/assign_add_opt.cc:466] AssignAddOpt] Merge multi matmul assign add begin and concat eliminate enable flag is:0 [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.487 [mindspore/ccsrc/frontend/parallel/pass/pass_utils.cc:122] ExtractBackwardMatMul] backward_matmul_dx_dw_map size:0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.481 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.506 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.532 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass assign_add_opt end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.560 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass ForceFp32Comm start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.578 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass ForceFp32Comm end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.571 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.599 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_cast_before_assign_add start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.593 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.691 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_cast_before_assign_add end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.720 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass full_micro_interleaved_order_control start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.739 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass full_micro_interleaved_order_control end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.759 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass reorder_send_recv_between_fp_bp start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.736 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.770 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.777 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass reorder_send_recv_between_fp_bp end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.799 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass comm_op_add_attrs start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.839 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.863 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.879 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass comm_op_add_attrs end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.910 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_comm_op_reuse_tag start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.928 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.620.948 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.620.993 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_comm_op_reuse_tag end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.013 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.026 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_split_concat_branches start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.045 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_split_concat_branches end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.039 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.066 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_parallel_branches start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.083 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_parallel_branches end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.103 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_in_pipeline start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.105 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.146 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.158 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_in_pipeline end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.184 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_grad_in_pipeline start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.215 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_grad_in_pipeline end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.216 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.236 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass control_data_broadcast_order start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.253 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass control_data_broadcast_order end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.256 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_2.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.272 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass grouped_pairwise_exchange_alltoall start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.295 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.accelerated_algorithm [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.303 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass grouped_pairwise_exchange_alltoall end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.326 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass offloading_packed_experts start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.324 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_accelerated_algorithm [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.342 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:746] SetOffloadingPackedExpert] pass if (parallel::g_device_manager == nullptr) [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.358 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:707] CheckUserSettings] To activate the pass, set_auto_parallel_context 'enable_alltoall' should be true [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.372 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:751] SetOffloadingPackedExpert] CheckUserSettings_not_pass [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.387 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass offloading_packed_experts end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.406 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_and_grad_model_parallel start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.423 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_and_grad_model_parallel end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.421 [mindspore/ccsrc/frontend/optimizer/opt.cc:232] ApplyIRToSubstitutions] There may be a problem. Substitution: opt_a.r2.accelerated_algorithm.less_batch_normalization [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.442 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_matmul_and_grad_allreduce start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.458 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_matmul_and_grad_allreduce end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.476 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_allgather_and_fa_grad start ... [WARNING] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.493 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.510 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_allgather_and_fa_grad end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.514 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.accelerated_algorithm.changed [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.529 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_ring_attention start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.549 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.575 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.592 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_ring_attention end. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.600 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.623 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_flash_sp start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.623 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.648 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_shard_fg_expand [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.671 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_flash_sp end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.691 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_shard_fg_expand.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.698 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass begin_end_overlap_inline start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.716 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.shard_inline [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.733 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass begin_end_overlap_inline end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.739 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_shard_inline [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.755 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_matmul_comm_elemetwise start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.772 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_matmul_comm_elemetwise end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.792 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_layernorm_comm start ... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.809 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_layernorm_comm end. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.828 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass handle_group_info start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.821 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.shard_inline.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.852 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass handle_group_info end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.849 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_parallel [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.874 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass symbol_engine_optimizer start ... [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.901 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.build [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.927 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 49 us [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.938 [mindspore/ccsrc/frontend/optimizer/irpass/symbol_engine_optimizer.cc:39] operator()] There is no dynamic shape node, the SymbolEngineBuilder is disabled. [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.965 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.build.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.958 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_parallel.changed [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.621.986 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_shapecalc [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.621.985 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.007 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_shapecalc [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.031 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.061 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.071 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_shapecalc.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.094 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_not_effective [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.111 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_not_effective [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.134 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp.changed [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.183 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_comm [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.191 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_not_effective.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.214 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.opt_reshape [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.232 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_opt_reshape [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.249 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_comm.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.277 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_fusion [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.286 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.opt_reshape.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.308 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.fold_const_symbol [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.325 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_fold_const_symbol [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.334 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_fusion.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.359 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.matmul_add_comm_reduction [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.415 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.fold_const_symbol.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.414 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.443 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.renormalize [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.441 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.466 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.renormalize.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.469 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.allreduce_slice_to_reducescatter.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.489 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass symbol_engine_optimizer end. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.491 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_shard_identity [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.515 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end optimize action. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.514 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_shard_identity [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.533 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.565 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_parallel_scheduler action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.586 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_parallel_scheduler action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.602 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.598 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_shard_identity.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.625 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad_reorder action. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.627 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_dataset [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.649 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_dataset [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.728 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_dataset.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.728 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad_reorder action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.754 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.753 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.get_grad_eliminate_ [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.781 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start get_jit_bprop_graph action. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.776 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_get_grad_eliminate_ [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.798 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end get_jit_bprop_graph action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.814 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.834 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.849 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end rewriter_after_jit_bprop_graph action. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.851 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.get_grad_eliminate_.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.863 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.622.885 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start eliminate_special_op_node action. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.880 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.virtual_output [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.901 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_virtual_output [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.622.977 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.virtual_output.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.004 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.merge_forward [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.062 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.merge_forward.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.090 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_recompute_pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.122 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.161 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.184 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.207 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.330 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.365 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.393 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.before_grad [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.415 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_before_grad [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.527 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.before_grad.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.559 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.617 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.623.597 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.ad_related_special_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.647 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.parallel_renormalize [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.623.659 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_ad_related_special_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.672 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.parallel_renormalize.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.698 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.update_top_fg [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.724 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.update_top_fg.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.623.728 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.ad_related_special_op_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.749 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cast_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.623.773 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.mutable_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.773 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_cast_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.623.793 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_mutable_op_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.623.846 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.mutable_op_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.855 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cast_eliminate.unchanged [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.623.865 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.convert_tensor_op_eliminate [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.623.882 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_convert_tensor_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.885 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.meta_fg_expand [INFO] OPTIMIZER(187753,ffff8292dc10,python):2025-02-07-15:58:04.623.931 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.convert_tensor_op_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.950 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.meta_fg_expand.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.623.963 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end eliminate_special_op_node action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.623.982 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.623.976 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.inplace_validation_after_expand [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.624.016 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start distribtued_split action. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.624.052 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:378] GenerateStrategy] Current parallel mode is semi_auto_parallel [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.046 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.inplace_validation_after_expand.unchanged [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.624.071 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:390] GenerateStrategy] Generated distributed strategy is 1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.078 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.flash_sp_send_recv_attached [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.128 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.156 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.receive_attached [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.182 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.receive_attached.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.208 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.after_resolve [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.230 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_after_resolve [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:04.624.258 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:1277] Run] All nodes are on this process so there's no need to build and split distributed graph. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.624.292 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end distribtued_split action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.624.309 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.328 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.after_resolve.unchanged [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.624.341 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start validate action. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.367 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_after_grad [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.392 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_a_after_grad [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.624.487 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end validate action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.624.512 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.508 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_after_grad.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.541 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.special_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.565 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r2_special_op_eliminate [INFO] PROFILER(187753,ffff8292dc10,python):2025-02-07-15:58:04.624.614 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc:49] IsProfilingParallelStrategyEnabled] Profiling parallel strategy is disabled. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.624.778 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start task_emit action. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.785 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.special_op_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.624.824 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.renormalize [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:04.625.011 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1679] SetRunMode] Run graph mode with kernel by kernel by configuration. [INFO] ANALYZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.625.178 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.cc:430] Eval] Add root_func_graph_backup [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:04.625.228 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1057] CompileGraphs] Status record: start compile function graph: 4_3_1___main___Net_construct_20 [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:04.625.526 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:04.625.840 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:04.625.893 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:04.625.990 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: 4_3_1___main___Net_construct_20 [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:04.626.247 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-12944936785892925600, the max communication size is 1 MB. [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:04.626.278 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-12944936785892925600, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:04.626.272 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:04.626.305 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-12944936785892925600. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:04.626.311 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:04.626.547 [mindspore/ops/infer/all_gather.cc:67] InferShape] For 'AllGather', input rank_size : 2. [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:04.626.582 [mindspore/ops/infer/all_gather.cc:68] InferShape] For 'AllGather', x->shape()->shape()[0] : 2. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.626.675 [mindspore/ccsrc/pipeline/jit/ps/action.cc:361] AbstractAnalyze] function call depth: 0, simulate call depth: 0 [INFO] ANALYZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.626.759 [mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.cc:224] Run] Specialize set top func graph context: {FuncGraph: 3_1___main___Net_construct_19 Args: [0]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x227c3ed0, value: ValueAny), [1]: AbstractTensor(shape: (4, 4, 4), element: AbstractScalar(Type: Complex64, Value: ValueAny, Shape: NoShape), is adapter: False, value_ptr: 0x227c3ed0, value: ValueAny), Parent: } [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.627.841 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.renormalize.changed [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.627.920 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.add_forward_monad_depend [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.627.963 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.627.988 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_grad [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.015 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_grad.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.038 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.auto_monad_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.121 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.147 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.cse [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.351 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.cse.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.385 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r2.a_3 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.409 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r2_a_3 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.432 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.513 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.534 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.598 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.619 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.765 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.788 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.851 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.869 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.931 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.628.951 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.012 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.054 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.117 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.137 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.201 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.228 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r2.a_3.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.262 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.expand_dump_flag [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.291 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.expand_dump_flag.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.315 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.335 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.415 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.switch_simplify.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.441 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.loop_unroll [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.461 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_loop_unroll [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.534 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.loop_unroll.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.557 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.629.577 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.184 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_1.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.241 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.recompute_prepare [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.269 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_recompute_prepare [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.350 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.recompute_prepare.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.379 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_depend_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.443 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.469 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_assign_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.540 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.568 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.updatestate_loads_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.619 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.643 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parameter_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.672 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parameter_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.694 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.714 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.734 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.804 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_simplify, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.827 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: specialize_transform [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.893 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: specialize_transform, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.913 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: merge_addn [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.976 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: merge_addn, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.631.995 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: compare_switch_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.059 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: compare_switch_simplify, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.078 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: addn_check_dump [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.140 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: addn_check_dump, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.161 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.241 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.261 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_environ_get_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.324 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_environ_get_switch, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.343 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.408 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.440 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.504 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.522 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: arithmetic_simplify [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.601 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: arithmetic_simplify, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.622 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.769 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.795 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.878 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.900 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.966 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.632.986 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: incorporate_call_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.052 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: incorporate_call_switch, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.072 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.135 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.155 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: depend_value_elim [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.219 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: depend_value_elim, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.238 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reduce_all_const_elim [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.301 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reduce_all_const_elim, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.333 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_2.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.366 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.accelerated_algorithm [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.391 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_accelerated_algorithm [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.502 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.accelerated_algorithm.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.532 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.554 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:543] Shard] Shard pass starts. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.577 [mindspore/ccsrc/frontend/parallel/shard/shard.cc:546] Shard] Shard Prim don't exist, skip Shard pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.599 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.623 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_shard_fg_expand [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.669 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_shard_fg_expand.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.693 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.shard_inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.713 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_shard_inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.791 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.shard_inline.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.818 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_parallel [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.903 [mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc:170] StepAutoParallel] Now leaving step auto parallel, used time: 59 us [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.931 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_parallel.changed [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.633.961 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.004 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.030 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.055 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.078 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_comm [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.140 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_comm.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.166 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_fusion [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.223 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_fusion.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.248 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.matmul_add_comm_reduction [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.309 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.matmul_add_comm_reduction.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.allreduce_slice_to_reducescatter [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.374 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.allreduce_slice_to_reducescatter.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.398 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_shard_identity [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.419 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_shard_identity [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.500 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_shard_identity.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.526 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_dataset [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.546 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_dataset [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.620 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_dataset.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.645 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.get_grad_eliminate_ [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.666 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_get_grad_eliminate_ [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.740 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.get_grad_eliminate_.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.763 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.virtual_output [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.782 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_virtual_output [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.856 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.virtual_output.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.879 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.merge_forward [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.932 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.merge_forward.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.959 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_recompute_pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.634.988 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_recompute_pass.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.011 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.031 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_cell_reuse_handle_not_recompute_node_pass [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.050 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: remove_not_recompute_node [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.157 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: remove_not_recompute_node, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.184 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cell_reuse_handle_not_recompute_node_pass.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.207 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.before_grad [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.237 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_before_grad [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.343 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.before_grad.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.370 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.421 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.446 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.parallel_renormalize [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.468 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.parallel_renormalize.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.488 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.update_top_fg [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.510 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.update_top_fg.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.531 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cast_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.550 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_cast_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.624 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cast_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.650 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.meta_fg_expand [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.708 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.meta_fg_expand.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.731 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.inplace_validation_after_expand [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.796 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.inplace_validation_after_expand.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.819 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.flash_sp_send_recv_attached [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.843 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.flash_sp_send_recv_attached.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.864 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.receive_attached [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.887 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.receive_attached.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.908 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.after_resolve [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.635.928 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_after_resolve [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.012 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.after_resolve.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.037 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_after_grad [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.068 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_a_after_grad [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.177 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_after_grad.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.204 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.special_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.224 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_a_r3_special_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.295 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.special_op_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.319 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.renormalize [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.340 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.renormalize.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.361 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.add_forward_monad_depend [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.389 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.add_forward_monad_depend.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.410 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_grad [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.437 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_grad.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.458 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.auto_monad_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.531 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.auto_monad_eliminator.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.558 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.cse [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.777 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.cse.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.815 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_a.r3.a_3 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.837 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_a_r3_a_3 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.859 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: same_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.929 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: same_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.636.952 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: check_bprop_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.015 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: check_bprop_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.036 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: switch_layer_defer_inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.097 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: switch_layer_defer_inline, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.128 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: replace_applicator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.191 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: replace_applicator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.213 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: row_tensor_add_zeros_like [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.274 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: row_tensor_add_zeros_like, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.296 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: mini_step_allgather_replace [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.360 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: mini_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.380 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: micro_step_allgather_replace [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.442 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: micro_step_allgather_replace, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.463 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: split_environ_get_set_with_tuple_value [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.525 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: split_environ_get_set_with_tuple_value, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.555 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_a.r3.a_3.unchanged [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.587 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_a end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.621 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass py_interpret_to_execute_after_opt_a start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.700 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass py_interpret_to_execute_after_opt_a end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.730 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_cell_reuse_recomputed_activation start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.755 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_cell_reuse_recomputed_activation end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.637.780 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass rewriter_after_opt_a start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.218 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass rewriter_after_opt_a end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.269 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass convert_after_rewriter start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.327 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass convert_after_rewriter end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.356 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass order_py_execute_after_rewriter start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.400 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass order_py_execute_after_rewriter end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.446 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_b start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.481 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.503 [mindspore/ccsrc/frontend/optimizer/opt.cc:395] operator()] SUB >> IR, opt_b_r1_b_1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.522 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: zero_like_fill_zero [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.595 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: zero_like_fill_zero, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.617 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: list_to_tuple_eliminator_ [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.679 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: list_to_tuple_eliminator_, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.698 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_to_list_eliminator_ [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.759 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_to_list_eliminator_, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.778 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.862 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.883 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_const_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.957 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_const_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.638.976 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_set_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.039 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.058 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_set_item_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.135 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_set_item_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.158 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_get_item_depend_reorder [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.233 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_get_item_depend_reorder, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.254 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: tuple_list_convert_item_index_to_positive [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.334 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: tuple_list_convert_item_index_to_positive, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.355 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: make_slice_get_slice_eliminator [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.436 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: make_slice_get_slice_eliminator, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.455 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: float_tuple_getitem_switch [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.527 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: float_tuple_getitem_switch, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.546 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: reset_defer_inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.609 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: reset_defer_inline, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.627 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: inline [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.690 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: inline, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.709 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_useless_node_eliminater [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.770 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_useless_node_eliminater, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.788 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: updatestate_pure_node_eliminater [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.850 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: updatestate_pure_node_eliminater, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.869 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: load_eliminater [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.930 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: load_eliminater, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.639.948 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: stopgrad_eliminater [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.008 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: stopgrad_eliminater, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.028 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: special_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.088 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: special_op_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.107 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.167 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.184 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_add_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.244 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_add_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.270 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_set_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.331 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_set_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.350 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_get_depend_swap [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.409 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_get_depend_swap, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.427 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: environ_add_const_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.486 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: environ_add_const_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.504 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: value_based_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.565 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: value_based_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.582 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: parallel_virtual_node [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.654 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: parallel_virtual_node, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.675 [mindspore/ccsrc/frontend/optimizer/opt.cc:340] ApplySubstitutionsToIR] Start substitution: const_output_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.737 [mindspore/ccsrc/frontend/optimizer/opt.cc:347] ApplySubstitutionsToIR] End substitution: const_output_eliminate, change: 0 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.764 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_1.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.792 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.b_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.813 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_b_r1_b_2 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.889 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.b_2.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.913 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.969 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.640.992 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.039 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.061 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.107 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.138 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.renormalize [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.162 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.renormalize.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.183 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_b.r1.cse [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.353 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_b.r1.cse.unchanged [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.386 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_b end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.413 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass optimize_parallel_all_gather_comm start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.473 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass optimize_parallel_all_gather_comm end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.501 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_param_gather start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.521 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_param_gather end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.544 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cconv start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.612 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cconv end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.641.642 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass loop_unroll start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.642.420 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start loop_unroll_optimizer.r1.loop_unroll [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.642.465 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, loop_unroll_optimizer_r1_loop_unroll [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.642.554 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End loop_unroll_optimizer.r1.loop_unroll.unchanged [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.642.589 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass loop_unroll end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.642.620 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass opt_after_cconv start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.642.645 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.c_1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.642.665 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_after_cconv_r1_c_1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.642.954 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.c_1.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.642.980 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.parameter_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.006 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.parameter_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.028 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_depend_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.080 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_depend_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.117 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_assign_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.165 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_assign_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.188 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.updatestate_loads_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.233 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.updatestate_loads_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.255 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.cse [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.411 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.cse.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.440 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_cconv.r1.renormalize [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.464 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_cconv.r1.renormalize.unchanged [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.487 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass opt_after_cconv end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.513 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_dup_value start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.839 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_dup_value end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.875 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass tuple_transform start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.900 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.d_1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.643.921 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, opt_trans_graph_r1_d_1 [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.289 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.d_1.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.314 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_trans_graph.r1.renormalize [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.337 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_trans_graph.r1.renormalize.unchanged [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.361 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass tuple_transform end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.385 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass partial_unused_args_eliminate start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.407 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass partial_unused_args_eliminate end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.430 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_cache_embedding start ... [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.483 [mindspore/ccsrc/frontend/parallel/cache_embedding/cache_embedding.cc:706] AddCacheEmbedding] Parameters are all not cache enable. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.507 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_cache_embedding end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.541 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_recomputation start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.832 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_recomputation end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.867 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass cse_after_recomputation start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.896 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start opt_after_recompute.r1.cse [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.644.993 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End opt_after_recompute.r1.cse.unchanged [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.024 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass cse_after_recomputation end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.049 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass environ_conv start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.121 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass environ_conv end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.150 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass swap_dp_allreduce_reducescatter start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.196 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass swap_dp_allreduce_reducescatter end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.224 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass bias_add_comm_swap start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.246 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass bias_add_comm_swap end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.269 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_micro_interleaved_index start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.290 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_micro_interleaved_index end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.313 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass label_fine_grained_interleaved_index start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.336 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass label_fine_grained_interleaved_index end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.359 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass merge_cast_opt start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.378 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass merge_cast_opt end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.400 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass slice_recompute_activation start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.450 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass slice_recompute_activation end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.477 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass micro_interleaved_order_control start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.497 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass micro_interleaved_order_control end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.519 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass assign_add_opt start ... [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.618 [mindspore/ccsrc/frontend/parallel/pass/assign_add_opt.cc:466] AssignAddOpt] Merge multi matmul assign add begin and concat eliminate enable flag is:0 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.675 [mindspore/ccsrc/frontend/parallel/pass/pass_utils.cc:122] ExtractBackwardMatMul] backward_matmul_dx_dw_map size:0 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.723 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass assign_add_opt end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.749 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass ForceFp32Comm start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.769 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass ForceFp32Comm end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.791 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass remove_cast_before_assign_add start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.833 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass remove_cast_before_assign_add end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.860 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass full_micro_interleaved_order_control start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.881 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass full_micro_interleaved_order_control end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.904 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass reorder_send_recv_between_fp_bp start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.924 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass reorder_send_recv_between_fp_bp end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.645.946 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass comm_op_add_attrs start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.029 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass comm_op_add_attrs end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.056 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass add_comm_op_reuse_tag start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.140 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass add_comm_op_reuse_tag end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.167 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_split_concat_branches start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.187 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_split_concat_branches end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.209 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass interleave_parallel_branches start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.228 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass interleave_parallel_branches end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.250 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_in_pipeline start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.301 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_in_pipeline end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.325 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_opt_shard_grad_in_pipeline start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.347 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_opt_shard_grad_in_pipeline end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.370 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass control_data_broadcast_order start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.397 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass control_data_broadcast_order end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.420 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass grouped_pairwise_exchange_alltoall start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.452 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass grouped_pairwise_exchange_alltoall end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.475 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass offloading_packed_experts start ... [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.494 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:746] SetOffloadingPackedExpert] pass if (parallel::g_device_manager == nullptr) [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.513 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:707] CheckUserSettings] To activate the pass, set_auto_parallel_context 'enable_alltoall' should be true [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.530 [mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.cc:751] SetOffloadingPackedExpert] CheckUserSettings_not_pass [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.549 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass offloading_packed_experts end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.571 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_and_grad_model_parallel start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.591 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_and_grad_model_parallel end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.613 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_matmul_and_grad_allreduce start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.631 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_matmul_and_grad_allreduce end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.652 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_recompute_allgather_and_fa_grad start ... [WARNING] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.672 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.691 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_recompute_allgather_and_fa_grad end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.713 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_ring_attention start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.776 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_ring_attention end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.802 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass overlap_grad_flash_sp start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.857 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass overlap_grad_flash_sp end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.882 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass begin_end_overlap_inline start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.900 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass begin_end_overlap_inline end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.919 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_matmul_comm_elemetwise start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.948 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_matmul_comm_elemetwise end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.972 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass split_layernorm_comm start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.646.993 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass split_layernorm_comm end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.016 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass handle_group_info start ... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.039 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass handle_group_info end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.062 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1296] operator()] Pass symbol_engine_optimizer start ... [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.088 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.build [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.130 [mindspore/ccsrc/frontend/optimizer/irpass/symbol_engine_optimizer.cc:39] operator()] There is no dynamic shape node, the SymbolEngineBuilder is disabled. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.154 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.build.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.177 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_shapecalc [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.199 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_shapecalc [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.268 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_shapecalc.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.293 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.elim_not_effective [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.312 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_elim_not_effective [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.403 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.elim_not_effective.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.426 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.opt_reshape [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.446 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_opt_reshape [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.508 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.opt_reshape.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.531 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.fold_const_symbol [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.551 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, symbol_engine_opt_r1_fold_const_symbol [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.638 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.fold_const_symbol.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.660 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start symbol_engine_opt.r1.renormalize [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.681 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End symbol_engine_opt.r1.renormalize.unchanged [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.719 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1321] operator()] Pass symbol_engine_optimizer end. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.749 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end optimize action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.769 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.805 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start pipeline_parallel_scheduler action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.828 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end pipeline_parallel_scheduler action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.846 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.872 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start auto_monad_reorder action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.968 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end auto_monad_reorder action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.647.989 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.648.016 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start get_jit_bprop_graph action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.648.036 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end get_jit_bprop_graph action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.648.054 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.648.079 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.648.097 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end rewriter_after_jit_bprop_graph action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.648.115 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.648.139 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start eliminate_special_op_node action. [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.648.799 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.ad_related_special_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.648.847 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_ad_related_special_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.648.924 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.ad_related_special_op_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.648.953 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.mutable_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.648.972 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_mutable_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.047 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.mutable_op_eliminate.unchanged [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.073 [mindspore/ccsrc/frontend/optimizer/optimizer.h:240] operator()] Start special_op_eliminate.r1.convert_tensor_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.093 [mindspore/ccsrc/frontend/optimizer/opt.cc:391] operator()] IR >> SUB, *, special_op_eliminate_r1_convert_tensor_op_eliminate [INFO] OPTIMIZER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.152 [mindspore/ccsrc/frontend/optimizer/optimizer.h:244] operator()] End special_op_eliminate.r1.convert_tensor_op_eliminate.unchanged [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.181 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end eliminate_special_op_node action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.201 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.235 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start distribtued_split action. [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.289 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:378] GenerateStrategy] Current parallel mode is semi_auto_parallel [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.310 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:390] GenerateStrategy] Generated distributed strategy is 1 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.479 [mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc:1277] Run] All nodes are on this process so there's no need to build and split distributed graph. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.509 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end distribtued_split action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.528 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.559 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start validate action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.699 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end validate action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.721 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PROFILER(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.820 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc:49] IsProfilingParallelStrategyEnabled] Profiling parallel strategy is disabled. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.649.911 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start task_emit action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.650.139 [mindspore/ccsrc/pipeline/jit/ps/action.cc:1679] SetRunMode] Run graph mode with kernel by kernel by configuration. [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:04.650.340 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1057] CompileGraphs] Status record: start compile function graph: 4_3_1___main___Net_construct_20 [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:04.650.657 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:04.651.143 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: 4_3_1___main___Net_construct_20 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:04.651.409 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-12944936785892925600, the max communication size is 1 MB. [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:04.651.439 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-12944936785892925600, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:04.651.472 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-12944936785892925600. Call 'WaitCommInitDone' later to wait initialization to be done. [WARNING] DEVICE(187742,fffe7b7fe0f0,python):2025-02-07-15:58:04.730.518 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [INFO] DEVICE(187742,fffe7b7fe0f0,python):2025-02-07-15:58:04.730.622 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:90] Initialize] Start initializing hccl watchdog on device side... [INFO] DEVICE(187742,fffe7b7fe0f0,python):2025-02-07-15:58:04.730.653 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:54] Initialize] Initialize hccl watch dog handler. global rank id: 0 local rank id: 0, global rank size: 8 [INFO] DEVICE(187742,fffe7b7fe0f0,python):2025-02-07-15:58:04.730.747 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:95] Initialize] hccl watchdog on device side is successfully initialized. [INFO] DEVICE(187742,fffdfd7fa0f0,python):2025-02-07-15:58:04.730.777 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:119] WatchDogProcess] WatchDogProcess start [INFO] DEVICE(187742,fffdfd7fa0f0,python):2025-02-07-15:58:04.730.818 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:101] DoProcess] Start check watch dog thread in every 2s . [INFO] DEVICE(187742,fffe7b7fe0f0,python):2025-02-07-15:58:04.730.819 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group hccl_world_group [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.730.931 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 655.926 msec. [WARNING] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.730.967 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.731.001 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-16453000547691086251 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.731.018 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-16453000547691086251 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.732.551 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 1.475 msec. [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.732.848 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-16453000547691086251 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.732.880 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.272 msec. [WARNING] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.732.895 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-16453000547691086251 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.732.916 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187742,fffdfcff90f0,python):2025-02-07-15:58:04.733.176 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-16453000547691086251, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187818,fffe17fff0f0,python):2025-02-07-15:58:04.800.719 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [INFO] DEVICE(187818,fffe17fff0f0,python):2025-02-07-15:58:04.800.806 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:90] Initialize] Start initializing hccl watchdog on device side... [INFO] DEVICE(187818,fffe17fff0f0,python):2025-02-07-15:58:04.800.836 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:54] Initialize] Initialize hccl watch dog handler. global rank id: 6 local rank id: 6, global rank size: 8 [INFO] DEVICE(187818,fffe17fff0f0,python):2025-02-07-15:58:04.801.400 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:95] Initialize] hccl watchdog on device side is successfully initialized. [INFO] DEVICE(187818,fffe16ffd0f0,python):2025-02-07-15:58:04.801.427 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:119] WatchDogProcess] WatchDogProcess start [INFO] DEVICE(187818,fffe16ffd0f0,python):2025-02-07-15:58:04.801.468 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:101] DoProcess] Start check watch dog thread in every 2s . [INFO] DEVICE(187818,fffe17fff0f0,python):2025-02-07-15:58:04.801.481 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group hccl_world_group [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:04.801.568 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 390.591 msec. [WARNING] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:04.801.620 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:04.801.660 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-511848487187618470 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:04.801.679 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-511848487187618470 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:04.801.865 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.142 msec. [WARNING] DEVICE(187818,fffeba7fc0f0,python):2025-02-07-15:58:04.802.141 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group 2-511848487187618470 from the meta server node...Retry time: 399/400, sleep 1 [WARNING] DEVICE(187775,fffeae7fc0f0,python):2025-02-07-15:58:04.832.602 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [INFO] DEVICE(187775,fffeae7fc0f0,python):2025-02-07-15:58:04.832.752 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:90] Initialize] Start initializing hccl watchdog on device side... [INFO] DEVICE(187775,fffeae7fc0f0,python):2025-02-07-15:58:04.832.783 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:54] Initialize] Initialize hccl watch dog handler. global rank id: 3 local rank id: 3, global rank size: 8 [INFO] DEVICE(187775,fffeae7fc0f0,python):2025-02-07-15:58:04.833.247 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:95] Initialize] hccl watchdog on device side is successfully initialized. [INFO] DEVICE(187775,fffead7fa0f0,python):2025-02-07-15:58:04.833.284 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:119] WatchDogProcess] WatchDogProcess start [INFO] DEVICE(187775,fffead7fa0f0,python):2025-02-07-15:58:04.833.334 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:101] DoProcess] Start check watch dog thread in every 2s . [INFO] DEVICE(187775,fffeae7fc0f0,python):2025-02-07-15:58:04.833.338 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group hccl_world_group [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.833.459 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 580.977 msec. [WARNING] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.833.502 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.833.539 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-5488101015797526856 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.833.555 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-5488101015797526856 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.836.975 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 3.363 msec. [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.837.181 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-5488101015797526856 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.837.224 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.205 msec. [WARNING] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.837.241 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5488101015797526856 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.837.261 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187775,fffe0e7fc0f0,python):2025-02-07-15:58:04.837.538 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5488101015797526856, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:04.891.977 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [INFO] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:04.892.063 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:90] Initialize] Start initializing hccl watchdog on device side... [INFO] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:04.892.095 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:54] Initialize] Initialize hccl watch dog handler. global rank id: 7 local rank id: 7, global rank size: 8 [INFO] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:04.892.513 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:95] Initialize] hccl watchdog on device side is successfully initialized. [INFO] DEVICE(187834,fffea57fa0f0,python):2025-02-07-15:58:04.892.552 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:119] WatchDogProcess] WatchDogProcess start [INFO] DEVICE(187834,fffea57fa0f0,python):2025-02-07-15:58:04.892.591 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:101] DoProcess] Start check watch dog thread in every 2s . [INFO] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:04.892.601 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group hccl_world_group [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.892.736 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 784.662 msec. [WARNING] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.892.777 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.892.813 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-5488101015797526856 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.892.830 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-5488101015797526856 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.893.009 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.139 msec. [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.893.374 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-5488101015797526856 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.893.401 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.364 msec. [WARNING] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.893.418 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5488101015797526856 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:04.893.452 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:04.893.707 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5488101015797526856, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187789,fffe9e7fc0f0,python):2025-02-07-15:58:04.911.137 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [INFO] DEVICE(187789,fffe9e7fc0f0,python):2025-02-07-15:58:04.911.218 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:90] Initialize] Start initializing hccl watchdog on device side... [INFO] DEVICE(187789,fffe9e7fc0f0,python):2025-02-07-15:58:04.911.249 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:54] Initialize] Initialize hccl watch dog handler. global rank id: 4 local rank id: 4, global rank size: 8 [INFO] DEVICE(187789,fffe9e7fc0f0,python):2025-02-07-15:58:04.911.907 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:95] Initialize] hccl watchdog on device side is successfully initialized. [INFO] DEVICE(187789,fffe9d7fa0f0,python):2025-02-07-15:58:04.911.932 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:119] WatchDogProcess] WatchDogProcess start [INFO] DEVICE(187789,fffe9d7fa0f0,python):2025-02-07-15:58:04.911.962 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:101] DoProcess] Start check watch dog thread in every 2s . [INFO] DEVICE(187789,fffe9e7fc0f0,python):2025-02-07-15:58:04.911.994 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group hccl_world_group [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.912.099 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 598.888 msec. [WARNING] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.912.142 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.912.179 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-16453000547691086251 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.912.198 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-16453000547691086251 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.912.381 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.139 msec. [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.912.712 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-16453000547691086251 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.912.759 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.347 msec. [WARNING] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.912.779 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-16453000547691086251 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:04.912.800 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187789,fffe9e7fc0f0,python):2025-02-07-15:58:04.913.068 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-16453000547691086251, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187764,fffe057fa0f0,python):2025-02-07-15:58:04.916.845 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [INFO] DEVICE(187764,fffe057fa0f0,python):2025-02-07-15:58:04.916.914 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:90] Initialize] Start initializing hccl watchdog on device side... [INFO] DEVICE(187764,fffe057fa0f0,python):2025-02-07-15:58:04.916.939 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:54] Initialize] Initialize hccl watch dog handler. global rank id: 2 local rank id: 2, global rank size: 8 [INFO] DEVICE(187764,fffe057fa0f0,python):2025-02-07-15:58:04.917.012 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:95] Initialize] hccl watchdog on device side is successfully initialized. [INFO] DEVICE(187764,fffdeffff0f0,python):2025-02-07-15:58:04.917.064 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:119] WatchDogProcess] WatchDogProcess start [INFO] DEVICE(187764,fffe057fa0f0,python):2025-02-07-15:58:04.917.083 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group hccl_world_group [INFO] DEVICE(187764,fffdeffff0f0,python):2025-02-07-15:58:04.917.118 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:101] DoProcess] Start check watch dog thread in every 2s . [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.917.168 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 386.229 msec. [WARNING] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.917.201 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.917.238 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-511848487187618470 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.917.257 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-511848487187618470 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.920.729 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 3.412 msec. [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.920.867 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-511848487187618470 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.920.898 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.133 msec. [WARNING] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.920.916 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-511848487187618470 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:04.920.938 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187764,fffdeeffd0f0,python):2025-02-07-15:58:04.921.233 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-511848487187618470, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187803,fffe86ffd0f0,python):2025-02-07-15:58:04.933.872 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [INFO] DEVICE(187803,fffe86ffd0f0,python):2025-02-07-15:58:04.933.957 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:90] Initialize] Start initializing hccl watchdog on device side... [INFO] DEVICE(187803,fffe86ffd0f0,python):2025-02-07-15:58:04.933.990 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:54] Initialize] Initialize hccl watch dog handler. global rank id: 5 local rank id: 5, global rank size: 8 [INFO] DEVICE(187803,fffe86ffd0f0,python):2025-02-07-15:58:04.934.089 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:95] Initialize] hccl watchdog on device side is successfully initialized. [INFO] DEVICE(187803,fffe85ffb0f0,python):2025-02-07-15:58:04.934.156 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:119] WatchDogProcess] WatchDogProcess start [INFO] DEVICE(187803,fffe86ffd0f0,python):2025-02-07-15:58:04.934.168 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group hccl_world_group [INFO] DEVICE(187803,fffe85ffb0f0,python):2025-02-07-15:58:04.934.216 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:101] DoProcess] Start check watch dog thread in every 2s . [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:04.934.293 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 595.301 msec. [WARNING] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:04.934.334 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:04.934.374 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-12944936785892925600 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:04.934.393 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-12944936785892925600 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:04.934.580 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.141 msec. [WARNING] DEVICE(187803,fffe877fe0f0,python):2025-02-07-15:58:04.934.870 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group 2-12944936785892925600 from the meta server node...Retry time: 399/400, sleep 2 [WARNING] DEVICE(187753,fffe767fc0f0,python):2025-02-07-15:58:04.935.869 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [INFO] DEVICE(187753,fffe767fc0f0,python):2025-02-07-15:58:04.935.963 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:90] Initialize] Start initializing hccl watchdog on device side... [INFO] DEVICE(187753,fffe767fc0f0,python):2025-02-07-15:58:04.935.994 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:54] Initialize] Initialize hccl watch dog handler. global rank id: 1 local rank id: 1, global rank size: 8 [INFO] DEVICE(187753,fffe767fc0f0,python):2025-02-07-15:58:04.936.084 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:95] Initialize] hccl watchdog on device side is successfully initialized. [INFO] DEVICE(187753,fffe757fa0f0,python):2025-02-07-15:58:04.936.136 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:119] WatchDogProcess] WatchDogProcess start [INFO] DEVICE(187753,fffe767fc0f0,python):2025-02-07-15:58:04.936.153 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group hccl_world_group [INFO] DEVICE(187753,fffe757fa0f0,python):2025-02-07-15:58:04.936.196 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:101] DoProcess] Start check watch dog thread in every 2s . [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.936.276 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 583.006 msec. [WARNING] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.936.313 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.936.344 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-12944936785892925600 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.936.360 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-12944936785892925600 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.941.220 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 4.805 msec. [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.941.361 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-12944936785892925600 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.941.399 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.139 msec. [WARNING] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.941.414 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-12944936785892925600 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:04.941.432 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187753,fffdd67fc0f0,python):2025-02-07-15:58:04.941.731 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-12944936785892925600, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187775,fffe0e7fc0f0,python):2025-02-07-15:58:04.953.345 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5488101015797526856 [INFO] DEVICE(187775,fffe0e7fc0f0,python):2025-02-07-15:58:04.953.435 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-5488101015797526856 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.953.496 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 116.189 msec. [WARNING] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.953.531 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5488101015797526856 [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:04.953.686 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-4190060298023907007, the max communication size is 1 MB. [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:04.953.721 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-4190060298023907007, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:04.953.742 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-4190060298023907007. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.953.761 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-4190060298023907007 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.953.777 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-4190060298023907007 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.953.916 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.105 msec. [WARNING] DEVICE(187775,fffeaeffd0f0,python):2025-02-07-15:58:04.954.104 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group 2-4190060298023907007 from the meta server node...Retry time: 399/400, sleep 2 [WARNING] DEVICE(187742,fffdfcff90f0,python):2025-02-07-15:58:04.969.629 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-16453000547691086251 [INFO] DEVICE(187742,fffdfcff90f0,python):2025-02-07-15:58:04.969.743 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-16453000547691086251 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.969.830 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 236.847 msec. [WARNING] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.969.863 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-16453000547691086251 [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:04.970.026 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-5208665662337742843, the max communication size is 1 MB. [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:04.970.061 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-5208665662337742843, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:04.970.083 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-5208665662337742843. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.970.104 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-5208665662337742843 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.970.119 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-5208665662337742843 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.970.706 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.536 msec. [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.970.833 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-5208665662337742843 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.970.859 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.119 msec. [WARNING] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.970.882 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5208665662337742843 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:04.970.900 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187742,fffde7fff0f0,python):2025-02-07-15:58:04.971.133 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5208665662337742843, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:05.130.022 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5488101015797526856 [INFO] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:05.130.120 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-5488101015797526856 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:05.130.173 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 236.683 msec. [WARNING] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:05.130.202 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5488101015797526856 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:05.130.361 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-16057586909177180503, the max communication size is 1 MB. [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:05.130.386 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-16057586909177180503, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:05.130.410 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-16057586909177180503. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:05.130.431 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-16057586909177180503 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:05.130.448 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-16057586909177180503 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:05.130.587 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.105 msec. [WARNING] DEVICE(187834,fffea6ffd0f0,python):2025-02-07-15:58:05.130.745 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group 2-16057586909177180503 from the meta server node...Retry time: 399/400, sleep 2 [WARNING] DEVICE(187789,fffe9e7fc0f0,python):2025-02-07-15:58:05.150.114 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-16453000547691086251 [INFO] DEVICE(187789,fffe9e7fc0f0,python):2025-02-07-15:58:05.150.214 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-16453000547691086251 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.150.280 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 237.431 msec. [WARNING] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.150.315 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-16453000547691086251 [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:05.150.471 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-5435772415009061329, the max communication size is 1 MB. [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:05.150.494 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-5435772415009061329, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:05.150.519 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-5435772415009061329. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.150.542 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-5435772415009061329 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.150.561 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-5435772415009061329 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.153.173 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 2.56 msec. [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.153.301 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-5435772415009061329 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.153.334 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.121 msec. [WARNING] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.153.352 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5435772415009061329 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.153.383 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187789,fffdfe7fc0f0,python):2025-02-07-15:58:05.153.651 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5435772415009061329, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.302.431 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-511848487187618470 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.302.484 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 500.563 msec. [WARNING] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.302.505 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-511848487187618470 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.302.530 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187818,fffe17fff0f0,python):2025-02-07-15:58:05.302.867 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-511848487187618470, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187764,fffdeeffd0f0,python):2025-02-07-15:58:05.365.523 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-511848487187618470 [INFO] DEVICE(187764,fffdeeffd0f0,python):2025-02-07-15:58:05.365.630 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-511848487187618470 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.365.705 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 444.711 msec. [WARNING] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.365.740 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-511848487187618470 [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:05.365.891 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-5208665662337742843, the max communication size is 1 MB. [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:05.365.933 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-5208665662337742843, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:05.365.961 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-5208665662337742843. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.365.987 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-5208665662337742843 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.366.006 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-5208665662337742843 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.366.168 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.12 msec. [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.366.366 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-5208665662337742843 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.366.404 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.208 msec. [WARNING] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.366.423 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5208665662337742843 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.366.444 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187764,fffdeeffd0f0,python):2025-02-07-15:58:05.366.658 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5208665662337742843, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187764,fffdeeffd0f0,python):2025-02-07-15:58:05.421.057 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5208665662337742843 [INFO] DEVICE(187764,fffdeeffd0f0,python):2025-02-07-15:58:05.421.143 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-5208665662337742843 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.421.185 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 54.713 msec. [WARNING] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.421.206 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5208665662337742843 [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:05.421.289 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-3358271254418797552, the max communication size is 1 MB. [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:05.421.316 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-3358271254418797552, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:05.421.340 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-3358271254418797552. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.421.362 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-3358271254418797552 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.421.380 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-3358271254418797552 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.421.917 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.499 msec. [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.422.013 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-3358271254418797552 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.422.042 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.093 msec. [WARNING] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.422.061 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-3358271254418797552 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:05.422.092 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187764,fffdee7fc0f0,python):2025-02-07-15:58:05.422.336 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-3358271254418797552, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187742,fffde7fff0f0,python):2025-02-07-15:58:05.425.325 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5208665662337742843 [INFO] DEVICE(187742,fffde7fff0f0,python):2025-02-07-15:58:05.425.418 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-5208665662337742843 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:05.425.467 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 454.534 msec. [WARNING] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:05.425.490 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5208665662337742843 [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:05.425.576 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-5004544844489628105, the max communication size is 1 MB. [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:05.425.597 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-5004544844489628105, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:05.425.616 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-5004544844489628105. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:05.425.635 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-5004544844489628105 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:05.425.649 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-5004544844489628105 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:05.426.155 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.466 msec. [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:05.426.259 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-5004544844489628105 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:05.426.285 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.098 msec. [WARNING] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:05.426.299 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5004544844489628105 [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:05.426.316 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187742,fffde77fe0f0,python):2025-02-07-15:58:05.426.569 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5004544844489628105, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.435.118 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-12944936785892925600 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.435.185 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 500.564 msec. [WARNING] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.435.207 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-12944936785892925600 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.435.233 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187803,fffe86ffd0f0,python):2025-02-07-15:58:05.435.526 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-12944936785892925600, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187775,fffeaeffd0f0,python):2025-02-07-15:58:05.454.285 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group 2-4190060298023907007 from the meta server node...Retry time: 398/400, sleep 1 [WARNING] DEVICE(187753,fffdd67fc0f0,python):2025-02-07-15:58:05.493.491 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-12944936785892925600 [INFO] DEVICE(187753,fffdd67fc0f0,python):2025-02-07-15:58:05.493.597 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-12944936785892925600 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:05.493.663 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 552.181 msec. [WARNING] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:05.493.694 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-12944936785892925600 [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:05.493.866 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-4190060298023907007, the max communication size is 1 MB. [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:05.493.899 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-4190060298023907007, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:05.493.922 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-4190060298023907007. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:05.493.943 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-4190060298023907007 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:05.493.959 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-4190060298023907007 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:05.494.508 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.504 msec. [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:05.494.617 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-4190060298023907007 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:05.494.644 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.103 msec. [WARNING] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:05.494.670 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-4190060298023907007 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:05.494.686 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187753,fffdd5ffb0f0,python):2025-02-07-15:58:05.494.927 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-4190060298023907007, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187818,fffe17fff0f0,python):2025-02-07-15:58:05.546.152 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-511848487187618470 [INFO] DEVICE(187818,fffe17fff0f0,python):2025-02-07-15:58:05.546.268 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-511848487187618470 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.546.348 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 243.76 msec. [WARNING] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.546.389 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-511848487187618470 [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:05.546.553 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-5435772415009061329, the max communication size is 1 MB. [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:05.546.598 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-5435772415009061329, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:05.546.625 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-5435772415009061329. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.546.650 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-5435772415009061329 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.546.670 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-5435772415009061329 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.546.840 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.13 msec. [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.547.025 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-5435772415009061329 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.547.053 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.172 msec. [WARNING] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.547.071 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5435772415009061329 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.547.093 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187818,fffe17fff0f0,python):2025-02-07-15:58:05.547.315 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5435772415009061329, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187789,fffdfe7fc0f0,python):2025-02-07-15:58:05.605.259 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5435772415009061329 [INFO] DEVICE(187789,fffdfe7fc0f0,python):2025-02-07-15:58:05.605.356 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-5435772415009061329 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.605.413 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 451.988 msec. [WARNING] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.605.445 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5435772415009061329 [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:05.605.531 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-6541264347459079684, the max communication size is 1 MB. [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:05.605.553 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-6541264347459079684, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:05.605.576 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-6541264347459079684. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.605.599 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-6541264347459079684 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.605.618 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-6541264347459079684 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.606.156 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.494 msec. [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.606.241 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-6541264347459079684 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.606.270 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.08 msec. [WARNING] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.606.287 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-6541264347459079684 [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:05.606.307 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187789,fffdfdffb0f0,python):2025-02-07-15:58:05.606.537 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-6541264347459079684, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187834,fffea6ffd0f0,python):2025-02-07-15:58:05.630.924 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group 2-16057586909177180503 from the meta server node...Retry time: 398/400, sleep 1 [WARNING] DEVICE(187803,fffe86ffd0f0,python):2025-02-07-15:58:05.674.296 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-12944936785892925600 [INFO] DEVICE(187803,fffe86ffd0f0,python):2025-02-07-15:58:05.674.399 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-12944936785892925600 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.674.454 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 239.181 msec. [WARNING] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.674.479 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-12944936785892925600 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:05.674.653 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-16057586909177180503, the max communication size is 1 MB. [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:05.674.684 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-16057586909177180503, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:05.674.711 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-16057586909177180503. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.674.735 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-16057586909177180503 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.674.755 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-16057586909177180503 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.677.239 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 2.429 msec. [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.677.397 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-16057586909177180503 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.677.434 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.151 msec. [WARNING] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.677.507 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-16057586909177180503 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:05.677.591 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187803,fffde2ffd0f0,python):2025-02-07-15:58:05.677.858 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-16057586909177180503, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187818,fffe17fff0f0,python):2025-02-07-15:58:05.789.756 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5435772415009061329 [INFO] DEVICE(187818,fffe17fff0f0,python):2025-02-07-15:58:05.789.850 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-5435772415009061329 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.789.900 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 242.773 msec. [WARNING] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.789.928 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5435772415009061329 [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:05.790.018 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-6853331267304275293, the max communication size is 1 MB. [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:05.790.050 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-6853331267304275293, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:05.790.073 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-6853331267304275293. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.790.097 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-6853331267304275293 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.790.117 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-6853331267304275293 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.792.546 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 2.384 msec. [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.792.706 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-6853331267304275293 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.792.737 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.153 msec. [WARNING] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.792.772 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-6853331267304275293 [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:05.792.794 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187818,fffe15ffb0f0,python):2025-02-07-15:58:05.793.066 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-6853331267304275293, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:05.954.498 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-4190060298023907007 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:05.954.553 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 1000.61 msec. [WARNING] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:05.954.571 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-4190060298023907007 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:05.954.589 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187775,fffe0e7fc0f0,python):2025-02-07-15:58:05.954.813 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-4190060298023907007, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187753,fffdd5ffb0f0,python):2025-02-07-15:58:06.009.550 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-4190060298023907007 [INFO] DEVICE(187753,fffdd5ffb0f0,python):2025-02-07-15:58:06.009.647 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-4190060298023907007 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:06.009.710 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 514.977 msec. [WARNING] DEVICE(187775,fffe0e7fc0f0,python):2025-02-07-15:58:06.009.701 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-4190060298023907007 [WARNING] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:06.009.743 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-4190060298023907007 [INFO] DEVICE(187775,fffe0e7fc0f0,python):2025-02-07-15:58:06.009.798 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-4190060298023907007 [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.009.847 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-5004544844489628105, the max communication size is 1 MB. [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:06.009.856 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 55.226 msec. [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.009.877 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-5004544844489628105, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:06.009.899 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-5004544844489628105. Call 'WaitCommInitDone' later to wait initialization to be done. [WARNING] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:06.009.897 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-4190060298023907007 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:06.009.922 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-5004544844489628105 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:06.009.940 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-5004544844489628105 [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.009.983 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-3358271254418797552, the max communication size is 1 MB. [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.010.007 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-3358271254418797552, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:06.010.026 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-3358271254418797552. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:06.010.049 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-3358271254418797552 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:06.010.066 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-3358271254418797552 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:06.010.068 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.097 msec. [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:06.010.202 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.103 msec. [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:06.010.246 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-5004544844489628105 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:06.010.274 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.177 msec. [WARNING] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:06.010.288 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-5004544844489628105 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:06.010.305 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:06.010.350 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-3358271254418797552 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:06.010.377 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.146 msec. [WARNING] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:06.010.393 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-3358271254418797552 [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:06.010.420 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187753,fffdd5ffb0f0,python):2025-02-07-15:58:06.010.514 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-5004544844489628105, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187775,fffe0e7fc0f0,python):2025-02-07-15:58:06.010.621 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-3358271254418797552, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(187764,fffdee7fc0f0,python):2025-02-07-15:58:06.069.598 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-3358271254418797552 [INFO] DEVICE(187764,fffdee7fc0f0,python):2025-02-07-15:58:06.069.703 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-3358271254418797552 [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:06.069.773 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 647.627 msec. [WARNING] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:06.069.808 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-3358271254418797552 [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.069.880 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:247] InitCommGroup] The MOC occupied by HCCL of graph: 4_3_1___main___Net_construct_20 is 1600 MB. [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.069.936 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1072] CompileGraphs] [PROF]InitCommGroup costs 1786.04 msec. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:06.070.049 [mindspore/ccsrc/distributed/collective/collective_manager.cc:833] WaitAllCommInitDone] All device communictor is initialized. You can launch communication operators after this step. [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.070.072 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1076] CompileGraphs] [PROF]WaitAllCommInit costs 0.028 msec. [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.070.253 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1347] IsEnableControlFlowInline] Disable switch inline, executor mode:1 [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.070.288 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1180] DoUnifyMindIRPass] Do unify mindir pass for graph 4_3_1___main___Net_construct_20 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.070.321 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_pm_0_erase_invalid_micro_depend in 2.71 us [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.070.489 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:937] EnableKBKCompileCache] Disable backend compile cache by front config. [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.070.750 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:620] BuildSymbolEngine] Status record: skip build symbol engine for function graph: 4_3_1___main___Net_construct_20 [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.070.796 [mindspore/ccsrc/backend/graph_compiler/graph_partition.cc:866] Partition] GraphPartion Info: 4_3_1___main___Net_construct_20 inline mode:0 [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.071.074 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1519] CompileGraph] Compile graph: 4_3_1___main___Net_construct_20, Split segments size: 2 [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.071.141 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1555] CompileGraphFromSegment] Compile normal segment, the first node: @4_3_1___main___Net_construct_20:CNode_21{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.071.431 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:629] CompileGraph] Status record: start compile graph. [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.071.483 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:2227] ConstructKernelGraph] Create graph: 0 [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.072.550 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:3487] ConstructOutput] Output:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.072.835 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.072.980 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:637] CompileGraph] [PROF]ConstructKernelGraph costs 1.511 msec. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.073.109 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: kernel_graph0 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.073.286 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:449] RecursiveSetRunMode] Kernel graph: kernel_graph0, set run mode:KernelMode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.073.440 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:191] EliminateIllegalDataTypePass] Start eliminate illegal data type for kernel graph id:0 [WARNING] DEVICE(187753,fffdd5ffb0f0,python):2025-02-07-15:58:06.073.431 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5004544844489628105 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.073.509 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_0_convert_list_to_tuple in 23.11 us [INFO] DEVICE(187753,fffdd5ffb0f0,python):2025-02-07-15:58:06.073.521 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-5004544844489628105 [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:06.073.560 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 63.22 msec. [WARNING] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:06.073.580 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5004544844489628105 [WARNING] DEVICE(187775,fffe0e7fc0f0,python):2025-02-07-15:58:06.073.552 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-3358271254418797552 [INFO] DEVICE(187775,fffe0e7fc0f0,python):2025-02-07-15:58:06.073.644 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-3358271254418797552 [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.073.640 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:247] InitCommGroup] The MOC occupied by HCCL of graph: 4_3_1___main___Net_construct_20 is 1600 MB. [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.073.685 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1072] CompileGraphs] [PROF]InitCommGroup costs 1447.63 msec. [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:06.073.685 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 63.237 msec. [WARNING] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:06.073.710 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-3358271254418797552 [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:06.073.712 [mindspore/ccsrc/distributed/collective/collective_manager.cc:833] WaitAllCommInitDone] All device communictor is initialized. You can launch communication operators after this step. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.073.714 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_1_eliminate_func_type in 175.35 us [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.073.734 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1076] CompileGraphs] [PROF]WaitAllCommInit costs 0.025 msec. [WARNING] DEVICE(187742,fffde77fe0f0,python):2025-02-07-15:58:06.073.725 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-5004544844489628105 [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.073.765 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:247] InitCommGroup] The MOC occupied by HCCL of graph: 4_3_1___main___Net_construct_20 is 1600 MB. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.073.760 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:204] EliminateIllegalDataTypePass] [PROF]EliminateIllegalDataTypePass costs 0.317 msec. [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.073.800 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1072] CompileGraphs] [PROF]InitCommGroup costs 1536.53 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.073.794 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:151] CommonUnifyMindIR] start common unify mindir opt graph:0 [INFO] DEVICE(187742,fffde77fe0f0,python):2025-02-07-15:58:06.073.820 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-5004544844489628105 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.073.825 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: conv_transpose_to_conv_backprop_input [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:06.073.825 [mindspore/ccsrc/distributed/collective/collective_manager.cc:833] WaitAllCommInitDone] All device communictor is initialized. You can launch communication operators after this step. [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.073.846 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1076] CompileGraphs] [PROF]WaitAllCommInit costs 0.024 msec. [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:06.073.870 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 647.515 msec. [WARNING] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:06.073.893 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-5004544844489628105 [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.073.924 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1347] IsEnableControlFlowInline] Disable switch inline, executor mode:1 [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.073.953 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:247] InitCommGroup] The MOC occupied by HCCL of graph: 4_3_1___main___Net_construct_20 is 1600 MB. [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.073.960 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1180] DoUnifyMindIRPass] Do unify mindir pass for graph 4_3_1___main___Net_construct_20 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.073.959 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_0_conv_transpose_to_conv_backprop_input in 130.53 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.073.986 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: custom_op_reg_info_to_attr [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.073.994 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_pm_0_erase_invalid_micro_depend in 3.83 us [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.073.996 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1072] CompileGraphs] [PROF]InitCommGroup costs 1753 msec. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:06.074.022 [mindspore/ccsrc/distributed/collective/collective_manager.cc:833] WaitAllCommInitDone] All device communictor is initialized. You can launch communication operators after this step. [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.074.023 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1347] IsEnableControlFlowInline] Disable switch inline, executor mode:1 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.074.023 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_1_custom_op_reg_info_to_attr in 34.4 us [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.074.042 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1076] CompileGraphs] [PROF]WaitAllCommInit costs 0.022 msec. [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.074.057 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1180] DoUnifyMindIRPass] Do unify mindir pass for graph 4_3_1___main___Net_construct_20 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.074.047 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Custom not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.074.067 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_2_inplace_assign_for_custom_op in 19.4 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.074.090 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_pm_0_erase_invalid_micro_depend in 3.4 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.074.106 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_attr_to_unify_mindir [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.074.157 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:937] EnableKBKCompileCache] Disable backend compile cache by front config. [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.074.219 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1347] IsEnableControlFlowInline] Disable switch inline, executor mode:1 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.074.241 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_3_convert_attr_to_unify_mindir in 130.32 us [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.074.253 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1180] DoUnifyMindIRPass] Do unify mindir pass for graph 4_3_1___main___Net_construct_20 [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.074.258 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:937] EnableKBKCompileCache] Disable backend compile cache by front config. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.074.286 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_pm_0_erase_invalid_micro_depend in 3.17 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.074.284 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:164] CommonUnifyMindIR] [PROF]CommonUnifyMindIR costs 0.485 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.074.320 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:81] BackendCommonOptimization] Status record: start common optimization. graph id: 0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.074.364 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_dynamic_broadcast_to [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.074.421 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:620] BuildSymbolEngine] Status record: skip build symbol engine for function graph: 4_3_1___main___Net_construct_20 [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.074.445 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:937] EnableKBKCompileCache] Disable backend compile cache by front config. [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.074.473 [mindspore/ccsrc/backend/graph_compiler/graph_partition.cc:866] Partition] GraphPartion Info: 4_3_1___main___Net_construct_20 inline mode:0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.074.480 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_0_convert_dynamic_broadcast_to in 112.1 us [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.074.517 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:620] BuildSymbolEngine] Status record: skip build symbol engine for function graph: 4_3_1___main___Net_construct_20 [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.074.567 [mindspore/ccsrc/backend/graph_compiler/graph_partition.cc:866] Partition] GraphPartion Info: 4_3_1___main___Net_construct_20 inline mode:0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.074.648 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_1_convert_const_input_to_attr in 138.18 us [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.074.705 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:620] BuildSymbolEngine] Status record: skip build symbol engine for function graph: 4_3_1___main___Net_construct_20 [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.074.755 [mindspore/ccsrc/backend/graph_compiler/graph_partition.cc:866] Partition] GraphPartion Info: 4_3_1___main___Net_construct_20 inline mode:0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.074.781 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_2_custom_op_const_input_to_attr in 103.47 us [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.074.804 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1519] CompileGraph] Compile graph: 4_3_1___main___Net_construct_20, Split segments size: 2 [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.074.868 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1519] CompileGraph] Compile graph: 4_3_1___main___Net_construct_20, Split segments size: 2 [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.074.878 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1555] CompileGraphFromSegment] Compile normal segment, the first node: @4_3_1___main___Net_construct_20:CNode_21{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.074.902 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_3_convert_const_input_to_tensor_input_for_print in 92.68 us [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.074.951 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1555] CompileGraphFromSegment] Compile normal segment, the first node: @4_3_1___main___Net_construct_20:CNode_21{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.075.052 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1519] CompileGraph] Compile graph: 4_3_1___main___Net_construct_20, Split segments size: 2 [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.075.123 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1555] CompileGraphFromSegment] Compile normal segment, the first node: @4_3_1___main___Net_construct_20:CNode_21{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.075.143 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:629] CompileGraph] Status record: start compile graph. [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.075.200 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:629] CompileGraph] Status record: start compile graph. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.075.211 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:2227] ConstructKernelGraph] Create graph: 0 [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.075.346 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:2227] ConstructKernelGraph] Create graph: 0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.075.347 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_4_convert_tuple_output_to_maketuple in 411.24 us [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.075.374 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:629] CompileGraph] Status record: start compile graph. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.075.384 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_5_convert_unused_tuple_para_to_make_tuple in 3.38 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.075.409 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_6_flatten_concat_fission is enabled. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.075.447 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:2227] ConstructKernelGraph] Create graph: 0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.075.536 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_6_flatten_concat_fission in 103.22 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.075.663 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_7_inset_input_structural_for_py_execute in 97.01 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.075.687 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_8_broadcast_to_fusion is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.075.799 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_8_broadcast_to_fusion in 88.11 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.076.106 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_9_add_attr_to_node in 275.76 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.076.134 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_10_replace_addn is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.076.253 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_10_replace_addn in 94.98 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.076.307 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:93] BackendCommonOptimization] [PROF]BackendCommonOptimization costs 1.982 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.076.330 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:100] BackendCommonOptimization] Status record: end common optimization. graph id: 0 [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.076.333 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:3487] ConstructOutput] Output:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.076.380 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:258] OptimizationWithoutBackend] [PROF]OptimizationWithoutBackend costs 2.939 msec. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.076.447 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:3487] ConstructOutput] Output:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.076.572 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:3487] ConstructOutput] Output:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.076.617 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.076.759 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_0_renorm_split in 119.48 us [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.076.765 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.076.787 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:637] CompileGraph] [PROF]ConstructKernelGraph costs 1.606 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.076.791 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: reduce_axis_update [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.076.872 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.076.926 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: kernel_graph0 [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.076.929 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:637] CompileGraph] [PROF]ConstructKernelGraph costs 1.691 msec. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.077.037 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:637] CompileGraph] [PROF]ConstructKernelGraph costs 1.623 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.057 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_1_reduce_axis_update in 261.77 us [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.077.065 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: kernel_graph0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.085 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.113 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim ClipByNorm not exist in name to cnode [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.119 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:449] RecursiveSetRunMode] Kernel graph: kernel_graph0, set run mode:KernelMode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.135 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission in 25.76 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.158 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: space_to_batch_nd_attr_update [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.077.175 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: kernel_graph0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.200 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_3_space_to_batch_nd_attr_update in 39.09 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.224 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: batch_to_space_nd_attr_update [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.256 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_4_batch_to_space_nd_attr_update in 29.51 us [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.077.260 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:449] RecursiveSetRunMode] Kernel graph: kernel_graph0, set run mode:KernelMode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.290 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:191] EliminateIllegalDataTypePass] Start eliminate illegal data type for kernel graph id:0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.291 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdamWeightDecay not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.314 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_5_adam_weight_decay_unify_mindir in 31.47 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.357 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_6_add_depend_for_adamw in 22.88 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.364 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_0_convert_list_to_tuple in 24.69 us [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.077.368 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:449] RecursiveSetRunMode] Kernel graph: kernel_graph0, set run mode:KernelMode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.379 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_7_cdist_fission is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.399 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Cdist not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.077.422 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:191] EliminateIllegalDataTypePass] Start eliminate illegal data type for kernel graph id:0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.421 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_7_cdist_fission in 22.01 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.441 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.470 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim CdistGrad not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.492 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission in 21.45 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.077.495 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_0_convert_list_to_tuple in 25.88 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.511 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.077.532 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:191] EliminateIllegalDataTypePass] Start eliminate illegal data type for kernel graph id:0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.537 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion in 5.51 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.559 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.583 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion in 3.02 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.077.605 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_0_convert_list_to_tuple in 25.9 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.607 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.612 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_1_eliminate_func_type in 215.2 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.630 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_11_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir in 24.05 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.649 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.661 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:204] EliminateIllegalDataTypePass] [PROF]EliminateIllegalDataTypePass costs 0.367 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.669 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_12_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir_v2 in 18.98 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.695 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:151] CommonUnifyMindIR] start common unify mindir opt graph:0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.716 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.725 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: conv_transpose_to_conv_backprop_input [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.077.726 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_1_eliminate_func_type in 199.92 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.738 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_13_sparse_softmax_cross_entropy_with_logits_unify_mindir in 46.93 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.077.774 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:204] EliminateIllegalDataTypePass] [PROF]EliminateIllegalDataTypePass costs 0.349 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.781 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutExt not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.077.808 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:151] CommonUnifyMindIR] start common unify mindir opt graph:0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.803 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_14_dropout_ext_unify_mindir1 in 41.99 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.823 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutGradExt not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.843 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_15_dropoutgrad_ext_unify_mindir in 18.25 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.077.837 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: conv_transpose_to_conv_backprop_input [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.077.854 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_1_eliminate_func_type in 215.51 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.861 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Dropout not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.874 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_0_conv_transpose_to_conv_backprop_input in 145.27 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.880 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_16_dropout_unify_mindir1 in 18.18 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.900 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: custom_op_reg_info_to_attr [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.077.902 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:204] EliminateIllegalDataTypePass] [PROF]EliminateIllegalDataTypePass costs 0.368 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.899 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: dropoutgrad_unify_mindir [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.077.936 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:151] CommonUnifyMindIR] start common unify mindir opt graph:0 [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.940 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_1_custom_op_reg_info_to_attr in 37.83 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.962 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Custom not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.077.966 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: conv_transpose_to_conv_backprop_input [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.944 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_17_dropoutgrad_unify_mindir in 41.51 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.981 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_2_inplace_assign_for_custom_op in 18.76 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.077.978 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchange not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.077.993 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_0_conv_transpose_to_conv_backprop_input in 151.86 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.000 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_18_neighbor_exchange_unify_mindir in 22.01 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.077.999 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_attr_to_unify_mindir [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.017 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: custom_op_reg_info_to_attr [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.021 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2 not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.043 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_19_neighbor_exchange_v2_unify_mindir in 21.53 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.061 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2Grad not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.055 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_1_custom_op_reg_info_to_attr in 35.64 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.080 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_20_neighbor_exchange_v2_grad_unify_mindir in 17.65 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.076 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Custom not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.100 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAll not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.107 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_2_inplace_assign_for_custom_op in 29.64 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.124 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_attr_to_unify_mindir [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.118 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_0_conv_transpose_to_conv_backprop_input in 147.4 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.118 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_21_all_to_all_unify_mindir in 17.02 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.141 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAllV not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.143 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: custom_op_reg_info_to_attr [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.078.149 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_3_convert_attr_to_unify_mindir in 144.73 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.160 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_22_all_to_all_v_unify_mindir in 19.36 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.183 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_1_custom_op_reg_info_to_attr in 36.88 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.078.192 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:164] CommonUnifyMindIR] [PROF]CommonUnifyMindIR costs 0.492 msec. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.202 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Custom not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.208 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.220 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_2_inplace_assign_for_custom_op in 17.6 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.230 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_23_bn_split in 46.88 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.078.233 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:81] BackendCommonOptimization] Status record: start common optimization. graph id: 0 [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.236 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_attr_to_unify_mindir [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.248 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: bn_grad_unify_mindir [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.266 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_3_convert_attr_to_unify_mindir in 138.54 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.078.280 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_dynamic_broadcast_to [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.308 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:164] CommonUnifyMindIR] [PROF]CommonUnifyMindIR costs 0.496 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.315 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_24_bn_grad_unify_mindir in 62.84 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.349 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:81] BackendCommonOptimization] Status record: start common optimization. graph id: 0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.349 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.371 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_25_bn_grad_split in 32.22 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.388 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_3_convert_attr_to_unify_mindir in 146.31 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.391 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.078.395 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_0_convert_dynamic_broadcast_to in 110.86 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.396 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_dynamic_broadcast_to [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.412 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_26_batchnormgrad_to_bninfergrad in 20.02 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.430 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:164] CommonUnifyMindIR] [PROF]CommonUnifyMindIR costs 0.489 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.430 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.451 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.469 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:81] BackendCommonOptimization] Status record: start common optimization. graph id: 0 [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.478 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission in 24.77 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.498 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.515 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_dynamic_broadcast_to [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.517 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_28_batchnorm_to_bninfer in 18.24 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.523 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_0_convert_dynamic_broadcast_to in 122.51 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.537 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.559 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Lamb not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.078.563 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_1_convert_const_input_to_attr in 138.16 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.579 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge in 21.62 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.599 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Print not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.621 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_30_print_insert_placeholder_for_tensor_name in 21.61 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.635 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_0_convert_dynamic_broadcast_to in 114.62 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.644 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim GetNext not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.664 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_31_getnext_for_ge in 21.78 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.686 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNorm not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.078.695 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_2_custom_op_const_input_to_attr in 104.2 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.696 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_1_convert_const_input_to_attr in 145.57 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.706 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_32_sync_bn_split in 20.95 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.726 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.746 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_33_sync_bn_grad_split in 18.82 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.764 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.783 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdaptiveMaxPool2D not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.802 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_1_convert_const_input_to_attr in 139.53 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.801 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion in 19.94 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.078.815 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_3_convert_const_input_to_tensor_input_for_print in 94.59 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.839 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_2_custom_op_const_input_to_attr in 116.09 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.841 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AvgPoolGrad not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.862 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_35_avg_pool_grad_for_ge in 38.97 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.882 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.901 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.929 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion in 26.44 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.078.933 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_2_custom_op_const_input_to_attr in 103.68 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.948 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.968 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.078.971 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_3_convert_const_input_to_tensor_input_for_print in 107.02 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.078.988 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion in 19.59 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.079.051 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_3_convert_const_input_to_tensor_input_for_print in 93.67 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.079.247 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_38_add_attr_to_dump in 233.59 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.079.263 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_4_convert_tuple_output_to_maketuple in 415.21 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.079.299 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_5_convert_unused_tuple_para_to_make_tuple in 4.43 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.079.320 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_6_flatten_concat_fission is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.079.447 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_6_flatten_concat_fission in 105.29 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.079.444 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_4_convert_tuple_output_to_maketuple in 442.45 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.079.482 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_5_convert_unused_tuple_para_to_make_tuple in 4.13 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.079.486 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_4_convert_tuple_output_to_maketuple in 405.9 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.079.504 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_6_flatten_concat_fission is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.079.522 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_5_convert_unused_tuple_para_to_make_tuple in 4.19 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.079.544 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_6_flatten_concat_fission is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.079.568 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_7_inset_input_structural_for_py_execute in 93.75 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.079.590 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_8_broadcast_to_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.079.644 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_6_flatten_concat_fission in 118.38 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.079.643 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_39_ascend_mindir_op_adapter in 360.26 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.079.669 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_6_flatten_concat_fission in 103.55 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.079.674 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.079.698 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_8_broadcast_to_fusion in 87.82 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.079.701 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:263] DefineFlashAttentionPattern] Do FlashAttentionPattern V1. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.079.772 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_7_inset_input_structural_for_py_execute in 100.91 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.079.794 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_8_broadcast_to_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.079.791 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_7_inset_input_structural_for_py_execute in 95.38 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.079.816 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_8_broadcast_to_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.079.914 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_8_broadcast_to_fusion in 99.5 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.079.925 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_8_broadcast_to_fusion in 89.58 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.079.999 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 in 293.86 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.080.026 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.080.043 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_9_add_attr_to_node in 312.78 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.080.050 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:377] DefineFlashAttentionPattern] Do FlashAttentionPattern V2. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.080.072 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_10_replace_addn is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.080.201 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_10_replace_addn in 104.27 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.080.245 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:93] BackendCommonOptimization] [PROF]BackendCommonOptimization costs 2.007 msec. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.080.264 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:100] BackendCommonOptimization] Status record: end common optimization. graph id: 0 [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.080.301 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_9_add_attr_to_node in 343.95 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.080.311 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_9_add_attr_to_node in 366.89 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.080.327 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:258] OptimizationWithoutBackend] [PROF]OptimizationWithoutBackend costs 3.037 msec. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.080.332 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_10_replace_addn is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.080.342 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_10_replace_addn is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.080.336 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 in 282.49 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.080.363 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.080.458 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_10_replace_addn in 102.49 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.080.482 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_10_replace_addn in 114.54 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.080.501 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:93] BackendCommonOptimization] [PROF]BackendCommonOptimization costs 2.027 msec. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.080.531 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:100] BackendCommonOptimization] Status record: end common optimization. graph id: 0 [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.080.536 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:93] BackendCommonOptimization] [PROF]BackendCommonOptimization costs 2.182 msec. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.080.555 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:100] BackendCommonOptimization] Status record: end common optimization. graph id: 0 [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.080.580 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:258] OptimizationWithoutBackend] [PROF]OptimizationWithoutBackend costs 3.049 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.080.594 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion in 207.23 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.080.606 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:258] OptimizationWithoutBackend] [PROF]OptimizationWithoutBackend costs 3.185 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.080.621 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.080.799 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_0_renorm_split in 184.71 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.080.831 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: reduce_axis_update [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.080.888 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce in 201.15 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.080.916 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.029 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_0_renorm_split in 117.54 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.062 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: reduce_axis_update [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.072 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_0_renorm_split in 126.45 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.104 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: reduce_axis_update [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.108 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_1_reduce_axis_update in 272.1 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.081.104 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm in 159.73 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.134 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.081.131 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.159 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim ClipByNorm not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.179 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission in 22.49 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.196 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: space_to_batch_nd_attr_update [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.238 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_3_space_to_batch_nd_attr_update in 38.34 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.259 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: batch_to_space_nd_attr_update [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.291 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_4_batch_to_space_nd_attr_update in 30.28 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.081.303 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion in 137.04 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.324 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdamWeightDecay not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.081.330 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.333 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_1_reduce_axis_update in 266.29 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.345 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_5_adam_weight_decay_unify_mindir in 31.41 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.360 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.383 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim ClipByNorm not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.393 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_6_add_depend_for_adamw in 27.2 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.402 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission in 22.02 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.399 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_1_reduce_axis_update in 289.71 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.412 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_7_cdist_fission is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.421 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: space_to_batch_nd_attr_update [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.426 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.430 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Cdist not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.448 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim ClipByNorm not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.467 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission in 22.63 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.462 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_3_space_to_batch_nd_attr_update in 38.23 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.447 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_7_cdist_fission in 17.62 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.462 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.477 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim CdistGrad not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.081.464 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion in 108.76 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.485 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: space_to_batch_nd_attr_update [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.483 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: batch_to_space_nd_attr_update [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.494 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission in 16.33 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.081.489 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.516 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_4_batch_to_space_nd_attr_update in 30.61 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.528 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_3_space_to_batch_nd_attr_update in 39.41 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.521 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.535 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdamWeightDecay not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.552 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_5_adam_weight_decay_unify_mindir in 17.35 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.549 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: batch_to_space_nd_attr_update [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.545 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion in 6.34 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.563 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.581 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_4_batch_to_space_nd_attr_update in 30.07 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.595 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_6_add_depend_for_adamw in 25.04 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.583 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion in 3.79 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.613 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdamWeightDecay not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.613 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_7_cdist_fission is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.081.610 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion in 97.18 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.606 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.623 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_11_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir in 19 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.635 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_5_adam_weight_decay_unify_mindir in 33.2 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.630 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Cdist not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.646 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_7_cdist_fission in 15.9 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.081.637 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.639 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.656 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_12_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir_v2 in 16.3 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.661 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.676 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim CdistGrad not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.678 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_6_add_depend_for_adamw in 25.29 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.700 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_7_cdist_fission is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.691 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission in 13.82 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.703 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.718 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Cdist not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.722 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.725 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_13_sparse_softmax_cross_entropy_with_logits_unify_mindir in 51.49 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.745 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion in 6.52 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.761 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.734 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_7_cdist_fission in 16.17 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.750 [mindspo[INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.778 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutExt not exist in name to cnode re/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.784 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim CdistGrad not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.780 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion in 4.25 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.798 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_14_dropout_ext_unify_mindir1 in 53.91 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.801 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission in 16.62 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.803 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.816 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutGradExt not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.816 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.820 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_11_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir in 19.32 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.836 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.834 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_15_dropoutgrad_ext_unify_mindir in 17.82 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.851 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Dropout not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.840 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion in 7.09 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.857 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.850 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_12_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir_v2 in 13.21 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.867 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_16_dropout_unify_mindir1 in 14.64 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.883 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: dropoutgrad_unify_mindir [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.876 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion in 4.35 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.081.886 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion in 224.09 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.899 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.898 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.917 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_11_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir in 21.64 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.081.913 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.916 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_13_sparse_softmax_cross_entropy_with_logits_unify_mindir in 48.05 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.927 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_17_dropoutgrad_unify_mindir in 40.82 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.934 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.950 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchange not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.081.950 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_12_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir_v2 in 14.87 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.973 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutExt not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.981 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_18_neighbor_exchange_unify_mindir in 30.48 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.081.993 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_14_dropout_ext_unify_mindir1 in 57.39 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.009 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutGradExt not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.004 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.081.998 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2 not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.014 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_19_neighbor_exchange_v2_unify_mindir in 15.35 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.025 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_15_dropoutgrad_ext_unify_mindir in 14.62 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.025 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_13_sparse_softmax_cross_entropy_with_logits_unify_mindir in 54.22 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.029 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2Grad not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.045 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_20_neighbor_exchange_v2_grad_unify_mindir in 14.93 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.040 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Dropout not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.056 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_16_dropout_unify_mindir1 in 15.14 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.059 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutExt not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.060 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAll not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.069 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: dropoutgrad_unify_mindir [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.076 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_14_dropout_ext_unify_mindir1 in 32.33 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.077 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_21_all_to_all_unify_mindir in 15.99 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.096 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAllV not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.095 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutGradExt not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.113 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_15_dropoutgrad_ext_unify_mindir in 17.58 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.112 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_17_dropoutgrad_unify_mindir in 39.39 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.111 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_22_all_to_all_v_unify_mindir in 14.56 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.131 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Dropout not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.134 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchange not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.146 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_16_dropout_unify_mindir1 in 14.33 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.162 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: dropoutgrad_unify_mindir [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.162 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.165 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_18_neighbor_exchange_unify_mindir in 30.56 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.082.175 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion in 237.8 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.184 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_23_bn_split in 52.77 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.183 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2 not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.199 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_19_neighbor_exchange_v2_unify_mindir in 15.47 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.201 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: bn_grad_unify_mindir [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.082.203 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.213 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2Grad not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.218 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_17_dropoutgrad_unify_mindir in 52.59 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.082.234 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion in 5.82 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.229 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_20_neighbor_exchange_v2_grad_unify_mindir in 14.95 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.244 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAll not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.241 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchange not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.258 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_18_neighbor_exchange_unify_mindir in 17.58 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.082.255 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.263 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_24_bn_grad_unify_mindir in 59.31 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.260 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_21_all_to_all_unify_mindir in 14.61 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.275 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAllV not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.277 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2 not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.294 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.288 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_22_all_to_all_v_unify_mindir in 12.81 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.292 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_19_neighbor_exchange_v2_unify_mindir in 15.26 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.308 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2Grad not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.324 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_20_neighbor_exchange_v2_grad_unify_mindir in 14.92 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.316 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_25_bn_grad_split in 29.22 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.333 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.329 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.345 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_23_bn_split in 40.19 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.339 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAll not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.355 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_21_all_to_all_unify_mindir in 14.7 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.348 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_26_batchnormgrad_to_bninfergrad in 14.45 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.364 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.360 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: bn_grad_unify_mindir [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.371 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAllV not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.386 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_22_all_to_all_v_unify_mindir in 14.76 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.379 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.395 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission in 15 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.082.398 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion in 121.5 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.426 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_24_bn_grad_unify_mindir in 61.64 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.410 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.430 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.082.424 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_12_shape_reshape is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.436 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_28_batchnorm_to_bninfer in 24.39 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.453 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.456 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.450 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_23_bn_split in 46.48 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.467 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: bn_grad_unify_mindir [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.471 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Lamb not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.476 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_25_bn_grad_split in 26.09 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.492 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.487 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge in 19.1 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.504 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Print not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.507 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_26_batchnormgrad_to_bninfergrad in 14.61 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.522 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.519 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_30_print_insert_placeholder_for_tensor_name in 14.65 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.537 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.536 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_24_bn_grad_unify_mindir in 66 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.539 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim GetNext not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.555 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_31_getnext_for_ge in 18.77 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.082.546 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_12_shape_reshape in 98.71 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.551 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission in 13.18 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.565 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.573 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNorm not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.082.571 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.577 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.594 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_28_batchnorm_to_bninfer in 15.78 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.583 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_25_bn_grad_split in 25.34 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.602 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.593 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_32_sync_bn_split in 21.17 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.609 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.609 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.625 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Lamb not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.618 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_26_batchnormgrad_to_bninfergrad in 15.57 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.634 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.625 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_33_sync_bn_grad_split in 15.06 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.641 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.641 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge in 16.86 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.658 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Print not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.648 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.660 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdaptiveMaxPool2D not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.676 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion in 18.26 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.674 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_30_print_insert_placeholder_for_tensor_name in 14.86 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.694 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim GetNext not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.676 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission in 25.44 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.694 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.709 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_31_getnext_for_ge in 17.31 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.719 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AvgPoolGrad not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.711 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_28_batchnorm_to_bninfer in 15.89 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.727 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.727 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNorm not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.738 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_35_avg_pool_grad_for_ge in 43.35 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.745 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Lamb not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.743 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_32_sync_bn_split in 16.98 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.760 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.082.749 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion in 154.72 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.753 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.771 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.761 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge in 18.09 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.779 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Print not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.774 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_33_sync_bn_grad_split in 13.93 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.788 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.082.776 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.785 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion in 15.2 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.809 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.794 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_30_print_insert_placeholder_for_tensor_name in 14.89 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.814 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim GetNext not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.803 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdaptiveMaxPool2D not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.818 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion in 16.5 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.823 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.082.838 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion in 14.94 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.829 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_31_getnext_for_ge in 17.58 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.848 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNorm not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.861 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AvgPoolGrad not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.863 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_32_sync_bn_split in 16.78 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.879 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.879 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_35_avg_pool_grad_for_ge in 42.95 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.896 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_33_sync_bn_grad_split in 15.4 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.894 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.909 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.911 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.928 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdaptiveMaxPool2D not exist in name to cnode [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.925 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion in 15.58 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.944 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion in 17.86 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.949 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.082.943 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion in 132.2 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.967 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.082.968 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.082.982 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion in 15.15 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.082.990 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AvgPoolGrad not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.083.009 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_35_avg_pool_grad_for_ge in 44.88 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.083.025 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.083.047 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.083.063 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion in 16.78 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.083.079 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.083.094 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.083.110 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion in 15.27 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.083.105 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_38_add_attr_to_dump in 245.07 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.083.104 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion in 111.02 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.083.130 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.083.243 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_38_add_attr_to_dump in 237.66 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.083.247 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion in 93.57 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.083.271 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.083.395 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_38_add_attr_to_dump in 259 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.083.413 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion in 117.69 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.083.437 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.083.571 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_39_ascend_mindir_op_adapter in 429.62 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.083.600 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.083.622 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:263] DefineFlashAttentionPattern] Do FlashAttentionPattern V1. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.083.749 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion in 284.48 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.083.776 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.083.783 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_39_ascend_mindir_op_adapter in 504.73 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.083.814 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.083.837 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:263] DefineFlashAttentionPattern] Do FlashAttentionPattern V1. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.083.909 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_39_ascend_mindir_op_adapter in 478.7 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.083.913 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 in 288.39 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.083.941 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.083.939 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.083.959 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:377] DefineFlashAttentionPattern] Do FlashAttentionPattern V2. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.083.967 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:263] DefineFlashAttentionPattern] Do FlashAttentionPattern V1. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.084.036 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion in 233.25 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.084.063 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.084.125 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 in 285 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.084.148 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.084.168 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:377] DefineFlashAttentionPattern] Do FlashAttentionPattern V2. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.084.242 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 in 279.67 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.084.267 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.084.274 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 in 306.44 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.084.299 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.084.319 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:377] DefineFlashAttentionPattern] Do FlashAttentionPattern V2. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.084.316 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion in 223.63 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.084.344 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.084.451 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 in 279.91 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.084.475 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.084.501 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion in 211.93 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.084.524 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.084.572 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops in 202.74 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.084.619 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:318] GEUnifyMindIR] [PROF]GEUnifyMindIR costs 8.178 msec. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.084.616 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 in 294.95 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.084.711 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.084.780 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion in 280.33 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.084.806 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.084.814 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce in 266.98 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.084.841 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.084.963 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion in 227.47 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.084.987 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.085.019 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce in 191.06 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.085.042 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.085.041 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm in 173.85 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.085.066 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion is enabled. [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.085.105 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.085.223 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce in 214.77 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.085.219 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion in 130.21 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.085.233 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm in 167.16 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.085.247 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.085.255 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.085.258 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion is enabled. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.085.285 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:38] MarkRefGraph] Mark graph is ref graph: 0 [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.085.406 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion in 129.82 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.085.428 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.085.416 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion in 136.13 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.085.450 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.085.456 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm in 185.78 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.085.493 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.085.548 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion in 99.72 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.085.570 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.085.574 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion in 102.96 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.085.596 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.085.670 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion in 152.92 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.085.695 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.085.712 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion in 95.8 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.085.734 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.085.818 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion in 226.79 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.085.834 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion in 118.96 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.085.841 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.085.856 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.085.921 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unfold_inputs_for_special_nodes_pm_0_ascend_convert_tuple_input_to_dynamic_input in 551.47 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.085.980 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion in 223.92 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.085.988 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion in 110.87 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.086.003 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.086.011 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.086.114 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion in 249.7 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.086.106 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_0_process_call_inline in 92.11 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.086.139 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.086.165 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion in 5.74 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.086.184 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.086.267 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion in 240.79 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.086.274 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion in 242.18 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.086.290 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.086.298 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.086.314 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion in 6.36 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.086.332 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.086.335 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion in 130.44 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.086.359 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_12_shape_reshape is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.086.444 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_1_seed_adapter in 298.6 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.086.479 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_12_shape_reshape in 99.34 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.086.481 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion in 128.82 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.086.502 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.086.503 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_12_shape_reshape is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.086.507 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_2_insert_tensor_move_for_communication in 27.13 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.086.581 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion in 261.15 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.086.606 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.086.627 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_12_shape_reshape in 102.22 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.086.632 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion in 6.36 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.086.627 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_3_process partial inline in 88.78 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.086.649 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.086.651 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.086.679 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion in 155.07 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.086.703 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.086.807 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion in 135.64 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.086.804 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_4_expander_fallback in 140.4 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.086.830 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_12_shape_reshape is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.086.828 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion in 155.95 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.086.852 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.086.869 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion in 143.97 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.086.901 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.086.932 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_5_convert_pad_v3_paddings in 92.45 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.086.963 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_12_shape_reshape in 112.09 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.086.986 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.087.009 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion in 136.56 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.087.031 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion in 109.94 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.087.040 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.087.053 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.087.044 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_6_convert_pad_v3_grad_paddings in 82.19 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.087.170 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion in 95.61 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.087.176 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion in 114.17 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.087.174 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion in 166 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.087.192 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.087.196 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.087.200 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.087.261 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_7_resize_bilinear_add_attr in 185.59 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.087.295 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_8_backend_custom_depend in 5.71 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.087.307 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion in 91.95 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.087.328 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.087.334 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion in 120.91 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.087.338 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:237] GEBackendOptimizeACL] [PROF]GEBackendOptimizeACL costs 1.343 msec. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.087.358 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.087.378 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion in 148.62 us [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.087.390 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:137] OptimizeACLGraph] [PROF]OptimizeACLGraph costs 2.107 msec. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.087.402 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.087.471 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion in 123.31 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.087.493 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion is enabled. [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.087.488 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.087.543 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion in 119.42 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.087.565 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.087.657 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion in 275.15 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.087.682 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.087.692 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion in 106.66 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.087.713 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.087.784 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion in 266.09 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.087.811 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.087.869 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion in 136.06 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.087.893 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.087.935 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion in 229.94 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.087.959 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.088.097 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion in 260.95 us [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.088.094 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] StridedSlice select aclop kernel [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.088.125 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.088.209 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion in 227.76 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.088.220 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion in 302.66 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.088.234 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.088.247 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.088.367 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion in 219.07 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.088.391 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.088.459 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops in 202.35 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.088.507 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:318] GEUnifyMindIR] [PROF]GEUnifyMindIR costs 8.114 msec. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.088.525 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion in 252.04 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.088.549 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.088.608 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops in 194.97 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.088.679 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:318] GEUnifyMindIR] [PROF]GEUnifyMindIR costs 8.038 msec. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.088.840 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion in 269.04 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.088.866 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops is enabled. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.088.864 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Mul select aclnn kernel [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.088.905 [mindspore/ops/kernel/ascend/opapi/aclnn_kernel_build.cc:77] IsEnabledAclnnDispatch] AllGather is not defined in opdef. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.089.043 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] AllGather select hccl kernel [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.089.055 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.089.107 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops in 220.09 us [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.089.142 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op0 is view op and not support aclnn [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.089.156 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:318] GEUnifyMindIR] [PROF]GEUnifyMindIR costs 8.431 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.089.190 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.089.256 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:38] MarkRefGraph] Mark graph is ref graph: 0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.089.314 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Split select aclop kernel [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.089.392 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:38] MarkRefGraph] Mark graph is ref graph: 0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.089.495 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Concat select aclnn kernel [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.089.597 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op1 is view op and not support aclnn [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.089.657 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.089.862 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:38] MarkRefGraph] Mark graph is ref graph: 0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.089.894 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:298] SelectKernel] [PROF]SelectKernel costs 2.468 msec. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.089.922 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:302] PrintOpSelectedNum] Number of GE_KERNEL, INTERNAL_KERNEL, OPAPI_KERNEL, ACL_KERNEL, HCCL_KERNEL, HOST_KERNEL: [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.089.944 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:308] PrintOpSelectedNum] 0 0 3 8 3 0 [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.089.951 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unfold_inputs_for_special_nodes_pm_0_ascend_convert_tuple_input_to_dynamic_input in 599.37 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.090.067 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unfold_inputs_for_special_nodes_pm_0_ascend_convert_tuple_input_to_dynamic_input in 577.93 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.090.110 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_0_set_fracz_group_attr in 63.12 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.090.142 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_0_process_call_inline in 96.15 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.090.262 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_0_process_call_inline in 96.78 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.090.358 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_1_insert_identity in 215.69 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.090.487 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_1_seed_adapter in 293.86 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.090.552 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_2_insert_tensor_move_for_communication in 29.65 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.090.587 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unfold_inputs_for_special_nodes_pm_0_ascend_convert_tuple_input_to_dynamic_input in 627.99 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.090.614 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_1_seed_adapter in 297.61 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.090.672 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_3_process partial inline in 90.67 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.090.680 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_2_insert_tensor_move_for_communication in 28.75 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.090.780 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_2_insert_type_transform_op in 388.63 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.090.801 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_3_process partial inline in 90.58 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.090.796 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_0_process_call_inline in 106.96 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.090.846 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_4_expander_fallback in 141.75 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.090.854 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_3_graph_view_replace in 43.85 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.090.896 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:288] GEBackendOptimizeACLAfterKernelSelect] [PROF]GEBackendOptimizeACLAfterKernelSelect costs 0.924 msec. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.090.942 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:172] OptimizeACLGraphAfterKernelSelect] [PROF]OptimizeACLGraphAfterKernelSelect costs 0.975 msec. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.090.969 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_5_convert_pad_v3_paddings in 93.56 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.090.993 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_4_expander_fallback in 161.66 us [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.091.024 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.091.082 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_6_convert_pad_v3_grad_paddings in 83.51 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.091.119 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_5_convert_pad_v3_paddings in 94.35 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.091.136 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_after_inline_pm_0_DropoutGenMask is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.091.163 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_after_inline_pm_0_DropoutGenMask in 1 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.091.197 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_1_seed_adapter in 356.82 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.091.233 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_6_convert_pad_v3_grad_paddings in 84.31 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.091.264 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_2_insert_tensor_move_for_communication in 30.06 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.091.263 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_1_cse in 73.43 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.091.298 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_7_resize_bilinear_add_attr in 186.57 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.091.306 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_2_eliminate_maketuple_getitem in 15.72 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.091.332 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_8_backend_custom_depend in 7.5 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.091.332 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_3_insert_move_to in 0.62 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.091.375 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:237] GEBackendOptimizeACL] [PROF]GEBackendOptimizeACL costs 1.352 msec. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.091.372 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:355] GEAfterInlineOptimize] [PROF]GEAfterInlineOptimize costs 0.246 msec. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.091.394 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_3_process partial inline in 99.8 us [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.091.405 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:391] InlineCallGraph] [PROF]InlineCallGraph costs 0.437 msec. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.091.436 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:137] OptimizeACLGraph] [PROF]OptimizeACLGraph costs 2.182 msec. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.091.449 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_7_resize_bilinear_add_attr in 186.92 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.091.483 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_8_backend_custom_depend in 7.41 us [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.091.481 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.091.525 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:237] GEBackendOptimizeACL] [PROF]GEBackendOptimizeACL costs 1.383 msec. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.091.534 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.091.570 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_4_expander_fallback in 143.7 us [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.091.584 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:137] OptimizeACLGraph] [PROF]OptimizeACLGraph costs 2.195 msec. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.091.598 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:868] InlineSwitchGraph] [PROF]InlineSwitchGraph costs 0.165 msec. [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.091.638 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1055] CompileGraphImpl] [PROF]OptimizeGraph costs 6.366 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.091.683 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.091.702 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_5_convert_pad_v3_paddings in 102.57 us [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.091.710 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.091.809 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.091.825 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_6_convert_pad_v3_grad_paddings in 94.1 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.092.060 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_7_resize_bilinear_add_attr in 204.75 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.092.095 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_8_backend_custom_depend in 7.47 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.092.136 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:237] GEBackendOptimizeACL] [PROF]GEBackendOptimizeACL costs 1.469 msec. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.092.191 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:137] OptimizeACLGraph] [PROF]OptimizeACLGraph costs 2.333 msec. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.092.264 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] StridedSlice select aclop kernel [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.092.295 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.092.348 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.092.380 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.092.391 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] StridedSlice select aclop kernel [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.092.469 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_pool.cc:423] BestFitAscendMemoryPool] BestFitAscendMemoryPool constructed, older memory allocator is enabled. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.092.510 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:167] Initialize] Skip initialization of memory pool since init size is not configured. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.092.545 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.093.003 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] StridedSlice select aclop kernel [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.093.049 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Mul select aclnn kernel [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.093.091 [mindspore/ops/kernel/ascend/opapi/aclnn_kernel_build.cc:77] IsEnabledAclnnDispatch] AllGather is not defined in opdef. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.093.177 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Mul select aclnn kernel [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.093.223 [mindspore/ops/kernel/ascend/opapi/aclnn_kernel_build.cc:77] IsEnabledAclnnDispatch] AllGather is not defined in opdef. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.093.249 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] AllGather select hccl kernel [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.093.359 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op0 is view op and not support aclnn [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.093.394 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] AllGather select hccl kernel [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.093.459 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.093.525 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op0 is view op and not support aclnn [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.093.547 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Split select aclop kernel [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.093.598 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.093.626 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.093.651 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.093.771 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.093.784 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Split select aclop kernel [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.093.793 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Concat select aclnn kernel [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.093.795 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.093.822 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.093.832 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Mul select aclnn kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.093.875 [mindspore/ops/kernel/ascend/opapi/aclnn_kernel_build.cc:77] IsEnabledAclnnDispatch] AllGather is not defined in opdef. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.093.901 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op1 is view op and not support aclnn [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.093.932 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.093.957 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.093.976 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Concat select aclnn kernel [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.093.982 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.094.027 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] AllGather select hccl kernel [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.094.085 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op1 is view op and not support aclnn [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.094.087 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.094.124 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.094.143 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op0 is view op and not support aclnn [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.094.148 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.094.193 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:298] SelectKernel] [PROF]SelectKernel costs 2.72 msec. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.094.233 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:302] PrintOpSelectedNum] Number of GE_KERNEL, INTERNAL_KERNEL, OPAPI_KERNEL, ACL_KERNEL, HCCL_KERNEL, HOST_KERNEL: [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.094.254 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:308] PrintOpSelectedNum] 0 0 3 8 3 0 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.094.262 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.094.286 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.094.333 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Split select aclop kernel [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.094.384 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:298] SelectKernel] [PROF]SelectKernel costs 2.764 msec. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.094.429 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:302] PrintOpSelectedNum] Number of GE_KERNEL, INTERNAL_KERNEL, OPAPI_KERNEL, ACL_KERNEL, HCCL_KERNEL, HOST_KERNEL: [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.094.450 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:308] PrintOpSelectedNum] 0 0 3 8 3 0 [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.094.461 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_0_set_fracz_group_attr in 83.28 us [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.094.558 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Concat select aclnn kernel [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.094.638 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_0_set_fracz_group_attr in 67.31 us [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.094.670 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op1 is view op and not support aclnn [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:06.094.702 [mindspore/ccsrc/transform/acl_ir/op_api_exec.cc:145] GetAscendDefaultCustomPath] Add path [/usr/local/Ascend/latest/opp/vendors/customize/op_api/lib/libcust_opapi.so to custom opapi paths. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.094.728 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_1_insert_identity in 231.72 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.094.902 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_1_insert_identity in 229.74 us [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.094.954 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:298] SelectKernel] [PROF]SelectKernel costs 2.728 msec. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.094.985 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:302] PrintOpSelectedNum] Number of GE_KERNEL, INTERNAL_KERNEL, OPAPI_KERNEL, ACL_KERNEL, HCCL_KERNEL, HOST_KERNEL: [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.095.004 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:308] PrintOpSelectedNum] 0 0 3 8 3 0 [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.156 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_2_insert_type_transform_op in 394.06 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.095.187 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_0_set_fracz_group_attr in 61.16 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.233 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_3_graph_view_replace in 46.71 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.274 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:288] GEBackendOptimizeACLAfterKernelSelect] [PROF]GEBackendOptimizeACLAfterKernelSelect costs 0.991 msec. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.323 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:172] OptimizeACLGraphAfterKernelSelect] [PROF]OptimizeACLGraphAfterKernelSelect costs 1.046 msec. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.095.333 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_2_insert_type_transform_op in 396.58 us [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.402 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.095.409 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_3_graph_view_replace in 45.9 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.095.450 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:288] GEBackendOptimizeACLAfterKernelSelect] [PROF]GEBackendOptimizeACLAfterKernelSelect costs 0.971 msec. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.095.464 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_1_insert_identity in 242.32 us [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.095.498 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:172] OptimizeACLGraphAfterKernelSelect] [PROF]OptimizeACLGraphAfterKernelSelect costs 1.024 msec. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.518 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_after_inline_pm_0_DropoutGenMask is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.545 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_after_inline_pm_0_DropoutGenMask in 1.49 us [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.095.578 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.651 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_1_cse in 83.79 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.695 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_2_eliminate_maketuple_getitem in 18.12 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.095.693 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_after_inline_pm_0_DropoutGenMask is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.716 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_3_insert_move_to in 0.63 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.095.721 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_after_inline_pm_0_DropoutGenMask in 1.4 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.752 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:355] GEAfterInlineOptimize] [PROF]GEAfterInlineOptimize costs 0.246 msec. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.785 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:391] InlineCallGraph] [PROF]InlineCallGraph costs 0.438 msec. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.095.828 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_1_cse in 83.51 us [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.859 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.095.870 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_2_eliminate_maketuple_getitem in 17.7 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.095.892 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_3_insert_move_to in 0.81 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.095.926 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:355] GEAfterInlineOptimize] [PROF]GEAfterInlineOptimize costs 0.246 msec. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.095.925 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_2_insert_type_transform_op in 427.54 us [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.095.959 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:391] InlineCallGraph] [PROF]InlineCallGraph costs 0.436 msec. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.095.977 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:868] InlineSwitchGraph] [PROF]InlineSwitchGraph costs 0.167 msec. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.003 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_3_graph_view_replace in 47.31 us [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.096.010 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1055] CompileGraphImpl] [PROF]OptimizeGraph costs 6.775 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.096.034 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.045 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:288] GEBackendOptimizeACLAfterKernelSelect] [PROF]GEBackendOptimizeACLAfterKernelSelect costs 1.012 msec. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.096.082 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.097 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:172] OptimizeACLGraphAfterKernelSelect] [PROF]OptimizeACLGraphAfterKernelSelect costs 1.071 msec. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.096.151 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:868] InlineSwitchGraph] [PROF]InlineSwitchGraph costs 0.167 msec. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.178 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.096.182 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1055] CompileGraphImpl] [PROF]OptimizeGraph costs 6.815 msec. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.096.194 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.096.254 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.292 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_after_inline_pm_0_DropoutGenMask is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.318 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_after_inline_pm_0_DropoutGenMask in 1.43 us [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.096.365 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.428 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_1_cse in 87.81 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.469 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_2_eliminate_maketuple_getitem in 17.88 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.488 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_3_insert_move_to in 0.7 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.523 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:355] GEAfterInlineOptimize] [PROF]GEAfterInlineOptimize costs 0.243 msec. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.554 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:391] InlineCallGraph] [PROF]InlineCallGraph costs 0.432 msec. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.636 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.757 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:868] InlineSwitchGraph] [PROF]InlineSwitchGraph costs 0.177 msec. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.096.762 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.096.801 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.802 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1055] CompileGraphImpl] [PROF]OptimizeGraph costs 6.965 msec. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.877 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.096.907 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_pool.cc:423] BestFitAscendMemoryPool] BestFitAscendMemoryPool constructed, older memory allocator is enabled. [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.096.918 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.096.954 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:167] Initialize] Skip initialization of memory pool since init size is not configured. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.096.958 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.096.977 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.096.995 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.097.066 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_pool.cc:423] BestFitAscendMemoryPool] BestFitAscendMemoryPool constructed, older memory allocator is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.097.115 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:167] Initialize] Skip initialization of memory pool since init size is not configured. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.097.146 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.097.605 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.097.643 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.097.750 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_pool.cc:423] BestFitAscendMemoryPool] BestFitAscendMemoryPool constructed, older memory allocator is enabled. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.097.797 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:167] Initialize] Skip initialization of memory pool since init size is not configured. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.097.835 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.063 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.182 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.224 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.252 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.275 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.348 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.377 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.400 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.407 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.432 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.454 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.530 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.555 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.573 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.576 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.596 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.619 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.701 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.725 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.732 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.746 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.755 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.098.776 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.777 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.865 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.888 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.907 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.098.920 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.098.931 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.098.931 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.098.959 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.098.983 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.099.052 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.099.075 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.099.115 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.099.141 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.099.163 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.099.274 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.099.296 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.099.317 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:06.099.317 [mindspore/ccsrc/transform/acl_ir/op_api_exec.cc:145] GetAscendDefaultCustomPath] Add path [/usr/local/Ascend/latest/opp/vendors/customize/op_api/lib/libcust_opapi.so to custom opapi paths. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.099.425 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.099.460 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:06.099.455 [mindspore/ccsrc/transform/acl_ir/op_api_exec.cc:145] GetAscendDefaultCustomPath] Add path [/usr/local/Ascend/latest/opp/vendors/customize/op_api/lib/libcust_opapi.so to custom opapi paths. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.099.483 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.099.608 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.099.631 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.100.021 [mindspore/ccsrc/transform/acl_ir/op_api_exec.cc:145] GetAscendDefaultCustomPath] Add path [/usr/local/Ascend/latest/opp/vendors/customize/op_api/lib/libcust_opapi.so to custom opapi paths. [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.131.180 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-16057586909177180503 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.131.222 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 1000.6 msec. [WARNING] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.131.255 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-16057586909177180503 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.131.276 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:06.131.528 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-16057586909177180503, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.146.928 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Mul, kernel type:opapi_kernel [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.147.036 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.147.198 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.147.271 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.147.315 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.147.339 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.147.365 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.147.723 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.147.753 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.154.038 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Mul, kernel type:opapi_kernel [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.154.188 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.154.373 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.154.465 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.154.511 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.154.533 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.154.556 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.154.942 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.154.977 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.155.095 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Mul, kernel type:opapi_kernel [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.155.261 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.155.456 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.155.558 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.155.612 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.155.635 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.155.660 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.156.075 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.156.109 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.157.373 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Mul, kernel type:opapi_kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.157.543 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.157.734 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.157.834 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.157.888 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.157.910 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.157.934 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.158.369 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.158.404 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [WARNING] DEVICE(187803,fffde2ffd0f0,python):2025-02-07-15:58:06.189.292 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-16057586909177180503 [INFO] DEVICE(187803,fffde2ffd0f0,python):2025-02-07-15:58:06.189.391 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-16057586909177180503 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:06.189.450 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 511.816 msec. [WARNING] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:06.189.482 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-16057586909177180503 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.189.595 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-6541264347459079684, the max communication size is 1 MB. [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.189.623 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-6541264347459079684, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:06.189.647 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-6541264347459079684. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:06.189.673 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-6541264347459079684 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:06.189.693 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-6541264347459079684 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:06.189.832 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.101 msec. [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:06.190.034 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-6541264347459079684 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:06.190.065 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.203 msec. [WARNING] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:06.190.084 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-6541264347459079684 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:06.190.107 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187803,fffde2ffd0f0,python):2025-02-07-15:58:06.190.312 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-6541264347459079684, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.238.768 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.238.842 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.238.922 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.238.978 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.239.017 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.239.054 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.239.081 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.239.221 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.239.247 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.239.945 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.239.985 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.240.038 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.240.082 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.240.116 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.240.139 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.240.457 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_0_erase_visit_attr in 256.68 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.240.852 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_1_deal_ref_output in 356.59 us [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:06.240.910 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:166] AclAfterCreateKernel] [PROF]AclAfterCreateKernel costs 0.731 msec. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.240.962 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:195] OptimizeACLGraphAfterCreateKernel] [PROF]OptimizeACLGraphAfterCreateKernel costs 0.794 msec. [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.241.057 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.241.234 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1102] OptimizeExecutionOrder] [PROF]OptimizeExecutionOrder costs 0.242 msec. [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.241.269 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1064] CompileGraphImpl] [PROF]CreateKernel costs 149.602 msec. [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.241.457 [mindspore/ccsrc/backend/common/session/session_basic.cc:1152] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] DEBUG(187764,ffff97badc10,python):2025-02-07-15:58:06.241.483 [mindspore/ccsrc/debug/summary/summary.cc:52] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] DEBUG(187764,ffff97badc10,python):2025-02-07-15:58:06.241.513 [mindspore/ccsrc/debug/summary/summary.cc:57] RecurseSetSummaryNodesForAllGraphs] This function should be skipped on GE backend. [INFO] DEBUG(187764,ffff97badc10,python):2025-02-07-15:58:06.241.535 [mindspore/ccsrc/debug/data_dump/dump_json_parser.cc:1207] UpdateNeedDumpKernels] Get kernel dump flag [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.241.580 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1183] PreprocessBeforeRun] Current Exec Order Algo in MS Context is bfs [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.241.609 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1046] DoStreamAssign] Status record: start stream assign, kernel_graph0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.241.641 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.241.688 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op1 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.241.730 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op2 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.241.758 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op3 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.241.796 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op4 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.241.834 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op5 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.241.882 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Mul-op0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.241.913 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.243.199 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 2 for node Default/AllGather-op0, group: 2-3358271254418797552 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.243.246 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.243.292 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op2 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.243.322 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op1 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.244.422 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 3 for node Default/AllGather-op1, group: 2-5208665662337742843 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.244.466 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op1 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.244.507 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op3 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.244.547 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op2 [WARNING] DEVICE(187803,fffde2ffd0f0,python):2025-02-07-15:58:06.245.244 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-6541264347459079684 [INFO] DEVICE(187803,fffde2ffd0f0,python):2025-02-07-15:58:06.245.332 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-6541264347459079684 [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:06.245.373 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 55.237 msec. [WARNING] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:06.245.395 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-6541264347459079684 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.245.453 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:247] InitCommGroup] The MOC occupied by HCCL of graph: 4_3_1___main___Net_construct_20 is 1600 MB. [WARNING] DEVICE(187789,fffdfdffb0f0,python):2025-02-07-15:58:06.245.427 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-6541264347459079684 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.245.491 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1072] CompileGraphs] [PROF]InitCommGroup costs 1594.26 msec. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:06.245.519 [mindspore/ccsrc/distributed/collective/collective_manager.cc:833] WaitAllCommInitDone] All device communictor is initialized. You can launch communication operators after this step. [INFO] DEVICE(187789,fffdfdffb0f0,python):2025-02-07-15:58:06.245.521 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-6541264347459079684 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.245.542 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1076] CompileGraphs] [PROF]WaitAllCommInit costs 0.025 msec. [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:06.245.582 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 639.228 msec. [WARNING] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:06.245.619 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-6541264347459079684 [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.245.676 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:247] InitCommGroup] The MOC occupied by HCCL of graph: 4_3_1___main___Net_construct_20 is 1600 MB. [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.245.713 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1072] CompileGraphs] [PROF]InitCommGroup costs 1681.7 msec. [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.245.733 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1347] IsEnableControlFlowInline] Disable switch inline, executor mode:1 [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:06.245.738 [mindspore/ccsrc/distributed/collective/collective_manager.cc:833] WaitAllCommInitDone] All device communictor is initialized. You can launch communication operators after this step. [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.245.762 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1076] CompileGraphs] [PROF]WaitAllCommInit costs 0.026 msec. [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.245.770 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1180] DoUnifyMindIRPass] Do unify mindir pass for graph 4_3_1___main___Net_construct_20 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.245.806 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_pm_0_erase_invalid_micro_depend in 3.51 us [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.245.906 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 4 for node Default/AllGather-op2, group: 2-511848487187618470 [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.245.947 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1347] IsEnableControlFlowInline] Disable switch inline, executor mode:1 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.245.961 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:937] EnableKBKCompileCache] Disable backend compile cache by front config. [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.245.982 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1180] DoUnifyMindIRPass] Do unify mindir pass for graph 4_3_1___main___Net_construct_20 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.246.016 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_pm_0_erase_invalid_micro_depend in 3.15 us [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.246.178 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:937] EnableKBKCompileCache] Disable backend compile cache by front config. [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.246.243 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:620] BuildSymbolEngine] Status record: skip build symbol engine for function graph: 4_3_1___main___Net_construct_20 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.246.295 [mindspore/ccsrc/backend/graph_compiler/graph_partition.cc:866] Partition] GraphPartion Info: 4_3_1___main___Net_construct_20 inline mode:0 [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.246.434 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:620] BuildSymbolEngine] Status record: skip build symbol engine for function graph: 4_3_1___main___Net_construct_20 [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.246.482 [mindspore/ccsrc/backend/graph_compiler/graph_partition.cc:866] Partition] GraphPartion Info: 4_3_1___main___Net_construct_20 inline mode:0 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.246.644 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1519] CompileGraph] Compile graph: 4_3_1___main___Net_construct_20, Split segments size: 2 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.246.723 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1555] CompileGraphFromSegment] Compile normal segment, the first node: @4_3_1___main___Net_construct_20:CNode_21{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.246.735 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1062] DoStreamAssign] Status record: end stream assign, kernel_graph0 [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.246.784 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1519] CompileGraph] Compile graph: 4_3_1___main___Net_construct_20, Split segments size: 2 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.246.800 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.246.856 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1555] CompileGraphFromSegment] Compile normal segment, the first node: @4_3_1___main___Net_construct_20:CNode_21{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.246.887 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:2 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.246.926 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.246.953 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 2, record_stream_id_ : 0. [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.246.983 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.006 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:3 [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.247.009 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:629] CompileGraph] Status record: start compile graph. [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.036 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.060 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 3, record_stream_id_ : 0. [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.089 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.113 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:4 [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.247.111 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:629] CompileGraph] Status record: start compile graph. [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.141 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.247.141 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:2227] ConstructKernelGraph] Create graph: 0 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.165 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 4, record_stream_id_ : 0. [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.197 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.247.214 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:2227] ConstructKernelGraph] Create graph: 0 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.221 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:5 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.253 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.276 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 5, record_stream_id_ : 0. [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.304 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.326 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:0 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.354 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.376 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 0, record_stream_id_ : 2. [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.415 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.438 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:6 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.466 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.490 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 6, record_stream_id_ : 0. [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.516 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.539 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:1 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.565 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.587 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 1, record_stream_id_ : 3. [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.616 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.639 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:7 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.665 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.692 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 7, record_stream_id_ : 0. [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.720 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.742 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:8 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.770 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.793 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 8, record_stream_id_ : 2. [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.820 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.843 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:9 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.870 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.892 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 9, record_stream_id_ : 3. [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.919 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.942 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:10 [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.247.968 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187764,ffff97badc10,python):2025-02-07-15:58:06.248.000 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 10, record_stream_id_ : 4. [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.028 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:931] PrintGraphExecuteOrder] Graph 0 execution order: [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.101 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[0], node name[Default/StreamSend-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_24{[0]: ValueNode StreamSend}], event id[2] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.138 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[1], node name[Default/StreamRecv-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_25{[0]: ValueNode StreamRecv}], event id[2] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.170 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[2], node name[Default/StreamSend-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_26{[0]: ValueNode StreamSend}], event id[3] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.199 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[3], node name[Default/StreamRecv-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_27{[0]: ValueNode StreamRecv}], event id[3] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.231 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[4], node name[Default/StreamSend-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_28{[0]: ValueNode StreamSend}], event id[4] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.262 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[5], node name[Default/StreamRecv-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_29{[0]: ValueNode StreamRecv}], event id[4] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.248.284 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:3487] ConstructOutput] Output:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.325 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[6], node name[Default/StridedSlice-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_30{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_x, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.248.320 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:3487] ConstructOutput] Output:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.379 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[7], node name[Default/StridedSlice-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.429 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[8], node name[Default/StridedSlice-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.487 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[9], node name[Default/StridedSlice-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_33{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.535 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[10], node name[Default/StridedSlice-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.248.561 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.582 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[11], node name[Default/StridedSlice-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.622 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[12], node name[Default/Mul-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35}] [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.248.617 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.697 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[13], node name[Default/StreamSend-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_37{[0]: ValueNode StreamSend}], event id[5] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.732 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[14], node name[Default/StreamRecv-op3], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_38{[0]: ValueNode StreamRecv}], event id[5] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.772 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[15], node name[Default/AllGather-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36}], group[2-3358271254418797552] [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.248.765 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:637] CompileGraph] [PROF]ConstructKernelGraph costs 1.614 msec. [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.805 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[16], node name[Default/StreamSend-op4], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_40{[0]: ValueNode StreamSend}], event id[0] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.835 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[17], node name[Default/StreamRecv-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_41{[0]: ValueNode StreamRecv}], event id[0] [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.248.844 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:637] CompileGraph] [PROF]ConstructKernelGraph costs 1.792 msec. [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.872 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[18], node name[Default/Split-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.248.902 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: kernel_graph0 [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.924 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[19], node name[Default/Concat-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2}] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.958 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[20], node name[Default/StreamSend-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_46{[0]: ValueNode StreamSend}], event id[6] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.248.987 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[21], node name[Default/StreamRecv-op5], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_47{[0]: ValueNode StreamRecv}], event id[6] [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.248.993 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: kernel_graph0 [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.021 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[22], node name[Default/AllGather-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43}], group[2-5208665662337742843] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.051 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[23], node name[Default/StreamSend-op6], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_49{[0]: ValueNode StreamSend}], event id[1] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.081 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[24], node name[Default/StreamRecv-op6], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_50{[0]: ValueNode StreamRecv}], event id[1] [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.098 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:449] RecursiveSetRunMode] Kernel graph: kernel_graph0, set run mode:KernelMode [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.119 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[25], node name[Default/Split-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.163 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[26], node name[Default/Concat-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1}] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.195 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[27], node name[Default/StreamSend-op7], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_55{[0]: ValueNode StreamSend}], event id[7] [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.249.196 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:449] RecursiveSetRunMode] Kernel graph: kernel_graph0, set run mode:KernelMode [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.226 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[28], node name[Default/StreamRecv-op7], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_56{[0]: ValueNode StreamRecv}], event id[7] [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.257 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:191] EliminateIllegalDataTypePass] Start eliminate illegal data type for kernel graph id:0 [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.263 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[29], node name[Default/AllGather-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}], group[2-511848487187618470] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.295 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[30], node name[Default/StreamSend-op8], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_58{[0]: ValueNode StreamSend}], event id[8] [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.330 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_0_convert_list_to_tuple in 26.03 us [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.334 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[31], node name[Default/StreamRecv-op8], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_59{[0]: ValueNode StreamRecv}], event id[8] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.365 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[32], node name[Default/StreamSend-op9], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_60{[0]: ValueNode StreamSend}], event id[9] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.249.364 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:191] EliminateIllegalDataTypePass] Start eliminate illegal data type for kernel graph id:0 [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.394 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[33], node name[Default/StreamRecv-op9], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_61{[0]: ValueNode StreamRecv}], event id[9] [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.424 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[34], node name[Default/StreamSend-op10], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_62{[0]: ValueNode StreamSend}], event id[10] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.249.442 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_0_convert_list_to_tuple in 25.71 us [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.249.453 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[35], node name[Default/StreamRecv-op10], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_63{[0]: ValueNode StreamRecv}], event id[10] [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.249.491 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1115] CompileGraphImpl] [PROF]PreprocessBeforeRun costs 7.927 msec. [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.249.536 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1161] CreateDeviceAddress] Status record: start create device address. graph id: 0 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.569 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_1_eliminate_func_type in 207.15 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.616 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:204] EliminateIllegalDataTypePass] [PROF]EliminateIllegalDataTypePass costs 0.355 msec. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.649 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:151] CommonUnifyMindIR] start common unify mindir opt graph:0 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.679 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: conv_transpose_to_conv_backprop_input [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.249.680 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_1_eliminate_func_type in 202.55 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.249.732 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:204] EliminateIllegalDataTypePass] [PROF]EliminateIllegalDataTypePass costs 0.364 msec. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.249.768 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:151] CommonUnifyMindIR] start common unify mindir opt graph:0 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.249.800 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: conv_transpose_to_conv_backprop_input [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.834 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_0_conv_transpose_to_conv_backprop_input in 150.05 us [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.249.819 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.859 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: custom_op_reg_info_to_attr [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.249.892 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.899 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_1_custom_op_reg_info_to_attr in 37.6 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.921 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Custom not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.943 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_2_inplace_assign_for_custom_op in 20.27 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.249.962 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_attr_to_unify_mindir [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.249.959 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_0_conv_transpose_to_conv_backprop_input in 154.67 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.249.969 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.249.988 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: custom_op_reg_info_to_attr [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.250.027 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.250.031 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_1_custom_op_reg_info_to_attr in 40.89 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.250.056 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Custom not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.250.075 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_2_inplace_assign_for_custom_op in 19.07 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.250.065 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.250.093 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_attr_to_unify_mindir [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.250.105 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.250.103 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_3_convert_attr_to_unify_mindir in 136.62 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.250.130 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.250.156 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:164] CommonUnifyMindIR] [PROF]CommonUnifyMindIR costs 0.503 msec. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.250.196 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:81] BackendCommonOptimization] Status record: start common optimization. graph id: 0 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.250.243 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_dynamic_broadcast_to [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.250.238 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_3_convert_attr_to_unify_mindir in 140.63 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.250.277 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.250.295 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:164] CommonUnifyMindIR] [PROF]CommonUnifyMindIR costs 0.522 msec. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.250.303 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.250.341 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:81] BackendCommonOptimization] Status record: start common optimization. graph id: 0 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.250.365 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_0_convert_dynamic_broadcast_to in 117.3 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.250.393 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_dynamic_broadcast_to [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.250.522 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_0_convert_dynamic_broadcast_to in 124.21 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.250.548 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_1_convert_const_input_to_attr in 154.77 us [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.250.595 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1168] CreateDeviceAddress] Status record: end create device address. graph id: 0 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.250.637 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1123] CompileGraphImpl] [PROF]CreateDeviceAddress costs 1.092 msec. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.250.691 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_2_custom_op_const_input_to_attr in 116.87 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.250.701 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_1_convert_const_input_to_attr in 147.17 us [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.250.747 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1177] CacheGraphOutputToFrontNodeWithIndex] Get graph backend output nodes. [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.250.785 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1185] CacheGraphOutputToFrontNodeWithIndex] Get graph front output nodes. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.250.823 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_3_convert_const_input_to_tensor_input_for_print in 105.8 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.250.842 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_2_custom_op_const_input_to_attr in 109.98 us [INFO] SESSION(187764,ffff97badc10,python):2025-02-07-15:58:06.250.854 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1203] CacheGraphOutputToFrontNodeWithIndex] Backend output: Default/AllGather-op2 debug string: @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} node ptr:0x3207c790 with index: 0 map to front node: Default/AllGather-op2 debug string: @4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} node ptr: 0x3202fa90 with index: 0 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.250.902 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:766] CompileGraph] Status record: end compile graph. graph id: 0 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.250.970 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_3_convert_const_input_to_tensor_input_for_print in 97.88 us [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.251.006 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.251.054 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.251.078 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1605] CompileGraphFromSegment] Compile cut segment, the cut node: @4_3_1___main___Net_construct_20:ValueNode_64{[0]: ValueNode Return, [1]: CNode_22} [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.251.111 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.251.116 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1129] CompileGraphs] [PROF]CompileSubGraph costs 180.418 msec. [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.251.153 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:976] ExportCompileCacheKBK] Compile cache: disable by front compile cache config. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.251.161 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.251.195 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.251.216 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.251.214 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1143] CompileGraphs] Status record: construct the graph compiler info. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.270 [mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc:1001] Parse] Control node parser is not inited. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.251.310 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_4_convert_tuple_output_to_maketuple in 456.92 us [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.321 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:859] Transform] Graph(kernel_graph_0) transforms actor begin, strategy:pipeline [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.251.347 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_5_convert_unused_tuple_para_to_make_tuple in 3.5 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.251.369 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_6_flatten_concat_fission is enabled. [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.467 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:480] InitGraphParameterStore] Init graph parameter store: kernel_graph_0, outer size: 2 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.251.476 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_4_convert_tuple_output_to_maketuple in 467.98 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.251.506 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_6_flatten_concat_fission in 113.73 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.506 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 0, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_x [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.251.515 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_5_convert_unused_tuple_para_to_make_tuple in 4.74 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.251.540 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_6_flatten_concat_fission is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.539 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 1, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_y [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.588 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x338fe630 for node:ValueNode 2 node addr:0x32036910 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.622 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x338fe630 [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.251.623 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_0_erase_visit_attr in 321.77 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.251.638 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_7_inset_input_structural_for_py_execute in 103.69 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.251.661 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_8_broadcast_to_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.661 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x338fe940 for node:ValueNode (2, 2, 2) node addr:0x32037cf0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.683 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x338fe940 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.251.682 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_6_flatten_concat_fission in 114.29 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.710 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x338fef80 for node:ValueNode (0, 2, 0) node addr:0x32038d30 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.731 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x338fef80 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.755 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x338ff440 for node:ValueNode 0 node addr:0x320367e0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.776 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x338ff440 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.251.778 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_8_broadcast_to_fusion in 95.55 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.801 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x338ffc30 for node:ValueNode (1, 1, 1) node addr:0x32038040 device type:2 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.251.806 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_7_inset_input_structural_for_py_execute in 93.62 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.251.830 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_8_broadcast_to_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.821 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x338ffc30 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.856 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x33900110 for node:ValueNode 1 node addr:0x32036150 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.877 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x33900110 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.902 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x33900900 for node:ValueNode (0, 0, 0) node addr:0x32037bc0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.923 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x33900900 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.946 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x339010f0 for node:ValueNode (2, 4, 4) node addr:0x32038fc0 device type:2 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.251.947 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_8_broadcast_to_fusion in 92.66 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.251.966 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x339010f0 [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.042 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_1_deal_ref_output in 377.55 us [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.110 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:166] AclAfterCreateKernel] [PROF]AclAfterCreateKernel costs 0.845 msec. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.115 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:442] ChangeGraphMode] Enable kbk subgraph execute and set run mode for graph: 0 to GraphMode. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.252.131 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_9_add_attr_to_node in 320.66 us [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.138 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:462] TryEnableKbkSubGraphExecMode] Enable kbk subgraph execute mode for actor set: kernel_graph_0 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.252.162 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_10_replace_addn is enabled. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.170 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:195] OptimizeACLGraphAfterCreateKernel] [PROF]OptimizeACLGraphAfterCreateKernel costs 0.925 msec. [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.197 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:391] TryEnableInputOptimize] Enable input optimize for actor set: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.243 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_y for host data source actor. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.283 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.305 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_y for front node:@4_3_1___main___Net_construct_20:param_y index:0 position:1 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.252.309 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_10_replace_addn in 119.33 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.252.319 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_9_add_attr_to_node in 334.29 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.334 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_x for host data source actor. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.252.351 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_10_replace_addn is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.252.353 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:93] BackendCommonOptimization] [PROF]BackendCommonOptimization costs 2.153 msec. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.371 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_x for front node:@4_3_1___main___Net_construct_20:param_x index:0 position:0 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.252.373 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:100] BackendCommonOptimization] Status record: end common optimization. graph id: 0 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.406 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2268] BuildDataPrepareActorForGraphParameterStore] Create data prepare actor: kernel_graph_0_DataPrepareActor [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.252.437 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:258] OptimizationWithoutBackend] [PROF]OptimizationWithoutBackend costs 3.179 msec. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.477 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1102] OptimizeExecutionOrder] [PROF]OptimizeExecutionOrder costs 0.276 msec. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.252.481 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_10_replace_addn in 102.26 us [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.499 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2224] BuildLoopCountActor] Create loop count actor: kernel_graph_0_LoopCountActor [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.515 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1064] CompileGraphImpl] [PROF]CreateKernel costs 155.681 msec. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.252.529 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:93] BackendCommonOptimization] [PROF]BackendCommonOptimization costs 2.182 msec. [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.528 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2257] BuildOutputActor] Create output actor: kernel_graph_0_OutputActor [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.252.551 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:100] BackendCommonOptimization] Status record: end common optimization. graph id: 0 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.252.622 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:258] OptimizationWithoutBackend] [PROF]OptimizationWithoutBackend costs 3.258 msec. [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.625 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1509] CacheGraphOutputToActor] Cache graph 0 output node:Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} with index:0 to actor:kernel_graph0_SuperKernelActor, from front node:Default/AllGather-op2 debug string:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} with index:0 [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.803 [mindspore/ccsrc/backend/common/session/session_basic.cc:1152] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.815 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [INFO] DEBUG(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.832 [mindspore/ccsrc/debug/summary/summary.cc:52] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.841 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x338fd7e0 origin ref count:2 [INFO] DEBUG(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.864 [mindspore/ccsrc/debug/summary/summary.cc:57] RecurseSetSummaryNodesForAllGraphs] This function should be skipped on GE backend. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.873 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [INFO] DEBUG(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.883 [mindspore/ccsrc/debug/data_dump/dump_json_parser.cc:1207] UpdateNeedDumpKernels] Get kernel dump flag [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.895 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x338fdbb0 origin ref count:2 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.252.893 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_0_renorm_split in 129.23 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.252.928 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: reduce_axis_update [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.931 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1183] PreprocessBeforeRun] Current Exec Order Algo in MS Context is bfs [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.961 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1046] DoStreamAssign] Status record: start stream assign, kernel_graph0 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.252.990 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op0 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.252.989 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1711] Link] [PROF]GraphSchedulerLinkSinkMode costs 0.197 msec. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.019 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph_0_LoopCountActor@ [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.253.042 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.043 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph0_SuperKernelActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.064 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_LoopCountActor@ to actor:kernel_graph_0_OutputActor@ [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.253.086 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.087 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_OutputActor@ to actor:kernel_graph_0_DataPrepareActor@ [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.090 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_0_renorm_split in 123.98 us [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.253.112 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.108 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:3713] LinkControlArrowForCopyActor] Link control arrow for copy actor start, copy actor size:0 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.126 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: reduce_axis_update [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.135 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x33905c40 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.253.151 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.170 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:556] AddResultArrow] Add result arrow from actor:kernel_graph0_SuperKernelActor to actor:kernel_graph_0_OutputActor@ from kernel@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} device address:0x33905c40 original ref count:18446744073709551615 ref count:18446744073709551615 dynamic ref count:2147483647 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.253.189 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op5 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.223 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_1_reduce_axis_update in 289.96 us [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.253.235 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.241 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.249 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission is enabled. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.253.263 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op0 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.274 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim ClipByNorm not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.286 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.297 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission in 25.04 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.316 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: space_to_batch_nd_attr_update [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.324 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.361 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_3_space_to_batch_nd_attr_update in 41.62 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.374 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.384 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: batch_to_space_nd_attr_update [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.411 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.420 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_4_batch_to_space_nd_attr_update in 33.83 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.430 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_1_reduce_axis_update in 298.44 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.443 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdamWeightDecay not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.448 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.459 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.463 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_5_adam_weight_decay_unify_mindir in 19.35 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.487 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim ClipByNorm not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.509 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_6_add_depend_for_adamw in 24.26 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.511 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission in 28.07 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.529 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_7_cdist_fission is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.534 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: space_to_batch_nd_attr_update [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.548 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Cdist not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.567 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_7_cdist_fission in 18.17 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.584 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.585 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_3_space_to_batch_nd_attr_update in 47.69 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.602 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim CdistGrad not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.611 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: batch_to_space_nd_attr_update [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.620 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission in 17.8 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.637 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.651 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_4_batch_to_space_nd_attr_update in 37.41 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.671 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion in 5.98 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.690 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.687 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdamWeightDecay not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.684 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.713 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_5_adam_weight_decay_unify_mindir in 35.42 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.716 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion in 3.96 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.731 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.740 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.760 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_11_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir in 22.25 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.763 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_6_add_depend_for_adamw in 26.46 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.777 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.786 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_7_cdist_fission is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.796 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_12_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir_v2 in 17.41 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.793 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.806 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Cdist not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.828 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.829 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_7_cdist_fission in 22.84 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.835 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.849 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_13_sparse_softmax_cross_entropy_with_logits_unify_mindir in 31.48 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.850 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.868 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim CdistGrad not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.887 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission in 17.35 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.899 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutExt not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.912 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.915 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.920 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_14_dropout_ext_unify_mindir1 in 47.86 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.940 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutGradExt not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.945 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion in 7.08 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.957 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_15_dropoutgrad_ext_unify_mindir in 16.42 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.253.954 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.967 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.978 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Dropout not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.253.992 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion in 4.35 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.253.997 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_16_dropout_unify_mindir1 in 17.53 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.016 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: dropoutgrad_unify_mindir [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.009 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.019 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.041 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_11_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir in 25.9 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.060 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.055 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.062 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_17_dropoutgrad_unify_mindir in 41.98 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.079 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_12_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir_v2 in 17.54 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.087 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchange not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.107 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_18_neighbor_exchange_unify_mindir in 19.44 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.129 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.134 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2 not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.129 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.152 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_13_sparse_softmax_cross_entropy_with_logits_unify_mindir in 50.4 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.155 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_19_neighbor_exchange_v2_unify_mindir in 20.06 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.175 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2Grad not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.172 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.192 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_20_neighbor_exchange_v2_grad_unify_mindir in 16.65 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.195 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutExt not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.212 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAll not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.217 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_14_dropout_ext_unify_mindir1 in 41.27 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.230 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_21_all_to_all_unify_mindir in 16.82 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.229 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.238 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutGradExt not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.256 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_15_dropoutgrad_ext_unify_mindir in 17.08 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.250 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAllV not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.276 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Dropout not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.268 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_22_all_to_all_v_unify_mindir in 17.39 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.268 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.293 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_16_dropout_unify_mindir1 in 17.02 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.306 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.310 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.313 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: dropoutgrad_unify_mindir [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.332 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_23_bn_split in 44.11 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.343 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.349 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: bn_grad_unify_mindir [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.364 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_17_dropoutgrad_unify_mindir in 47.6 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.378 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.389 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchange not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.418 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_24_bn_grad_unify_mindir in 65.37 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.417 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.422 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_18_neighbor_exchange_unify_mindir in 31.19 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.450 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.441 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 4, send_actor : 0x320828a0, recv_actor : 0x32083300. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.445 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2 not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.464 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_19_neighbor_exchange_v2_unify_mindir in 18.92 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.471 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_25_bn_grad_split in 27.18 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.469 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 8, send_actor : 0x339141f0, recv_actor : 0x33914a00. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.485 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 3, send_actor : 0x32061fb0, recv_actor : 0x32081ec0. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.483 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2Grad not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.492 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.501 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_20_neighbor_exchange_v2_grad_unify_mindir in 17.6 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.502 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 11, send_actor : 0x339181a0, recv_actor : 0x33918be0. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.512 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_26_batchnormgrad_to_bninfergrad in 18.8 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.520 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAll not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.518 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 10, send_actor : 0x33916d20, recv_actor : 0x33917760. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.534 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 1, send_actor : 0x3390c950, recv_actor : 0x3390d290. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.531 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.540 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_21_all_to_all_unify_mindir in 19.52 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.550 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 5, send_actor : 0x32083d40, recv_actor : 0x32084780. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.547 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.562 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAllV not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.566 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission in 16.75 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.585 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.565 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 6, send_actor : 0x3390aaf0, recv_actor : 0x3390b4c0. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.581 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 7, send_actor : 0x3390efa0, recv_actor : 0x3390f8e0. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.580 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_22_all_to_all_v_unify_mindir in 17.47 us [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.254.581 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 2 for node Default/AllGather-op0, group: 2-3358271254418797552 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.594 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 9, send_actor : 0x33915a20, recv_actor : 0x339162e0. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.610 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 2, send_actor : 0x33910bd0, recv_actor : 0x33911a30. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.602 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_28_batchnorm_to_bninfer in 15.58 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.627 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.630 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge is enabled. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.254.638 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op0 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.649 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Lamb not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.648 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_23_bn_split in 45.74 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.669 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge in 21.41 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.669 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: bn_grad_unify_mindir [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.254.684 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op2 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.689 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Print not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.709 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_30_print_insert_placeholder_for_tensor_name in 19.18 us [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.254.714 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op1 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.729 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim GetNext not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.749 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_31_getnext_for_ge in 21.67 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.745 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_24_bn_grad_unify_mindir in 69.09 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.771 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNorm not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.780 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.790 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_32_sync_bn_split in 19.78 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.803 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_25_bn_grad_split in 30.47 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.808 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.827 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_33_sync_bn_grad_split in 17.53 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.827 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.834 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op0 addr:0x338fe4b0 type:48, kernel tensor addr:0x338fe240, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.845 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.848 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_26_batchnormgrad_to_bninfergrad in 21.11 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.864 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdaptiveMaxPool2D not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.869 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.882 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion in 18.79 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.886 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.906 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission in 18.64 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.925 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AvgPoolGrad not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.925 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.945 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_35_avg_pool_grad_for_ge in 41.93 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.937 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op1 addr:0x339015e0 type:48, kernel tensor addr:0x33901370, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.959 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_28_batchnorm_to_bninfer in 32.21 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.963 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.254.980 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.254.983 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.255.004 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion in 20.75 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.003 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Lamb not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.254.999 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.026 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge in 25.86 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.255.023 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.048 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Print not exist in name to cnode [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.255.053 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.070 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_30_print_insert_placeholder_for_tensor_name in 20.92 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.255.073 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion in 19.19 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.094 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim GetNext not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.095 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op2 addr:0x33901af0 type:48, kernel tensor addr:0x33901880, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.115 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_31_getnext_for_ge in 22.54 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.140 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNorm not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.160 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_32_sync_bn_split in 21.91 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.162 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.179 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.200 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_33_sync_bn_grad_split in 19.14 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.220 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.235 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op3 addr:0x33902000 type:48, kernel tensor addr:0x33901d90, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.242 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdaptiveMaxPool2D not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.263 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion in 22.46 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.305 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AvgPoolGrad not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.326 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_35_avg_pool_grad_for_ge in 43.83 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.324 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op4 addr:0x33902510 type:48, kernel tensor addr:0x339022a0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.347 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.255.353 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_38_add_attr_to_dump in 254.9 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.367 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.382 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.386 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion in 19.06 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.413 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.434 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.455 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion in 19.59 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.472 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op5 addr:0x33902a20 type:48, kernel tensor addr:0x339027b0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.527 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.255.621 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_39_ascend_mindir_op_adapter in 234.46 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.255.649 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.652 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Mul-op0 addr:0x33902f30 type:48, kernel tensor addr:0x33902cc0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.255.673 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:263] DefineFlashAttentionPattern] Do FlashAttentionPattern V1. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.255.668 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 3 for node Default/AllGather-op1, group: 2-4190060298023907007 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.701 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35} is thread safe. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.255.720 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op1 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.255.743 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_38_add_attr_to_dump in 261.41 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.765 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op0 addr:0x33903440 type:48, kernel tensor addr:0x339031d0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 2)) [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.255.764 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op3 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.255.805 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.805 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.888 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op0 addr:0x339039c0 type:48, kernel tensor addr:0x339037e0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.920 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op0 addr:0x33903eb0 type:48, kernel tensor addr:0x33903cd0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.255.961 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.255.981 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 in 306.27 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.256.007 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.256.030 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:377] DefineFlashAttentionPattern] Do FlashAttentionPattern V2. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.031 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op0 addr:0x33904330 type:48, kernel tensor addr:0x339040c0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.078 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.137 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op1 addr:0x33904840 type:48, kernel tensor addr:0x339045d0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.175 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43} is thread safe. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.256.171 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_39_ascend_mindir_op_adapter in 389.84 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.256.203 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.256.231 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:263] DefineFlashAttentionPattern] Do FlashAttentionPattern V1. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.243 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op1 addr:0x33904dc0 type:48, kernel tensor addr:0x33904be0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.271 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op1 addr:0x339052b0 type:48, kernel tensor addr:0x339050d0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.308 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.256.336 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 in 302.65 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.256.361 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.380 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op1 addr:0x33905730 type:48, kernel tensor addr:0x339054c0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.425 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.492 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op2 addr:0x33905c40 type:48, kernel tensor addr:0x339059d0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.529 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} is thread safe. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.256.556 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 in 322.53 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.256.585 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.256.615 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:377] DefineFlashAttentionPattern] Do FlashAttentionPattern V2. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.256.617 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion in 230.91 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.256.696 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.692 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op1 input kernel:Default/StridedSlice-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.723 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x338fe4b0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.770 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op2 input kernel:Default/StridedSlice-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.791 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x339015e0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.839 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op4 input kernel:Default/StridedSlice-op3 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.860 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x33902000 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.899 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op5 input kernel:Default/StridedSlice-op4 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.922 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x33902510 origin ref count:2 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.256.952 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce in 227.61 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.964 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op2 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.256.976 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.256.985 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x33901af0 origin ref count:2 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.256.992 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 in 374.9 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.020 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op5 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.257.023 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.042 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x33902a20 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.082 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op0 input kernel:Default/Mul-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.103 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x33902f30 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.132 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op0 input kernel:Default/AllGather-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.153 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x33903440 origin ref count:2 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.257.174 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm in 171.99 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.183 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.257.199 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.204 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x339039c0 origin ref count:2 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.257.208 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 4 for node Default/AllGather-op2, group: 2-5488101015797526856 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.229 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.250 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x33903eb0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.277 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op1 input kernel:Default/Concat-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.257.286 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion in 234.24 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.297 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x33904330 origin ref count:2 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.257.314 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.324 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op1 input kernel:Default/AllGather-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.344 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x33904840 origin ref count:2 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.257.356 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion in 131.66 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.371 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.257.381 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.391 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x33904dc0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.416 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.434 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x339052b0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.470 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op2 input kernel:Default/Concat-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.490 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x33905730 origin ref count:2 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.257.524 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion in 110.11 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.532 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[y] debug_name: @kernel_graph0:param_y use count is: 1 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.257.547 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.257.552 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce in 210.95 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.559 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[x] debug_name: @kernel_graph0:param_x use count is: 1 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.582 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1603] AddControlArrowForNoInputActor] Add control arrow for no input arrow actor: kernel_graph0_SuperKernelActor [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.257.579 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm is enabled. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.601 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph0_SuperKernelActor@ [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.257.593 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.257.661 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.257.678 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion in 107.11 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.257.702 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion is enabled. [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.705 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:887] Transform] [PROF]GraphSchedulerLink costs 5.005 msec. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.257.738 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.778 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 1_actor_set_kernel_graph_0_invalid_data_arrow_elimination in 1.12993 us [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.257.799 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.815 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 2_actor_set_kernel_graph_0_multi_actor_fusion in 10.36 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.257.813 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm in 206.16 us [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.257.836 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.839 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 3_actor_set_kernel_graph_0_batch_data_arrow_fusion in 0.830041 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.257.842 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion is enabled. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.257.860 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.863 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:904] Transform] Graph(kernel_graph_0) transforms actor end. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.257.901 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.257.935 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:354] Init] kernel_graph_0 has the parameter input num: 2 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.257.971 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion in 244.27 us [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.257.983 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1153] CompileGraphs] [PROF]GraphScheduler costs 6.74 msec. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.257.996 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.257.999 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion in 130.62 us [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.258.011 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:29] operator()] Create MultiStreamController. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.258.035 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:40] Refresh] Stream manager initialize, device_context : 0x1f2b1970, stream_size : 5. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.258.036 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion is enabled. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.258.049 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.258.054 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:214] Resize] Task id on stream manager initialize : 0, stream_size : 5. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.258.076 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.258.081 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1166] CompileGraphs] [PROF]compile_backend_graph costs 1974.9 msec. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.080 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1062] DoStreamAssign] Status record: end stream assign, kernel_graph0 [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.258.122 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1169] CompileGraphs] Status record: end compile function graph: 4_3_1___main___Net_construct_20, produce actor: kernel_graph_0 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.258.150 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end task_emit action. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.165 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.258.173 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.258.203 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:281] SetLoopCount] Change vm_loop_flag to 0, set loop_size to 1 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.258.222 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion in 156.8 us [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.258.231 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start execute action. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.258.249 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion is enabled. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.258.257 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end execute action. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.258.275 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.258.286 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion in 265.64 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.297 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:2 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.258.311 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.258.338 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion in 5.69 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.340 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.258.359 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion is enabled. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.369 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 2, record_stream_id_ : 0. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.258.378 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion in 104.01 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.399 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.258.402 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion is enabled. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.422 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:3 [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.453 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.474 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 3, record_stream_id_ : 0. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.501 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.522 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:4 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.258.522 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion in 139.86 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.258.546 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_12_shape_reshape is enabled. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.550 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.574 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 4, record_stream_id_ : 0. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.607 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.627 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:5 [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.654 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.675 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 5, record_stream_id_ : 0. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.258.676 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_12_shape_reshape in 108.02 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.258.676 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion in 247.09 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.704 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.258.699 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.258.704 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion is enabled. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.730 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:0 [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.760 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.792 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 0, record_stream_id_ : 2. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.821 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.258.810 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.844 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:6 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.258.857 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.873 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.894 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 6, record_stream_id_ : 0. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.258.890 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion in 164.03 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.258.916 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion is enabled. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.258.919 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.924 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.945 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:1 [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.971 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.258.971 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.258.991 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 1, record_stream_id_ : 3. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.259.005 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.005 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion in 270.79 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.020 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.259.028 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.033 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion is enabled. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.043 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:7 TotalTime = 2.05137, [21] [bootstrap]: 0.00126448 [type_inference]: 0.0169226 [auto_monad]: 0.00027873 [graph_reusing]: 5.601e-05 [inline]: 0.0128586, [2] [rewriter_before_opt_a]: 9.994e-05 [a1a2]: 0.0126596, [2] [Cycle 1]: 0.00189389, [11] [expand_dump_flag]: 3.12994e-06 [switch_simplify]: 9.892e-05 [loop_unroll]: 2.86299e-05 [a_1]: 0.00027107 [recompute_prepare]: 2.637e-05 [updatestate_depend_eliminate]: 7.47002e-06 [updatestate_assign_eliminate]: 3.36999e-06 [updatestate_loads_eliminate]: 2.73006e-06 [parameter_eliminate]: 4.14997e-06 [a_2]: 0.00071156 [parallel_inline_pass]: 2.40901e-05 [Cycle 2]: 0.00138136, [11] [expand_dump_flag]: 8.50065e-07 [switch_simplify]: 2.43001e-05 [loop_unroll]: 2.407e-05 [a_1]: 0.00013805 [recompute_prepare]: 2.375e-05 [updatestate_depend_eliminate]: 3.85998e-06 [updatestate_assign_eliminate]: 2.98989e-06 [updatestate_loads_eliminate]: 2.73006e-06 [parameter_eliminate]: 1.24006e-06 [a_2]: 0.00068879 [parallel_inline_pass]: 2.366e-05 [parallel-infer-symbol]: 0.00012309 [pre_auto_parallel]: 9.623e-05 [insert-virtual-dataset]: 0.0008901 [parallel-infer-symbol-second]: 5.855e-05 [dataset_repeat_opt]: 0.00030234 [pipeline_split]: 0.00011267 [optimize]: 0.0409063, [52] [py_interpret_to_execute]: 4.98e-05 [rewriter_before_opt_a]: 6.708e-05 [opt_a]: 0.0316144, [3] [Cycle 1]: 0.0161389, [46] [expand_dump_flag]: 1.16008e-06 [switch_simplify]: 3.26299e-05 [loop_unroll]: 2.805e-05 [a_1]: 0.00029243 [recompute_prepare]: 3.657e-05 [updatestate_depend_eliminate]: 7.00995e-06 [updatestate_assign_eliminate]: 5.22996e-06 [updatestate_loads_eliminate]: 5.20004e-06 [parameter_eliminate]: 1.95007e-06 [a_2]: 0.0007683 [accelerated_algorithm]: 2.854e-05 [shard]: 3.796e-05 [meta_shard_fg_expand]: 3.56999e-06 [shard_inline]: 2.78001e-05 [auto_parallel]: 3.326e-05 [parallel]: 0.00840959 [flash_sp]: 3.38e-05 [merge_comm]: 3.01801e-05 [allreduce_fusion]: 2.71599e-05 [matmul_add_comm_reduction]: 3.008e-05 [allreduce_slice_to_reducescatter]: 5.89993e-07 [virtual_shard_identity]: 7.082e-05 [virtual_dataset]: 9.814e-05 [get_grad_eliminate_]: 6.636e-05 [virtual_output]: 7.917e-05 [merge_forward]: 2.60499e-05 [cell_reuse_recompute_pass]: 3.81994e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00012814 [before_grad]: 8.18099e-05 [inplace_validation]: 2.455e-05 [parallel_renormalize]: 0.00237613 [update_top_fg]: 5.20027e-07 [cast_eliminate]: 7.112e-05 [meta_fg_expand]: 2.609e-05 [inplace_validation_after_expand]: 3.64999e-05 [flash_sp_send_recv_attached]: 4.304e-05 [receive_attached]: 3.36999e-06 [after_resolve]: 7.29e-05 [a_after_grad]: 9.319e-05 [special_op_eliminate]: 7.538e-05 [renormalize]: 9.98843e-08 [add_forward_monad_depend]: 3.50003e-06 [auto_monad_grad]: 2.41993e-06 [auto_monad_eliminator]: 4.002e-05 [cse]: 0.00017752 [a_3]: 0.00063021 [Cycle 2]: 0.00889284, [46] [expand_dump_flag]: 1.41992e-06 [switch_simplify]: 6.042e-05 [loop_unroll]: 6.057e-05 [a_1]: 0.00136976 [recompute_prepare]: 5.651e-05 [updatestate_depend_eliminate]: 2.607e-05 [updatestate_assign_eliminate]: 2.296e-05 [updatestate_loads_eliminate]: 2.198e-05 [parameter_eliminate]: 2.27999e-0[INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.062 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion in 6.5 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.071 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] 6 [a_2]: 0.00129879 [accelerated_algorithm]: 0.00012538 [shard]: 4.131e-05 [meta_shard_fg_expand]: 1.092e-05 [shard_inline]: 6.377e-05 [auto_parallel]: 5.954e-05 [parallel]: 8.30996e-06 [flash_sp]: 3.128e-05 [merge_comm]: 2.941e-05 [allreduce_fusion]: 2.511e-05 [matmul_add_comm_reduction]: 4.00899e-05 [allreduce_slice_to_reducescatter]: 6.10016e-07 [virtual_shard_identity]: 6.77101e-05 [virtual_dataset]: 6.499e-05 [get_grad_eliminate_]: 6.18499e-05 [virtual_output]: 6.31e-05 [merge_forward]: 2.384e-05 [cell_reuse_recompute_pass]: 3.03006e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00012969 [before_grad]: 7.786e-05 [inplace_validation]: 2.26699e-05 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 2.70084e-07 [cast_eliminate]: 5.70801e-05 [meta_fg_expand]: 2.335e-05 [inplace_validation_after_expand]: 2.805e-05 [flash_sp_send_recv_attached]: 9.60077e-07 [receive_attached]: 1.36998e-06 [after_resolve]: 6.20399e-05 [a_after_grad]: 8.05501e-05 [special_op_eliminate]: 5.645e-05 [renormalize]: 0.00180694 [add_forward_monad_depend]: 2.94996e-06 [auto_monad_grad]: 2.55997e-06 [auto_monad_eliminator]: 3.975e-05 [cse]: 0.00013724 [a_3]: 0.00059602 [Cycle 3]: 0.00653017, [46] [expand_dump_flag]: 1.12993e-06 [switch_simplify]: 6.02499e-05 [loop_unroll]: 5.76701e-05 [a_1]: 0.00120541 [recompute_prepare]: 5.64699e-05 [updatestate_depend_eliminate]: 2.631e-05 [updatestate_assign_eliminate]: 2.25799e-05 [updatestate_loads_eliminate]: 2.305e-05 [parameter_eliminate]: 2.02993e-06 [a_2]: 0.00123202 [accelerated_algorithm]: 6.366e-05 [shard]: 4.601e-05 [meta_shard_fg_expand]: 2.721e-05 [shard_inline]: 5.933e-05 [auto_parallel]: 5.522e-05 [parallel]: 6.60005e-06 [flash_sp]: 1.26997e-06 [merge_comm]: 2.64901e-05 [allreduce_fusion]: 2.428e-05 [matmul_add_comm_reduction]: 2.702e-05 [allreduce_slice_to_reducescatter]: 3.00002e-07 [virtual_shard_identity]: 6.777e-05 [virtual_dataset]: 5.79801e-05 [get_grad_eliminate_]: 5.725e-05 [virtual_output]: 5.79e-05 [merge_forward]: 2.223e-05 [cell_reuse_recompute_pass]: 2.55997e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00011563 [before_grad]: 7.698e-05 [inplace_validation]: 2.223e-05 [parallel_renormalize]: 8.9989e-08 [update_top_fg]: 3.00002e-07 [cast_eliminate]: 5.82699e-05 [meta_fg_expand]: 2.366e-05 [inplace_validation_after_expand]: 2.903e-05 [flash_sp_send_recv_attached]: 1.35996e-06 [receive_attached]: 1.23004e-06 [after_resolve]: 6.097e-05 [a_after_grad]: 8.09199e-05 [special_op_eliminate]: 5.92399e-05 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 1.53994e-06 [auto_monad_grad]: 1.58011e-06 [auto_monad_eliminator]: 3.347e-05 [cse]: 0.00012082 [a_3]: 0.00059527 [py_interpret_to_execute_after_opt_a]: 7.39e-05 [slice_cell_reuse_recomputed_activation]: 3.667e-05 [rewriter_after_opt_a]: 0.00034995 [convert_after_rewriter]: 6.77099e-05 [order_py_execute_after_rewriter]: 5.883e-05 [opt_b]: 0.00266278, [1] [Cycle 1]: 0.00261718, [7] [b_1]: 0.00203889 [b_2]: 6.13501e-05 [updatestate_depend_eliminate]: 2.43001e-05 [upda[INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.084 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.259.085 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion in 144.09 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.093 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 7, record_stream_id_ : 0. testate_assign_eliminate]: 2.21201e-05 [updatestate_loads_eliminate]: 2.194e-05 [renormalize]: 6.3004e-07 [cse]: 0.00012133 [optimize_parallel_all_gather_comm]: 6.967e-05 [overlap_param_gather]: 3.755e-05 [cconv]: 6.719e-05 [loop_unroll]: 0.00096161 [opt_after_cconv]: 0.00081667, [1] [Cycle 1]: 0.00077358, [7] [c_1]: 0.00023122 [parameter_eliminate]: 2.56998e-06 [updatestate_depend_eliminate]: 3.14e-05 [updatestate_assign_eliminate]: 2.437e-05 [updatestate_loads_eliminate]: 2.363e-05 [cse]: 0.0001228 [renormalize]: 4.09898e-07 [remove_dup_value]: 0.00030606 [tuple_transform]: 0.00041123, [1] [Cycle 1]: 0.00036716, [2] [d_1]: 0.00027325 [renormalize]: 4.1991e-07 [partial_unused_args_eliminate]: 4.02101e-05 [add_cache_embedding]: 9.69199e-05 [add_recomputation]: 0.0002043 [cse_after_recomputation]: 0.00017682, [1] [Cycle 1]: 0.00012249, [1] [cse]: 7.09499e-05 [environ_conv]: 7.318e-05 [swap_dp_allreduce_reducescatter]: 6.253e-05 [bias_add_comm_swap]: 3.732e-05 [label_micro_interleaved_index]: 3.524e-05 [label_fine_grained_interleaved_index]: 3.76301e-05 [merge_cast_opt]: 3.401e-05 [slice_recompute_activation]: 6.228e-05 [micro_interleaved_order_control]: 3.479e-05 [assign_add_opt]: 0.00018137 [ForceFp32Comm]: 4.53501e-05 [remove_cast_before_assign_add]: 5.53e-05 [full_micro_interleaved_order_control]: 3.586e-05 [reorder_send_recv_between_fp_bp]: 3.48501e-05 [comm_op_add_attrs]: 7.721e-05 [add_comm_op_reuse_tag]: 8.90599e-05 [interleave_split_concat_branches]: 3.56999e-05 [interleave_parallel_branches]: 3.416e-05 [overlap_opt_shard_in_pipeline]: 6.44e-05 [overlap_opt_shard_grad_in_pipeline]: 3.727e-05 [control_data_broadcast_order]: 3.51301e-05 [grouped_pairwise_exchange_alltoall]: 4.36601e-05 [offloading_packed_experts]: 0.00010883 [overlap_recompute_and_grad_model_parallel]: 3.82001e-05 [overlap_grad_matmul_and_grad_allreduce]: 3.395e-05 [overlap_recompute_allgather_and_fa_grad]: 5.503e-05 [overlap_grad_ring_attention]: 7.08699e-05 [overlap_grad_flash_sp]: 6.46e-05 [begin_end_overlap_inline]: 3.42e-05 [split_matmul_comm_elemetwise]: 3.57301e-05 [split_layernorm_comm]: 4.459e-05 [handle_group_info]: 4.422e-05 [symbol_engine_optimizer]: 0.00065365, [1] [Cycle 1]: 0.00061242, [6] [build]: 3.57201e-05 [elim_shapecalc]: 6.11399e-05 [elim_not_effective]: 7.63501e-05 [opt_reshape]: 6.565e-05 [fold_const_symbol]: 7.01001e-05 [renormalize]: 4.69969e-07 [pipeline_parallel_scheduler]: 5.63201e-05 [auto_monad_reorder]: 0.00012526 [get_jit_bprop_graph]: 5.622e-05 [rewriter_after_jit_bprop_graph]: 5.029e-05 [eliminate_special_op_node]: 0.00100954 [distribtued_split]: 0.00024129 [validate]: 0.00014141 [task_emit]: 1.97537 [execute]: 6.10701e-05 Sums bootstrap : 0.001264s : 0.06% type_inference : 0.016923s : 0.83% auto_monad : 0.000279s : 0.01% graph_reusing : 0.000056s : 0.00% inline.rewriter_before_opt_a : 0.000100s : 0.00% inline.a1a2.expand_dump_flag : 0.000004s : 0.00% inline.a1a2.switch_simplify : 0.000123s : 0.01% inline.a1a2.loop_unroll : 0.000053s : 0.00% inline.a1a2.a_1 : 0.000409s : 0.02% inline.a1a2.recompute_prepare : 0.000050s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000011s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000006s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000005s : 0.00% inline.a1a2.parameter_eliminate : 0.000005s : 0.00% inline.a1a2.a_2 : [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.259.109 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion is enabled. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.121 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.139 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:8 0.001400s : 0.07% inline.a1a2.parallel_inline_pass : 0.000048s : 0.00% parallel-infer-symbol : 0.000123s : 0.01% pre_auto_parallel : 0.000096s : 0.00% insert-virtual-dataset : 0.000890s : 0.04% parallel-infer-symbol-second : 0.000059s : 0.00% dataset_repeat_opt : 0.000302s : 0.01% pipeline_split : 0.000113s : 0.01% optimize.py_interpret_to_execute : 0.000050s : 0.00% optimize.rewriter_before_opt_a : 0.000067s : 0.00% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000153s : 0.01% optimize.opt_a.loop_unroll : 0.000146s : 0.01% optimize.opt_a.a_1 : 0.002868s : 0.14% optimize.opt_a.recompute_prepare : 0.000150s : 0.01% optimize.opt_a.updatestate_depend_eliminate : 0.000059s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000051s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000050s : 0.00% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.003299s : 0.16% optimize.opt_a.accelerated_algorithm : 0.000218s : 0.01% optimize.opt_a.shard : 0.000125s : 0.01% optimize.opt_a.meta_shard_fg_expand : 0.000042s : 0.00% optimize.opt_a.shard_inline : 0.000151s : 0.01% optimize.opt_a.auto_parallel : 0.000148s : 0.01% optimize.opt_a.parallel : 0.008425s : 0.41% optimize.opt_a.flash_sp : 0.000066s : 0.00% optimize.opt_a.merge_comm : 0.000086s : 0.00% optimize.opt_a.allreduce_fusion : 0.000077s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000097s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000206s : 0.01% optimize.opt_a.virtual_dataset : 0.000221s : 0.01% optimize.opt_a.get_grad_eliminate_ : 0.000185s : 0.01% optimize.opt_a.virtual_output : 0.000200s : 0.01% optimize.opt_a.merge_forward : 0.000072s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000373s : 0.02% optimize.opt_a.before_grad : 0.000237s : 0.01% optimize.opt_a.inplace_validation : 0.000069s : 0.00% optimize.opt_a.parallel_renormalize : 0.002376s : 0.12% optimize.opt_a.update_top_fg : 0.000001s : 0.00% optimize.opt_a.cast_eliminate : 0.000186s : 0.01% optimize.opt_a.meta_fg_expand : 0.000073s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000094s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000045s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.000196s : 0.01% optimize.opt_a.a_after_grad : 0.000255s : 0.01% optimize.opt_a.special_op_eliminate : 0.000191s : 0.01% optimize.opt_a.renormalize : 0.001807s : 0.09% optimize.opt_a.add_forward_monad_depend : 0.000008s : 0.00% optimize.opt_a.auto_monad_grad : 0.000007s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000113s : 0.01% optimize.opt_a.cse : 0.000436s : 0.02% optimize.opt_a.a_3 : 0.001822s : 0.09% optimize.py_interpret_to_execute_after_opt_a : 0.000074s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000037s : 0.00% optimize.rewriter_after_opt_a : 0.000350s : 0.02% optimize.convert_after_rewriter : 0.000068s : 0.00% optimize.order_py_execute_after_rewriter : 0.000059s : 0.00% optimize.op[INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.167 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.186 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 8, record_stream_id_ : 2. t_b.b_1 : 0.002039s : 0.10% optimize.opt_b.b_2 : 0.000061s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000024s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000022s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000022s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000121s : 0.01% optimize.optimize_parallel_all_gather_comm : 0.000070s : 0.00% optimize.overlap_param_gather : 0.000038s : 0.00% optimize.cconv : 0.000067s : 0.00% optimize.loop_unroll : 0.000962s : 0.05% optimize.opt_after_cconv.c_1 : 0.000231s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000031s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000024s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000024s : 0.00% optimize.opt_after_cconv.cse : 0.000123s : 0.01% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000306s : 0.02% optimize.tuple_transform.d_1 : 0.000273s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000040s : 0.00% optimize.add_cache_embedding : 0.000097s : 0.00% optimize.add_recomputation : 0.000204s : 0.01% optimize.cse_after_recomputation.cse : 0.000071s : 0.00% optimize.environ_conv : 0.000073s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000063s : 0.00% optimize.bias_add_comm_swap : 0.000037s : 0.00% optimize.label_micro_interleaved_index : 0.000035s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000038s : 0.00% optimize.merge_cast_opt : 0.000034s : 0.00% optimize.slice_recompute_activation : 0.000062s : 0.00% optimize.micro_interleaved_order_control : 0.000035s : 0.00% optimize.assign_add_opt : 0.000181s : 0.01% optimize.ForceFp32Comm : 0.000045s : 0.00% optimize.remove_cast_before_assign_add : 0.000055s : 0.00% optimize.full_micro_interleaved_order_control : 0.000036s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000035s : 0.00% optimize.comm_op_add_attrs : 0.000077s : 0.00% optimize.add_comm_op_reuse_tag : 0.000089s : 0.00% optimize.interleave_split_concat_branches : 0.000036s : 0.00% optimize.interleave_parallel_branches : 0.000034s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000064s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000037s : 0.00% optimize.control_data_broadcast_order : 0.000035s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000044s : 0.00% optimize.offloading_packed_experts : 0.000109s : 0.01% optimize.overlap_recompute_and_grad_model_parallel : 0.000038s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000034s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000055s : 0.00% optimize.overlap_grad_ring_attention : 0.000071s : 0.00% optimize.overlap_grad_flash_sp : 0.000065s : 0.00% optimize.begin_end_overlap_inline : 0.000034s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000036s : 0.00% optimize.split_layernorm_comm : 0.000045s : 0.00% optimize.handle_group_info : 0.000044s : 0.00% optimize.symbol_engine_optimizer.build : 0.000036s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000061s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000076s : 0.00% opti[INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.209 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] mize.symbol_engine_optimizer.opt_reshape : 0.000066s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000070s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000056s : 0.00% auto_monad_reorder : 0.000125s : 0.01% get_jit_bprop_graph : 0.000056s : 0.00% rewriter_after_jit_bprop_graph : 0.000050s : 0.00% eliminate_special_op_node : 0.001010s : 0.05% distribtued_split : 0.000241s : 0.01% validate : 0.000141s : 0.01% task_emit : 1.975371s : 97.19% execute : 0.000061s : 0.00% [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.230 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:9 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.239 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion in 130.11 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.258 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.265 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_12_shape_reshape is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.259.266 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion in 123.94 us [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.280 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 9, record_stream_id_ : 3. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.259.290 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion is enabled. [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.308 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.330 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:10 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.259.325 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.357 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.389 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 10, record_stream_id_ : 4. [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.259.391 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.396 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_12_shape_reshape in 104.72 us [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.259.401 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_0_erase_visit_attr in 289.54 us [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.417 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:931] PrintGraphExecuteOrder] Graph 0 execution order: [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.420 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion is enabled. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.259.420 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion in 106.12 us Time group info: ------[substitution.] 0.000487 351 10.92% : 0.000053s : 10: substitution.arithmetic_simplify 1.46% : 0.000007s : 21: substitution.elim_not_effective 2.80% : 0.000014s : 14: substitution.float_tuple_getitem_switch 1.43% : 0.000007s : 21: substitution.fold_const_symbol 4.10% : 0.000020s : 28: substitution.graph_param_transform 10.52% : 0.000051s : 1: substitution.inline 5.77% : 0.000028s : 66: substitution.j_node_and_user_rematch 6.87% : 0.000033s : 4: substitution.less_batch_normalization 2.75% : 0.000013s : 10: substitution.minmaximum_grad 7.83% : 0.000038s : 66: substitution.remove_not_recompute_node 1.25% : 0.000006s : 6: substitution.replace_old_param 9.39% : 0.000046s : 18: substitution.tuple_list_convert_item_index_to_positive 6.13% : 0.000030s : 18: substitution.tuple_list_get_item_const_eliminator 5.94% : 0.000029s : 18: substitution.tuple_list_get_item_depend_reorder 14.09% : 0.000069s : 30: substitution.tuple_list_get_item_eliminator 6.24% : 0.000030s : 18: substitution.tuple_list_get_set_item_eliminator 1.96% : 0.000010s : 1: substitution.virtual_dataset_eliminate 0.55% : 0.000003s : 1: substitution.virtual_output_eliminate ------[type_inference.] 0.016811 2 97.89% : 0.016457s : 1: type_inference.infer 2.11% : 0.000354s : 1: type_inference.specialize ------[replace.] 0.000059 5 17.46% : 0.000010s : 1: replace.inline 41.14% : 0.000024s : 2: replace.tuple_list_get_item_eliminator 27.31% : 0.000016s : 1: replace.virtual_dataset_eliminate 14.09% : 0.000008s : 1: replace.virtual_output_eliminate ------[match.] 0.000064 5 79.30% : 0.000050s : 1: match.inline 5.75% : 0.000004s : 2: match.tuple_list_get_item_eliminator 12.68% : 0.000008s : 1: match.virtual_dataset_eliminate 2.26% : 0.000001s : 1: match.virtual_output_eliminate ------[predicate.] 0.001418 11209 0.82% : 0.000012s : 100: predicate.accumulaten_eliminater 0.42% : 0.000006s : 28: predicate.ad_related_special_op_eliminate 0.90% : 0.000013s : 97: predicate.addn_check_dump 0.78% : 0.000011s : 100: predicate.addn_zero_filter 0.75% : 0.000011s : 100: predicate.adjust_all_reduce_mul_add 2.24% : 0.000032s : 197: predicate.arithmetic_simplify 1.95% : 0.000028s : 226: predicate.cast_eliminate 1.10% : 0.000016s : 126: predicate.check_bprop_eliminate 0.88% : 0.000012s : 97: predicate.compare_switch_simplify 0.25% : 0.000003s : 41: predicate.const_output_eliminate 0.31% : 0.000004s : 28: predicate.convert_tensor_all_eliminate 1.01% : 0.000014s : 102: predicate.convert_tensor_eliminate 0.90% : 0.000013s : 97: predicate.depend_value_elim 0.84% : 0.000012s : 100: predicate.dict_get_item_const_eliminator 0.89% : 0.000013s : 100: predicate.dict_get_item_eliminator 0.85% : 0.000012s : 100: predicate.dict_set_item_eliminator 0.19% : 0.000003s : 28: predicate.elim_not_effective 0.40% : 0.000006s : 28: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000016s : 141: predicate.environ_add_const_eliminate 1.12% : 0.000016s : 141: predicate.environ_get_add_eliminate 1.11% : 0.000016s : 141: predicate.environ_get_depend_swap 2.08% : 0.000030s : 238: predicate.environ_get_eliminate 1.11% : 0.000016s : 141: predicate.environ_get_set_eliminate 0.81% : 0.000012s : 103: predicate.exchange_switch_depend_value 1.09% : 0.000015s : 103: predicate.float_depend_g_call 0.88% : 0.000013s : 97: predicate.float_environ_get_switch 1.25% : 0.000018s : 138: predicate.float_tuple_getitem_switch 0.17% : 0.000002s : 28: predicate.fold_const_symbol 1.18% : 0.000017s : 127: predicate.get_grad_eliminate 0.18% : 0.000003s : 28: predicate.graph_param_transform 0.92%[INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.259.444 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion is enabled. : 0.000013s : 97: predicate.incorporate_call 0.87% : 0.000012s : 97: predicate.incorporate_call_switch 5.31% : 0.000075s : 479: predicate.inline 1.33% : 0.000019s : 126: predicate.inline_without_move 0.67% : 0.000009s : 126: predicate.j_node_and_user_rematch 0.99% : 0.000014s : 89: predicate.less_batch_normalization 1.53% : 0.000022s : 171: predicate.list_to_tuple_eliminator_ 2.26% : 0.000032s : 284: predicate.load_eliminater 0.66% : 0.000009s : 41: predicate.loop_unroll_after_grad 1.01% : 0.000014s : 105: predicate.loop_unroll_before_grad 1.51% : 0.000021s : 182: predicate.make_slice_get_slice_eliminator 0.90% : 0.000013s : 97: predicate.merge_addn 1.05% : 0.000015s : 126: predicate.micro_step_allgather_replace 1.07% : 0.000015s : 126: predicate.mini_step_allgather_replace 0.79% : 0.000011s : 100: predicate.minmaximum_grad 0.33% : 0.000005s : 28: predicate.mutable_eliminate 0.34% : 0.000005s : 28: predicate.opt_reshape 0.40% : 0.000006s : 41: predicate.parallel_virtual_node 1.05% : 0.000015s : 103: predicate.partial_defer_inline 1.31% : 0.000019s : 143: predicate.partial_eliminate 0.79% : 0.000011s : 100: predicate.print_const_string_wrapper 0.91% : 0.000013s : 97: predicate.reduce_all_const_elim 0.96% : 0.000014s : 100: predicate.reduce_eliminate 0.68% : 0.000010s : 126: predicate.remove_not_recompute_node 1.58% : 0.000022s : 228: predicate.replace_applicator 0.72% : 0.000010s : 126: predicate.replace_old_param 0.25% : 0.000004s : 41: predicate.reset_defer_inline 0.79% : 0.000011s : 100: predicate.reshape_eliminate 1.11% : 0.000016s : 126: predicate.row_tensor_add_zeros_like 0.39% : 0.000005s : 41: predicate.row_tensor_eliminate 1.27% : 0.000018s : 126: predicate.same_eliminate 0.62% : 0.000009s : 97: predicate.set_cell_output_no_recompute 1.24% : 0.000018s : 127: predicate.shard_identity_eliminate 1.56% : 0.000022s : 167: predicate.special_op_eliminate 1.00% : 0.000014s : 97: predicate.specialize_transform 1.15% : 0.000016s : 126: predicate.split_environ_get_set_with_tuple_value 1.26% : 0.000018s : 126: predicate.stack_unstack_eliminate 2.27% : 0.000032s : 284: predicate.stopgrad_eliminater 0.36% : 0.000005s : 41: predicate.switch_call_monad_eliminater 0.88% : 0.000012s : 103: predicate.switch_defer_inline 2.00% : 0.000028s : 229: predicate.switch_layer_defer_inline 3.01% : 0.000043s : 305: predicate.switch_simplify 0.79% : 0.000011s : 100: predicate.tile_eliminate 0.77% : 0.000011s : 100: predicate.transpose_eliminate 1.52% : 0.000022s : 169: predicate.tuple_list_convert_item_index_to_positive 1.51% : 0.000021s : 169: predicate.tuple_list_get_item_const_eliminator 1.37% : 0.000019s : 169: predicate.tuple_list_get_item_depend_reorder 3.25% : 0.000046s : 268: predicate.tuple_list_get_item_eliminator 1.43% : 0.000020s : 169: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000035s : 266: predicate.tuple_list_set_item_eliminator 1.49% : 0.000021s : 171: predicate.tuple_to_list_eliminator_ 2.25% : 0.000032s : 284: predicate.updatestate_pure_node_eliminater 3.24% : 0.000046s : 381: predicate.updatestate_useless_node_eliminater 0.38% : 0.000005s : 41: predicate.value_based_eliminate 1.26% : 0.000018s : 130: predicate.virtual_dataset_eliminate 1.20% : 0.000017s : 128: predicate.virtual_output_eliminate 0.37% : 0.000005s : 41: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000364 5 5.55% : 0.000020s : 1: func_graph_cloner_run.FuncGraphClonerGraph 94.45% : 0.000344s : 4: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 -[INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.259.469 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 -----[others.] 2.118544 283 0.00% : 0.000050s : 1: ForceFp32Comm 0.60% : 0.012665s : 1: a1a2 0.00% : 0.000102s : 1: add_cache_embedding 0.00% : 0.000095s : 1: add_comm_op_reuse_tag 0.01% : 0.000211s : 1: add_recomputation 0.01% : 0.000186s : 1: assign_add_opt 0.01% : 0.000289s : 1: auto_monad 0.01% : 0.000134s : 1: auto_monad_reorder 0.00% : 0.000038s : 1: begin_end_overlap_inline 0.00% : 0.000042s : 1: bias_add_comm_swap 0.06% : 0.001294s : 1: bootstrap 0.00% : 0.000073s : 1: cconv 0.00% : 0.000082s : 1: comm_op_add_attrs 0.00% : 0.000039s : 1: control_data_broadcast_order 0.00% : 0.000073s : 1: convert_after_rewriter 0.01% : 0.000181s : 1: cse_after_recomputation 0.01% : 0.000311s : 1: dataset_repeat_opt 0.01% : 0.000250s : 1: distribtued_split 0.05% : 0.001020s : 1: eliminate_special_op_node 0.00% : 0.000078s : 1: environ_conv 0.00% : 0.000069s : 1: execute 0.00% : 0.000040s : 1: full_micro_interleaved_order_control 0.00% : 0.000063s : 1: get_jit_bprop_graph 0.00% : 0.000063s : 1: graph_reusing 0.00% : 0.000048s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000049s : 1: handle_group_info 0.61% : 0.012869s : 1: inline 0.04% : 0.000904s : 1: insert-virtual-dataset 0.00% : 0.000038s : 1: interleave_parallel_branches 0.00% : 0.000040s : 1: interleave_split_concat_branches 0.00% : 0.000042s : 1: label_fine_grained_interleaved_index 0.00% : 0.000039s : 1: label_micro_interleaved_index 0.05% : 0.000968s : 1: loop_unroll 0.00% : 0.000038s : 1: merge_cast_opt 0.00% : 0.000038s : 1: micro_interleaved_order_control 0.01% : 0.000113s : 1: offloading_packed_experts 0.03% : 0.000576s : 44: opt.transform.a1a2 0.00% : 0.000055s : 1: opt.transform.loop_unroll_optimizer 0.34% : 0.007124s : 123: opt.transform.opt_a 0.01% : 0.000212s : 1: opt.transform.opt_after_cconv 0.05% : 0.000999s : 27: opt.transform.opt_b 0.01% : 0.000254s : 1: opt.transform.opt_trans_graph 0.00% : 0.000103s : 3: opt.transform.special_op_eliminate 0.01% : 0.000199s : 4: opt.transform.symbol_engine_opt 1.49% : 0.031620s : 1: opt_a 0.04% : 0.000822s : 1: opt_after_cconv 0.13% : 0.002668s : 1: opt_b 1.93% : 0.040918s : 1: optimize 0.00% : 0.000074s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000063s : 1: order_py_execute_after_rewriter 0.00% : 0.000069s : 1: overlap_grad_flash_sp 0.00% : 0.000038s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000075s : 1: overlap_grad_ring_attention 0.00% : 0.000042s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000069s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000042s : 1: overlap_param_gather 0.00% : 0.000059s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000042s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000132s : 1: parallel-infer-symbol 0.00% : 0.000066s : 1: parallel-infer-symbol-second 0.00% : 0.000044s : 1: partial_unused_args_eliminate 0.00% : 0.000064s : 1: pipeline_parallel_scheduler 0.01% : 0.000120s : 1: pipeline_split 0.00% : 0.000104s : 1: pre_auto_parallel 0.00% : 0.000055s : 1: py_interpret_to_execute 0.00% : 0.000079s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000060s : 1: remove_cast_before_assign_add 0.01% : 0.000313s : 1: remove_dup_value 0.13% : 0.002681s : 2: renormalize.infer 0.07% : 0.001490s : 2: renormalize.specialize 0.00% : 0.000039s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000057s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000357s : 1: rewriter_after_opt_a 0.01% : 0.000178s : 2: rewriter_before_opt_a 0.00% : 0.000041s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000067s : 1: slice_recompute_activation 0.00% : 0.000049s : 1: split_layernorm_comm 0.00% : 0.000040s : 1: split_matmul_comm_elemetwise 0.00% : 0.000068s : 1: swap_dp_allreduce_reducescatter 0.03% : 0.000659s : 1: symbol_engine_optimizer 93.24% : 1.975405s : 1: task_emit 0.02% : 0.000416s : 1: tuple_transform 0.80% : 0.016939s : 1: type_inference 0.01% : 0.000274s : 1: validate [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.506 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[0], node name[Default/StreamSend-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_24{[0]: ValueNode StreamSend}], event id[2] [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.259.520 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1785] Run] End [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.259.531 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.542 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[1], node name[Default/StreamRecv-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_25{[0]: ValueNode StreamRecv}], event id[2] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.259.569 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.572 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[2], node name[Default/StreamSend-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_26{[0]: ValueNode StreamSend}], event id[3] [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.259.593 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion in 126.75 us [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.600 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[3], node name[Default/StreamRecv-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_27{[0]: ValueNode StreamRecv}], event id[3] [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.259.592 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.600 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion in 152.63 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.259.616 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion is enabled. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.628 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[4], node name[Default/StreamSend-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_28{[0]: ValueNode StreamSend}], event id[4] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.259.629 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.259.632 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:239] SavePassesConfig] Running_passes: ['a1a2.r1.a_1', 'a1a2.r1.a_1.inline', 'opt_a.r1.auto_parallel', 'opt_a.r1.flash_sp', 'opt_a.r1.flash_sp_send_recv_attached', 'opt_a.r1.parallel', 'opt_a.r1.parallel_renormalize', 'opt_a.r1.receive_attached', 'opt_a.r1.virtual_dataset', 'opt_a.r1.virtual_dataset.virtual_dataset_eliminate', 'opt_a.r1.virtual_output', 'opt_a.r1.virtual_output.virtual_output_eliminate', 'opt_a.r2.a_1', 'opt_a.r2.a_1.tuple_list_get_item_eliminator', 'opt_a.r2.accelerated_algorithm', 'opt_a.r2.accelerated_algorithm.less_batch_normalization', 'opt_a.r2.auto_parallel', 'opt_a.r2.flash_sp', 'opt_a.r2.renormalize', 'opt_a.r3.auto_parallel'] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.627 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion is enabled. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.654 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[5], node name[Default/StreamRecv-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_29{[0]: ValueNode StreamRecv}], event id[4] [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.259.691 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1047] SaveCompiledGraph] Save compiled func graph(4_3_1___main___Net_construct_20) phase(train.1738915084037101568.281470258177424.0..)! [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.714 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[6], node name[Default/StridedSlice-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_30{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_x, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.259.724 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1065] SaveCompiledGraph] End save compiled func graph! [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.768 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[7], node name[Default/StridedSlice-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.259.767 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1326] CompileInner] [PROF]ParallelPostProcess costs 0.008 msec. [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.259.774 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.259.789 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1154] CleanCompileRes] Clean compile resource start [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.259.798 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.259.797 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_1_deal_ref_output in 357.59 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.797 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion in 144.99 us [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.815 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[8], node name[Default/StridedSlice-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.824 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion is enabled. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:06.259.864 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:166] AclAfterCreateKernel] [PROF]AclAfterCreateKernel costs 0.788 msec. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.874 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[9], node name[Default/StridedSlice-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_33{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.920 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[10], node name[Default/StridedSlice-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.259.926 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:195] OptimizeACLGraphAfterCreateKernel] [PROF]OptimizeACLGraphAfterCreateKernel costs 0.867 msec. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.259.929 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion in 288.06 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.259.956 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion is enabled. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.259.965 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[11], node name[Default/StridedSlice-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.259.984 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion in 122.88 us [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.003 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[12], node name[Default/Mul-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35}] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.260.010 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion is enabled. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.032 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[13], node name[Default/StreamSend-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_37{[0]: ValueNode StreamSend}], event id[5] [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.033 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.060 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[14], node name[Default/StreamRecv-op3], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_38{[0]: ValueNode StreamRecv}], event id[5] [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.096 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[15], node name[Default/AllGather-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36}], group[2-3358271254418797552] [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.124 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[16], node name[Default/StreamSend-op4], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_40{[0]: ValueNode StreamSend}], event id[0] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.260.135 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion in 99.24 us [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.152 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[17], node name[Default/StreamRecv-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_41{[0]: ValueNode StreamRecv}], event id[0] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.260.161 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion is enabled. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.187 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[18], node name[Default/Split-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.234 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[19], node name[Default/Concat-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2}] [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.231 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1102] OptimizeExecutionOrder] [PROF]OptimizeExecutionOrder costs 0.276 msec. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.263 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[20], node name[Default/StreamSend-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_46{[0]: ValueNode StreamSend}], event id[6] [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.260.264 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion in 283.58 us [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.269 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1064] CompileGraphImpl] [PROF]CreateKernel costs 164.228 msec. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.291 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[21], node name[Default/StreamRecv-op5], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_47{[0]: ValueNode StreamRecv}], event id[6] [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.260.291 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion is enabled. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.260.312 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion in 125.79 us [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.325 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[22], node name[Default/AllGather-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43}], group[2-4190060298023907007] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.260.338 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion is enabled. [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.354 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[23], node name[Default/StreamSend-op6], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_49{[0]: ValueNode StreamSend}], event id[1] [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.380 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[24], node name[Default/StreamRecv-op6], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_50{[0]: ValueNode StreamRecv}], event id[1] [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.412 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[25], node name[Default/Split-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.449 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[26], node name[Default/Concat-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1}] [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.475 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[27], node name[Default/StreamSend-op7], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_55{[0]: ValueNode StreamSend}], event id[7] [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.492 [mindspore/ccsrc/backend/common/session/session_basic.cc:1152] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.501 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[28], node name[Default/StreamRecv-op7], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_56{[0]: ValueNode StreamRecv}], event id[7] [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.260.488 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] DEBUG(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.519 [mindspore/ccsrc/debug/summary/summary.cc:52] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.533 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[29], node name[Default/AllGather-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}], group[2-5488101015797526856] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.260.533 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] DEBUG(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.537 [mindspore/ccsrc/debug/summary/summary.cc:57] RecurseSetSummaryNodesForAllGraphs] This function should be skipped on GE backend. [INFO] DEBUG(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.554 [mindspore/ccsrc/debug/data_dump/dump_json_parser.cc:1207] UpdateNeedDumpKernels] Get kernel dump flag [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.560 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[30], node name[Default/StreamSend-op8], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_58{[0]: ValueNode StreamSend}], event id[8] [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.260.561 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion in 244.25 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.260.587 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops is enabled. [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.260.592 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.596 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[31], node name[Default/StreamRecv-op8], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_59{[0]: ValueNode StreamRecv}], event id[8] [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.614 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1183] PreprocessBeforeRun] Current Exec Order Algo in MS Context is bfs [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.623 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[32], node name[Default/StreamSend-op9], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_60{[0]: ValueNode StreamSend}], event id[9] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.260.669 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.682 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[33], node name[Default/StreamRecv-op9], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_61{[0]: ValueNode StreamRecv}], event id[9] [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.685 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1046] DoStreamAssign] Status record: start stream assign, kernel_graph0 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.260.683 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion in 314.75 us [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.260.707 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.708 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[34], node name[Default/StreamSend-op10], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_62{[0]: ValueNode StreamSend}], event id[10] [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.725 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op0 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.260.718 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion is enabled. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.260.729 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.735 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[35], node name[Default/StreamRecv-op10], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_63{[0]: ValueNode StreamRecv}], event id[10] [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.778 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op1 [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.771 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1115] CompileGraphImpl] [PROF]PreprocessBeforeRun costs 7.86 msec. [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.260.815 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1161] CreateDeviceAddress] Status record: start create device address. graph id: 0 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.821 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op2 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.848 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op3 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.260.866 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops in 256.19 us [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.883 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op4 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.260.917 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:318] GEUnifyMindIR] [PROF]GEUnifyMindIR costs 8.437 msec. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.923 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op5 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.968 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Mul-op0 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.260.995 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op0 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.260.997 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion in 253.17 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.261.024 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion is enabled. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.261.095 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_0_erase_visit_attr in 288.9 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.261.296 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion in 244.54 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.261.323 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops is enabled. [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.261.404 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.261.476 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_1_deal_ref_output in 344.3 us [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:06.261.540 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:166] AclAfterCreateKernel] [PROF]AclAfterCreateKernel costs 0.77 msec. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.261.565 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops in 215.37 us [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.261.597 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:195] OptimizeACLGraphAfterCreateKernel] [PROF]OptimizeACLGraphAfterCreateKernel costs 0.842 msec. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.261.609 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:38] MarkRefGraph] Mark graph is ref graph: 0 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.261.617 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:318] GEUnifyMindIR] [PROF]GEUnifyMindIR costs 8.895 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.261.703 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.261.714 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1175] CleanCompileRes] Clean compile resource end [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.261.752 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] End compiling 'Net.construct'. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.261.785 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1334] CompileInner] [PROF]CleanCompileRes costs 1.987 msec. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.261.804 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1335] CompileInner] Finish compiling. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:06.261.826 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1336] CompileInner] [PROF]compile_graph costs 2221.4 msec. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.261.892 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1102] OptimizeExecutionOrder] [PROF]OptimizeExecutionOrder costs 0.266 msec. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.261.928 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1064] CompileGraphImpl] [PROF]CreateKernel costs 165.715 msec. [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.261.989 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1168] CreateDeviceAddress] Status record: end create device address. graph id: 0 [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.039 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1123] CompileGraphImpl] [PROF]CreateDeviceAddress costs 1.211 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.083 [mindspore/ccsrc/backend/common/session/session_basic.cc:1152] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] DEBUG(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.107 [mindspore/ccsrc/debug/summary/summary.cc:52] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] DEBUG(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.125 [mindspore/ccsrc/debug/summary/summary.cc:57] RecurseSetSummaryNodesForAllGraphs] This function should be skipped on GE backend. [INFO] DEBUG(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.142 [mindspore/ccsrc/debug/data_dump/dump_json_parser.cc:1207] UpdateNeedDumpKernels] Get kernel dump flag [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.262.136 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.172 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1177] CacheGraphOutputToFrontNodeWithIndex] Get graph backend output nodes. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.201 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1183] PreprocessBeforeRun] Current Exec Order Algo in MS Context is bfs [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.210 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1185] CacheGraphOutputToFrontNodeWithIndex] Get graph front output nodes. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.232 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1046] DoStreamAssign] Status record: start stream assign, kernel_graph0 [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:06.262.250 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_x, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.265 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op0 [INFO] SESSION(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.283 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1203] CacheGraphOutputToFrontNodeWithIndex] Backend output: Default/AllGather-op2 debug string: @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} node ptr:0x377bef60 with index: 0 map to front node: Default/AllGather-op2 debug string: @4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} node ptr: 0x37771bb0 with index: 0 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.262.278 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unfold_inputs_for_special_nodes_pm_0_ascend_convert_tuple_input_to_dynamic_input in 575.01 us [INFO] PARALLEL(187764,ffff97badc10,python):2025-02-07-15:58:06.262.302 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_y, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.316 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op1 [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.331 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:766] CompileGraph] Status record: end compile graph. graph id: 0 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.262.329 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 2 for node Default/AllGather-op0, group: 2-5004544844489628105 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.359 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op2 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.262.352 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:38] MarkRefGraph] Mark graph is ref graph: 0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.386 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op3 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.262.385 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.424 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op4 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.262.434 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op2 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.460 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op5 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.262.466 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op1 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.262.475 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_0_process_call_inline in 104.03 us [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.505 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Mul-op0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.262.532 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op0 [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.536 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1605] CompileGraphFromSegment] Compile cut segment, the cut node: @4_3_1___main___Net_construct_20:ValueNode_64{[0]: ValueNode Return, [1]: CNode_22} [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.577 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1129] CompileGraphs] [PROF]CompileSubGraph costs 188.122 msec. [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.613 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:976] ExportCompileCacheKBK] Compile cache: disable by front compile cache config. [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.675 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1143] CompileGraphs] Status record: construct the graph compiler info. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.711 [mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc:1001] Parse] Control node parser is not inited. [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.767 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:859] Transform] Graph(kernel_graph_0) transforms actor begin, strategy:pipeline [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.262.795 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_1_seed_adapter in 282.45 us [INFO] UTILS(187764,ffff97badc10,python):2025-02-07-15:58:06.262.826 [mindspore/ccsrc/utils/dynamic_obfuscation/registry_opaque_predicate.cc:112] init_calling_count] calling_count_ has been initialized to 0 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.262.866 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_2_insert_tensor_move_for_communication in 29.12 us [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.925 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:480] InitGraphParameterStore] Init graph parameter store: kernel_graph_0, outer size: 2 [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.262.934 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1893] RunGraph] Status record: start run actor: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.967 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 0, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_x [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.262.989 [mindspore/ccsrc/runtime/device/pre_launch_comm.cc:200] PreLaunchCommKernel] No hccl kernel to pre launch [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.262.997 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 1, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_y [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.262.992 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_3_process partial inline in 96.63 us [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:06.263.018 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1918] RunGraph] [PROF]PreLaunchCommKernel costs 0.04 msec. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.038 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3902e070 for node:ValueNode 0 node addr:0x377789e0 device type:2 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.263.057 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:966] SpawnMultiPipelineActor] Enable runtime asynchronously launch kernel, default actor thread num 5, current actor thread num: 5 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.263.063 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_4_expander_fallback in 42.8 us [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.075 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3902e070 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.101 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x390414c0 for node:ValueNode 2 node addr:0x37778b10 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.263.093 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.117 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x390414c0 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.263.109 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unfold_inputs_for_special_nodes_pm_0_ascend_convert_tuple_input_to_dynamic_input in 658.33 us [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.142 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x39041900 for node:ValueNode (0, 0, 0) node addr:0x3777c2e0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.158 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x39041900 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.263.148 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.177 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x39041d20 for node:ValueNode 1 node addr:0x37778350 device type:2 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.263.182 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:394] operator()] Init defrag memory step freq. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.193 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x39041d20 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.263.188 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_5_convert_pad_v3_paddings in 96.09 us [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.263.202 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:396] operator()] Config defrag memory step freq : . [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.216 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x39042510 for node:ValueNode (1, 1, 1) node addr:0x3777a240 device type:2 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.263.219 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:405] operator()] Defrag memory step freq : 100. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.231 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x39042510 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.265 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x39042d00 for node:ValueNode (2, 2, 4) node addr:0x37779ef0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.280 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x39042d00 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.263.277 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:638] PrepareDataForDeviceTensorStore] Prepare store data, input tensor size: 0, arg size: 2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.301 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x390434f0 for node:ValueNode (0, 2, 0) node addr:0x3777af30 device type:2 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.263.305 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_6_convert_pad_v3_grad_paddings in 89.67 us [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.317 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x390434f0 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.263.310 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:835] AllocGEFixMemory] Start AllocGEFixMemory [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.338 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x39043ce0 for node:ValueNode (2, 4, 4) node addr:0x3777b1c0 device type:2 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.263.310 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_0_process_call_inline in 98.86 us [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.353 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x39043ce0 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.263.350 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:652] PrepareDataForDeviceTensorStore] prepare data for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.375 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x390444d0 for node:ValueNode (0, 0, 2) node addr:0x37779dc0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.391 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x390444d0 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.263.386 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-0, debug name:ValueNode 2, front node:ValueNode 2 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.263.413 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 2 front node:ValueNode 2 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.263.437 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x338fe630 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.263.457 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 2 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:06.263.497 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.530 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:442] ChangeGraphMode] Enable kbk subgraph execute and set run mode for graph: 0 to GraphMode. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.263.542 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_7_resize_bilinear_add_attr in 207.08 us [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.550 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:462] TryEnableKbkSubGraphExecMode] Enable kbk subgraph execute mode for actor set: kernel_graph_0 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.263.575 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_8_backend_custom_depend in 7.52 us [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.606 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:391] TryEnableInputOptimize] Enable input optimize for actor set: kernel_graph_0 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.263.605 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 3 for node Default/AllGather-op1, group: 2-4190060298023907007 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.263.617 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:237] GEBackendOptimizeACL] [PROF]GEBackendOptimizeACL costs 1.268 msec. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.649 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_y for host data source actor. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.263.659 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op1 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.263.675 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:137] OptimizeACLGraph] [PROF]OptimizeACLGraph costs 2.07 msec. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.703 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_y for front node:@4_3_1___main___Net_construct_20:param_y index:0 position:1 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.263.704 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op3 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.263.692 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_1_seed_adapter in 325.63 us [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.729 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_x for host data source actor. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.263.733 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.762 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_x for front node:@4_3_1___main___Net_construct_20:param_x index:0 position:0 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.263.763 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_2_insert_tensor_move_for_communication in 32.49 us [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.263.763 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 2 for node Default/AllGather-op0, group: 2-5004544844489628105 [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.263.785 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.803 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2268] BuildDataPrepareActorForGraphParameterStore] Create data prepare actor: kernel_graph_0_DataPrepareActor [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.263.818 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.263.866 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op2 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.263.895 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op1 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.263.890 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_3_process partial inline in 94.5 us [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.927 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2224] BuildLoopCountActor] Create loop count actor: kernel_graph_0_LoopCountActor [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.263.956 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2257] BuildOutputActor] Create output actor: kernel_graph_0_OutputActor [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.040 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1509] CacheGraphOutputToActor] Cache graph 0 output node:Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} with index:0 to actor:kernel_graph0_SuperKernelActor, from front node:Default/AllGather-op2 debug string:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} with index:0 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.264.086 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_4_expander_fallback in 159.47 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.264.087 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-1, debug name:ValueNode (2, 2, 2), front node:ValueNode (2, 2, 2) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.264.126 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 2, 2) front node:ValueNode (2, 2, 2) [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.264.150 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x338fe940 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.264.173 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 2, 2) [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.218 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.264.223 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_5_convert_pad_v3_paddings in 99.72 us [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.243 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x390404a0 origin ref count:2 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.264.246 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] StridedSlice select aclop kernel [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.271 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.289 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x39040870 origin ref count:2 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.264.344 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_6_convert_pad_v3_grad_paddings in 87.87 us [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.382 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1711] Link] [PROF]GraphSchedulerLinkSinkMode costs 0.204 msec. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.410 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.264.416 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-2, debug name:ValueNode (0, 2, 0), front node:ValueNode (0, 2, 0) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.431 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph0_SuperKernelActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.448 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_LoopCountActor@ to actor:kernel_graph_0_OutputActor@ [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.264.450 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 2, 0) front node:ValueNode (0, 2, 0) [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.465 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_OutputActor@ to actor:kernel_graph_0_DataPrepareActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.482 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:3713] LinkControlArrowForCopyActor] Link control arrow for copy actor start, copy actor size:0 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.264.473 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x338fef80 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.264.495 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 2, 0) [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.512 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x39049050 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.547 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:556] AddResultArrow] Add result arrow from actor:kernel_graph0_SuperKernelActor to actor:kernel_graph_0_OutputActor@ from kernel@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} device address:0x39049050 original ref count:18446744073709551615 ref count:18446744073709551615 dynamic ref count:2147483647 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.264.580 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_7_resize_bilinear_add_attr in 204.18 us [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.656 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.264.617 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_8_backend_custom_depend in 8.39 us [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.720 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.264.719 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:237] GEBackendOptimizeACL] [PROF]GEBackendOptimizeACL costs 1.531 msec. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.760 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.264.763 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-3, debug name:ValueNode 0, front node:ValueNode 0 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.794 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.264.789 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:137] OptimizeACLGraph] [PROF]OptimizeACLGraph costs 2.44 msec. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.264.796 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 0 front node:ValueNode 0 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.264.819 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x338ff440 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.831 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.264.838 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.264.863 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.264.901 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.264.897 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Mul select aclnn kernel [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.264.938 [mindspore/ops/kernel/ascend/opapi/aclnn_kernel_build.cc:77] IsEnabledAclnnDispatch] AllGather is not defined in opdef. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.265.057 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] AllGather select hccl kernel [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.066 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-4, debug name:ValueNode (1, 1, 1), front node:ValueNode (1, 1, 1) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.084 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.099 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (1, 1, 1) front node:ValueNode (1, 1, 1) [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.265.102 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op0 is view op and not support aclnn [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.119 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x338ffc30 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.265.088 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 4 for node Default/AllGather-op2, group: 2-12944936785892925600 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.129 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.265.113 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 3 for node Default/AllGather-op1, group: 2-5208665662337742843 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.139 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (1, 1, 1) [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.265.169 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.187 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.265.209 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op3 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.228 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.265.237 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op2 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.265.241 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Split select aclop kernel [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.298 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.338 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.265.348 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Concat select aclnn kernel [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.366 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-5, debug name:ValueNode 1, front node:ValueNode 1 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.388 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.397 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 1 front node:ValueNode 1 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.420 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x33900110 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.431 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.441 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 1 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.265.447 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op1 is view op and not support aclnn [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.502 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.540 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.588 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.265.579 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] StridedSlice select aclop kernel [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.624 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.656 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.668 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-6, debug name:ValueNode (0, 0, 0), front node:ValueNode (0, 0, 0) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.689 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.700 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 0, 0) front node:ValueNode (0, 0, 0) [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.265.718 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:298] SelectKernel] [PROF]SelectKernel costs 2.006 msec. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.735 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.724 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x33900900 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.756 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 0, 0) [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.265.748 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:302] PrintOpSelectedNum] Number of GE_KERNEL, INTERNAL_KERNEL, OPAPI_KERNEL, ACL_KERNEL, HCCL_KERNEL, HOST_KERNEL: [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.768 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.265.782 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:308] PrintOpSelectedNum] 0 0 3 8 3 0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.787 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 4, send_actor : 0x377c5060, recv_actor : 0x377c5a00. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.803 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 8, send_actor : 0x390576a0, recv_actor : 0x39057eb0. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.816 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 3, send_actor : 0x377a3dc0, recv_actor : 0x377c4770. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.830 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 11, send_actor : 0x3905b650, recv_actor : 0x3905c090. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.843 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 10, send_actor : 0x3905a1d0, recv_actor : 0x3905ac10. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.856 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 1, send_actor : 0x3904fe00, recv_actor : 0x39050740. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.869 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 5, send_actor : 0x377c6440, recv_actor : 0x377c6e80. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.882 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 6, send_actor : 0x3904dfa0, recv_actor : 0x3904e970. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.896 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 7, send_actor : 0x39052450, recv_actor : 0x39052d90. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.909 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 9, send_actor : 0x39058ed0, recv_actor : 0x39059790. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.265.922 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 2, send_actor : 0x39054080, recv_actor : 0x39054ee0. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.265.968 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_0_set_fracz_group_attr in 42.4 us [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.265.982 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-7, debug name:ValueNode (2, 4, 4), front node:ValueNode (2, 4, 4) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.266.014 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 4, 4) front node:ValueNode (2, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.266.035 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x339010f0 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.035 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1062] DoStreamAssign] Status record: end stream assign, kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.266.055 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 4, 4) [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.119 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.266.189 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op0 addr:0x39041170 type:48, kernel tensor addr:0x39040f00, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.230 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:2 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.266.248 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_1_insert_identity in 244.92 us [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.272 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.301 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 2, record_stream_id_ : 0. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.266.297 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_y front node:@4_3_1___main___Net_construct_20:param_y backend is weight:0 front is weight:0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.266.298 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op1 addr:0x390449c0 type:48, kernel tensor addr:0x39044750, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.334 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:06.266.343 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_x front node:@4_3_1___main___Net_construct_20:param_x backend is weight:0 front is weight:0 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.357 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:3 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.266.376 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.391 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.415 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 3, record_stream_id_ : 0. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8a7fc0f0,python):2025-02-07-15:58:06.266.420 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:0, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.266.439 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph0_SuperKernelActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:1, sequential num:2001075757 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.446 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.468 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:4 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.266.470 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op2 addr:0x39044ed0 type:48, kernel tensor addr:0x39044c60, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.266.469 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Mul select aclnn kernel [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.494 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.517 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 4, record_stream_id_ : 0. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.266.512 [mindspore/ops/kernel/ascend/opapi/aclnn_kernel_build.cc:77] IsEnabledAclnnDispatch] AllGather is not defined in opdef. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.266.529 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.549 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.570 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:5 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.266.572 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 4 for node Default/AllGather-op2, group: 2-16453000547691086251 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.598 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.266.605 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op3 addr:0x390453e0 type:48, kernel tensor addr:0x39045170, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.621 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 5, record_stream_id_ : 0. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.649 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] GE_ADPT(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.266.643 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.670 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.266.676 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] AllGather select hccl kernel [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.266.689 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op4 addr:0x390458f0 type:48, kernel tensor addr:0x39045680, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.699 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.722 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 0, record_stream_id_ : 2. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.266.715 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_2_insert_type_transform_op in 432.38 us [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.266.744 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.750 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.784 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:6 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.266.781 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op0 is view op and not support aclnn [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.266.790 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_3_graph_view_replace in 46.37 us [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.818 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.266.831 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op5 addr:0x39045e00 type:48, kernel tensor addr:0x39045b90, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.266.831 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:288] GEBackendOptimizeACLAfterKernelSelect] [PROF]GEBackendOptimizeACLAfterKernelSelect costs 1.02 msec. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.841 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 6, record_stream_id_ : 0. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.871 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.889 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:1 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.266.883 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:172] OptimizeACLGraphAfterKernelSelect] [PROF]OptimizeACLGraphAfterKernelSelect costs 1.078 msec. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.266.888 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.916 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.266.921 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 6_Default/StridedSlice-op0, front node: @4_3_1___main___Net_construct_20:param_x, with index: 0, addr index: 0, device type: 2 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.939 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 1, record_stream_id_ : 3. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.966 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.266.966 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.266.964 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 6_Default/StridedSlice-op0, outer index: 0, inner index:0, front node: @4_3_1___main___Net_construct_20:param_x [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.266.985 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:7 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.012 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.267.012 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Split select aclop kernel [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.267.023 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 0, inner index: 0, dynamic is 0 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.033 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 7, record_stream_id_ : 0. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.029 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Mul-op0 addr:0x39046310 type:48, kernel tensor addr:0x390460a0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.061 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.080 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:8 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.267.079 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_after_inline_pm_0_DropoutGenMask is enabled. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.081 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35} is thread safe. [INFO] GE_ADPT(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.080 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.107 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.267.106 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_after_inline_pm_0_DropoutGenMask in 1.7 us [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.130 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 8, record_stream_id_ : 2. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.154 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.160 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op0 addr:0x39046820 type:48, kernel tensor addr:0x390465b0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 2)) [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.174 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:9 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.200 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.205 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36} is thread safe. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.267.213 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_1_cse in 83.27 us [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.226 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 9, record_stream_id_ : 3. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.267.230 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Concat select aclnn kernel [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.254 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.267.254 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_2_eliminate_maketuple_getitem in 17.07 us [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.274 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:10 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.277 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op0 addr:0x39046da0 type:48, kernel tensor addr:0x39046bc0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.267.278 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_3_insert_move_to in 0.57 us [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.267.275 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 6_Default/StridedSlice-op0, input index: 0, device tensor: 0x338fdbb0, ptr: 0x12c7fd801000, ref cnt: 1 [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.302 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.305 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op0 addr:0x39047290 type:48, kernel tensor addr:0x390470b0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.267.314 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:355] GEAfterInlineOptimize] [PROF]GEAfterInlineOptimize costs 0.247 msec. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.300 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 0_Default/StreamSend-op0, task_id_on_stream : 1. [INFO] KERNEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.323 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 10, record_stream_id_ : 4. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.343 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.267.347 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:391] InlineCallGraph] [PROF]InlineCallGraph costs 0.439 msec. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.267.322 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor6_Default/StridedSlice-op0, actor input: 0, graph input: 1, device tensor: 0x338fdbb0, ptr: 0x12c7fd801000, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.342 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.267.346 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op1 is view op and not support aclnn [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.350 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:931] PrintGraphExecuteOrder] Graph 0 execution order: [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.267.386 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 9_Default/StridedSlice-op3, front node: @4_3_1___main___Net_construct_20:param_y, with index: 0, addr index: 1, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.410 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op0 addr:0x39047710 type:48, kernel tensor addr:0x390474a0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.267.420 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 9_Default/StridedSlice-op3, outer index: 1, inner index:0, front node: @4_3_1___main___Net_construct_20:param_y [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.267.425 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.449 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[0], node name[Default/StreamSend-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_24{[0]: ValueNode StreamSend}], event id[2] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.454 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.267.448 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 1, inner index: 0, dynamic is 0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.450 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1062] DoStreamAssign] Status record: end stream assign, kernel_graph0 [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.490 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[1], node name[Default/StreamRecv-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_25{[0]: ValueNode StreamRecv}], event id[2] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.513 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op1 addr:0x39047c20 type:48, kernel tensor addr:0x390479b0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 4)) [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.521 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[2], node name[Default/StreamSend-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_26{[0]: ValueNode StreamSend}], event id[3] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.532 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.537 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.267.542 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:868] InlineSwitchGraph] [PROF]InlineSwitchGraph costs 0.167 msec. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.549 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[3], node name[Default/StreamRecv-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_27{[0]: ValueNode StreamRecv}], event id[3] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.550 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43} is thread safe. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.576 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[4], node name[Default/StreamSend-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_28{[0]: ValueNode StreamSend}], event id[4] [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.568 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 2, actor name : 1_Default/StreamRecv-op0, task_id_on_stream : 1. [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.267.573 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1055] CompileGraphImpl] [PROF]OptimizeGraph costs 5.979 msec. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.601 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[5], node name[Default/StreamRecv-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_29{[0]: ValueNode StreamRecv}], event id[4] [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.591 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.267.607 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 9_Default/StridedSlice-op3, input index: 0, device tensor: 0x338fd7e0, ptr: 0x12c7fd801400, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.622 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op1 addr:0x390481a0 type:48, kernel tensor addr:0x39047fc0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.612 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.267.634 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor9_Default/StridedSlice-op3, actor input: 0, graph input: 0, device tensor: 0x338fd7e0, ptr: 0x12c7fd801400, ref cnt: 1 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.638 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.649 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op1 addr:0x390486c0 type:48, kernel tensor addr:0x390484e0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.267.648 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.661 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[6], node name[Default/StridedSlice-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_30{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_x, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.267.672 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:298] SelectKernel] [PROF]SelectKernel costs 2.843 msec. [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.683 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.688 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.711 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 2, record_stream_id_ : 0. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.712 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[7], node name[Default/StridedSlice-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.267.703 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:302] PrintOpSelectedNum] Number of GE_KERNEL, INTERNAL_KERNEL, OPAPI_KERNEL, ACL_KERNEL, HCCL_KERNEL, HOST_KERNEL: [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.267.721 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph0_SuperKernelActor and check running condition:1, sequential num:2001075757 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.743 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.267.740 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:308] PrintOpSelectedNum] 0 0 3 8 3 0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.762 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op1 addr:0x39048b40 type:48, kernel tensor addr:0x390488d0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.764 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:3 [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.763 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[8], node name[Default/StridedSlice-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.267.749 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.728 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8a7fc0f0,python):2025-02-07-15:58:06.267.738 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:317] RunOpData] Actor(kernel_graph_0_OutputActor) receive the input op data and output position:0 device tensor:0x33905c40 ptr:0 ref count:18446744073709551615 origin ref count:18446744073709551615 dynamic ref count:2147483647 from memory pool:0 output node:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} index:0 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.796 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.806 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.787 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 2_Default/StreamSend-op1, task_id_on_stream : 2. [INFO] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.267.762 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:57] IncreaseLoopCount] Loop count actor(kernel_graph_0_LoopCountActor) running, loop count: 1, current count: 1, total running count: 1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.809 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:06.267.821 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.819 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 3, record_stream_id_ : 0. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.816 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[9], node name[Default/StridedSlice-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_33{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8a7fc0f0,python):2025-02-07-15:58:06.267.837 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:388] CreateOutputTensor] Create output tensor, output node: Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}, output index: 0, output position: 0, output kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.847 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.863 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op2 addr:0x39049050 type:48, kernel tensor addr:0x39048de0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.866 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:4 [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.875 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[10], node name[Default/StridedSlice-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.892 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.267.899 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8a7fc0f0,python):2025-02-07-15:58:06.267.894 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:470] CreateOutputTensor] Create device tensor:0xfffe740095a0, size: 512 type:48 output node:Default/AllGather-op2 output index:0 output position:0, origin output device tensor: 0x33905c40 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.914 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 4, record_stream_id_ : 0. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.922 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[11], node name[Default/StridedSlice-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.942 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.267.933 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_0_set_fracz_group_attr in 67.45 us [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.945 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op1 [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.960 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[12], node name[Default/Mul-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35}] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.963 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:5 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.972 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 3, actor name : 3_Default/StreamRecv-op1, task_id_on_stream : 1. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.267.990 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[13], node name[Default/StreamSend-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_37{[0]: ValueNode StreamSend}], event id[5] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.267.996 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.267.995 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 2. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.018 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[14], node name[Default/StreamRecv-op3], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_38{[0]: ValueNode StreamRecv}], event id[5] [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.268.015 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op1 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.019 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 5, record_stream_id_ : 0. [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.048 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.056 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[15], node name[Default/AllGather-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36}], group[2-5004544844489628105] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.052 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op1 input kernel:Default/StridedSlice-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.068 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.080 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x39041170 origin ref count:2 [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.085 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[16], node name[Default/StreamSend-op4], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_40{[0]: ValueNode StreamSend}], event id[0] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.096 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.115 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 0, record_stream_id_ : 2. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.112 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[17], node name[Default/StreamRecv-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_41{[0]: ValueNode StreamRecv}], event id[0] [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.268.105 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op1 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.118 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op2 input kernel:Default/StridedSlice-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.136 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x390449c0 origin ref count:2 [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.148 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[18], node name[Default/Split-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2}] [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.268.132 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 4_Default/StreamSend-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.268.149 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebu[INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.140 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.180 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op4 input kernel:Default/StridedSlice-op3 need copy:1 for actor:kernel_graph0_SuperKernelActor@ g] Begin launch kernel: Default/StreamSend-op2 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.177 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:6 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.197 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x390453e0 origin ref count:2 [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.194 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[19], node name[Default/Concat-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2}] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.203 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.229 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op5 input kernel:Default/StridedSlice-op4 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.268.221 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_1_insert_identity in 250.84 us [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.231 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[20], node name[Default/StreamSend-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_46{[0]: ValueNode StreamSend}], event id[6] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.224 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 6, record_stream_id_ : 0. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.246 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x390458f0 origin ref count:2 [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.260 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[21], node name[Default/StreamRecv-op5], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_47{[0]: ValueNode StreamRecv}], event id[6] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.251 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.270 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:1 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.268.256 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.278 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op2 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.293 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[22], node name[Default/AllGather-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43}], group[2-4190060298023907007] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.294 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.268.300 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op2 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.268.291 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.292 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x39044ed0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.331 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op5 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.322 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[23], node name[Default/StreamSend-op6], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_49{[0]: ValueNode StreamSend}], event id[1] [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.348 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[24], node name[Default/StreamRecv-op6], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_50{[0]: ValueNode StreamRecv}], event id[1] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.313 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 1, record_stream_id_ : 3. [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.338 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.357 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:7 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.268.325 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 4, actor name : 5_Default/StreamRecv-op2, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.268.344 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.268.360 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.347 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x39045e00 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.371 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op0 input kernel:Default/Mul-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.268.371 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_pool.cc:423] BestFitAscendMemoryPool] BestFitAscendMemoryPool constructed, older memory allocator is enabled. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.380 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[25], node name[Default/Split-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.382 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.401 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 7, record_stream_id_ : 0. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.386 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x39046310 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.409 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op0 input kernel:Default/AllGather-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.423 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x39046820 origin ref count:2 [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.268.413 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:167] Initialize] Skip initialization of memory pool since init size is not configured. [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.418 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[26], node name[Default/Concat-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1}] [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.443 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[27], node name[Default/StreamSend-op7], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_55{[0]: ValueNode StreamSend}], event id[7] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.430 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.450 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:8 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.444 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.459 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x39046da0 origin ref count:2 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.268.454 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.468 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[28], node name[Default/StreamRecv-op7], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_56{[0]: ValueNode StreamRecv}], event id[7] [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.268.459 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op2 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.474 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.268.505 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 6_Default/StridedSlice-op0, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.478 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.497 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[29], node name[Default/AllGather-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}], group[2-12944936785892925600] [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.493 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 8, record_stream_id_ : 2. [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.519 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.493 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x39047290 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.517 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op1 input kernel:Default/Concat-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.530 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x39047710 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:06.268.526 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op0 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.538 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:9 [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.522 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[30], node name[Default/StreamSend-op8], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_58{[0]: ValueNode StreamSend}], event id[8] [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.549 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[31], node name[Default/StreamR[DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.554 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op1 input kernel:Default/AllGather-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.568 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x39047c20 origin ref count:2 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.563 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] ecv-op8], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_59{[0]: ValueNode StreamRecv}], event id[8] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.588 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.582 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 9, record_stream_id_ : 3. [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.605 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.593 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[32], node name[Default/StreamSend-op9], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_60{[0]: ValueNode StreamSend}], event id[9] [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.620 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[33], node name[Default/StreamRecv-op9], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_61{[0]: ValueNode StreamRecv}], event id[9] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.611 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x390481a0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.659 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.623 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:10 [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.675 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[34], node name[Default/StreamSend-op10], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_62{[0]: ValueNode StreamSend}], event id[10] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.680 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x390486c0 origin ref count:2 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.687 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.710 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[35], node name[Default/StreamRecv-op10], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_63{[0]: ValueNode StreamRecv}], event id[10] [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.705 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op2 input kernel:Default/Concat-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.720 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x39048b40 origin ref count:2 [INFO] KERNEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.709 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 10, record_stream_id_ : 4. [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.748 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1115] CompileGraphImpl] [PROF]PreprocessBeforeRun costs 8.155 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.736 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:931] PrintGraphExecuteOrder] Graph 0 execution order: [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.769 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[y] debug_name: @kernel_graph0:param_y use count is: 1 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.268.756 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_2_insert_type_transform_op in 495.11 us [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.794 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[x] debug_name: @kernel_graph0:param_x use count is: 1 [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.268.798 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1161] CreateDeviceAddress] Status record: start create device address. graph id: 0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.816 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1603] AddControlArrowForNoInputActor] Add control arrow for no input arrow actor: kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.835 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph0_SuperKernelActor@ [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.840 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[0], node name[Default/StreamSend-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_24{[0]: ValueNode StreamSend}], event id[2] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.268.843 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_3_graph_view_replace in 49.49 us [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.878 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[1], node name[Default/StreamRecv-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_25{[0]: ValueNode StreamRecv}], event id[2] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.268.891 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:288] GEBackendOptimizeACLAfterKernelSelect] [PROF]GEBackendOptimizeACLAfterKernelSelect costs 1.118 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.908 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[2], node name[Default/StreamSend-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_26{[0]: ValueNode StreamSend}], event id[3] [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.935 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[3], node name[Default/StreamRecv-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_27{[0]: ValueNode StreamRecv}], event id[3] [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.268.938 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:887] Transform] [PROF]GraphSchedulerLink costs 4.867 msec. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.268.948 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:172] OptimizeACLGraphAfterKernelSelect] [PROF]OptimizeACLGraphAfterKernelSelect costs 1.182 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.962 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[4], node name[Default/StreamSend-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_28{[0]: ValueNode StreamSend}], event id[4] [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.268.988 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[5], node name[Default/StreamRecv-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_29{[0]: ValueNode StreamRecv}], event id[4] [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.019 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 1_actor_set_kernel_graph_0_invalid_data_arrow_elimination in 1.42003 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.035 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.051 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[6], node name[Default/StridedSlice-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_30{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_x, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.062 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 2_actor_set_kernel_graph_0_multi_actor_fusion in 18.55 us [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.083 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 3_actor_set_kernel_graph_0_batch_data_arrow_fusion in 1.15007 us [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.101 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[7], node name[Default/StridedSlice-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.106 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:904] Transform] Graph(kernel_graph_0) transforms actor end. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.148 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[8], node name[Default/StridedSlice-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.161 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_after_inline_pm_0_DropoutGenMask is enabled. [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.181 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:354] Init] kernel_graph_0 has the parameter input num: 2 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.192 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_after_inline_pm_0_DropoutGenMask in 1.75 us [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.194 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[9], node name[Default/StridedSlice-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_33{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.232 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1153] CompileGraphs] [PROF]GraphScheduler costs 6.534 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.249 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[10], node name[Default/StridedSlice-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.259 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:29] operator()] Create MultiStreamController. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.291 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:40] Refresh] Stream manager initialize, device_context : 0x249fc190, stream_size : 5. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.294 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[11], node name[Default/StridedSlice-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.310 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:214] Resize] Task id on stream manager initialize : 0, stream_size : 5. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.311 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_1_cse in 88.59 us [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.333 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[12], node name[Default/Mul-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35}] [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.336 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1166] CompileGraphs] [PROF]compile_backend_graph costs 1733.03 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.362 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[13], node name[Default/StreamSend-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_37{[0]: ValueNode StreamSend}], event id[5] [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.360 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_2_eliminate_maketuple_getitem in 18.85 us [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.366 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1169] CompileGraphs] Status record: end compile function graph: 4_3_1___main___Net_construct_20, produce actor: kernel_graph_0 [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.389 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[14], node name[Default/StreamRecv-op3], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_38{[0]: ValueNode StreamRecv}], event id[5] [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.393 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end task_emit action. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.387 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_3_insert_move_to in 0.63 us [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.413 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.425 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[15], node name[Default/AllGather-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36}], group[2-5004544844489628105] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.269.420 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.432 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:355] GEAfterInlineOptimize] [PROF]GEAfterInlineOptimize costs 0.284 msec. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.445 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:281] SetLoopCount] Change vm_loop_flag to 0, set loop_size to 1 [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.453 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[16], node name[Default/StreamSend-op4], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_40{[0]: ValueNode StreamSend}], event id[0] [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.477 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start execute action. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.470 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:391] InlineCallGraph] [PROF]InlineCallGraph costs 0.493 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.480 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[17], node name[Default/StreamRecv-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_41{[0]: ValueNode StreamRecv}], event id[0] [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.506 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end execute action. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.513 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[18], node name[Default/Split-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.269.523 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.552 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[19], node name[Default/Concat-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2}] [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.554 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.586 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[20], node name[Default/StreamSend-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_46{[0]: ValueNode StreamSend}], event id[6] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.269.584 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.612 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[21], node name[Default/StreamRecv-op5], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_47{[0]: ValueNode StreamRecv}], event id[6] [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.269.611 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.269.637 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.643 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[22], node name[Default/AllGather-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43}], group[2-5208665662337742843] [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.670 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[23], node name[Default/StreamSend-op6], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_49{[0]: ValueNode StreamSend}], event id[1] [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.684 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:868] InlineSwitchGraph] [PROF]InlineSwitchGraph costs 0.183 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.695 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[24], node name[Default/StreamRecv-op6], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_50{[0]: ValueNode StreamRecv}], event id[1] [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.727 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[25], node name[Default/Split-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.719 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1055] CompileGraphImpl] [PROF]OptimizeGraph costs 7.392 msec. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.765 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[26], node name[Default/Concat-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1}] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.269.762 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.269.787 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.791 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[27], node name[Default/StreamSend-op7], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_55{[0]: ValueNode StreamSend}], event id[7] [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.818 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[28], node name[Default/StreamRecv-op7], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_56{[0]: ValueNode StreamRecv}], event id[7] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.269.811 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.799 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.848 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[29], node name[Default/AllGather-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}], group[2-16453000547691086251] [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.874 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[30], node name[Default/StreamSend-op8], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_58{[0]: ValueNode StreamSend}], event id[8] [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.899 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[31], node name[Default/StreamRecv-op8], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_59{[0]: ValueNode StreamRecv}], event id[8] [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.929 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[32], node name[Default/StreamSend-op9], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_60{[0]: ValueNode StreamSend}], event id[9] [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.269.924 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.269.925 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.269.920 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1168] CreateDeviceAddress] Status record: end create device address. graph id: 0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.269.950 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.955 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[33], node name[Default/StreamRecv-op9], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_61{[0]: ValueNode StreamRecv}], event id[9] [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.269.969 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1123] CompileGraphImpl] [PROF]CreateDeviceAddress costs 1.16 msec. [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.269.973 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.269.982 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[34], node name[Default/StreamSend-op10], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_62{[0]: ValueNode StreamSend}], event id[10] [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.270.007 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[35], node name[Default/StreamRecv-op10], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_63{[0]: ValueNode StreamRecv}], event id[10] [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.270.040 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1115] CompileGraphImpl] [PROF]PreprocessBeforeRun costs 7.86 msec. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.270.084 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1161] CreateDeviceAddress] Status record: start create device address. graph id: 0 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.270.082 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.087 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1177] CacheGraphOutputToFrontNodeWithIndex] Get graph backend output nodes. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.270.107 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.126 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1185] CacheGraphOutputToFrontNodeWithIndex] Get graph front output nodes. [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.270.132 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] SESSION(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.201 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1203] CacheGraphOutputToFrontNodeWithIndex] Backend output: Default/AllGather-op2 debug string: @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} node ptr:0x30cb7bf0 with index: 0 map to front node: Default/AllGather-op2 debug string: @4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} node ptr: 0x30c6aff0 with index: 0 [INFO] GE_ADPT(187764,fffe8affd0f0,python):2025-02-07-15:58:06.270.215 [mindspore/ccsrc/transform/acl_ir/acl_allocator.cc:104] RegisterAllocator] Register AclAllocator [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.247 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:766] CompileGraph] Status record: end compile graph. graph id: 0 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.270.265 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice TotalTime = 1.82234, [21] [bootstrap]: 0.00154899 [type_inference]: 0.0172957 [auto_monad]: 0.00025203 [graph_reusing]: 5.846e-05 [inline]: 0.0150259, [2] [rewriter_before_opt_a]: 0.00012251 [a1a2]: 0.0147689, [2] [Cycle 1]: 0.00187685, [11] [expand_dump_flag]: 6.32997e-06 [switch_simplify]: 6.09e-05 [loop_unroll]: 2.84299e-05 [a_1]: 0.00035015 [recompute_prepare]: 2.67e-05 [updatestate_depend_eliminate]: 1.41801e-05 [updatestate_assign_eliminate]: 5.20993e-06 [updatestate_loads_eliminate]: 4.34997e-06 [parameter_eliminate]: 1.11799e-05 [a_2]: 0.0006319 [parallel_inline_pass]: 2.645e-05 [Cycle 2]: 0.0014369, [11] [expand_dump_flag]: 2.34996e-06 [switch_simplify]: 2.269e-05 [loop_unroll]: 2.155e-05 [a_1]: 0.0001455 [recompute_prepare]: 2.31001e-05 [updatestate_depend_eliminate]: 8.72998e-06 [updatestate_assign_eliminate]: 4.22995e-06 [updatestate_loads_eliminate]: 3.69002e-06 [parameter_eliminate]: 4.33996e-06 [a_2]: 0.00071173 [parallel_inline_pass]: 2.79901e-05 [parallel-infer-symbol]: 0.00013894 [pre_auto_parallel]: 0.00011121 [insert-virtual-dataset]: 0.00135071 [parallel-infer-symbol-second]: 5.00701e-05 [dataset_repeat_opt]: 0.00036406 [pipeline_split]: 0.00012572 [optimize]: 0.0500815, [52] [py_interpret_to_execute]: 6.27e-05 [rewriter_before_opt_a]: 8.05201e-05 [opt_a]: 0.0407288, [3] [Cycle 1]: 0.021938, [46] [expand_dump_flag]: 3.10992e-06 [switch_simplify]: 4.21901e-05 [loop_unroll]: 2.721e-05 [a_1]: 0.00032741 [recompute_prepare]: 3.535e-05 [updatestate_depend_eliminate]: 1.361e-05 [updatestate_assign_eliminate]: 6.49004e-06 [updatestate_loads_eliminate]: 6.66e-06 [parameter_eliminate]: 7.87003e-06 [a_2]: 0.00074822 [accelerated_algorithm]: 3.355e-05 [shard]: 3.63999e-05 [meta_shard_fg_expand]: 6.73998e-06 [shard_inline]: 3.217e-05 [auto_parallel]: 5.665e-05 [parallel]: 0.0122619 [flash_sp]: 5.233e-05 [merge_comm]: 3.79001e-05 [allreduce_fusion]: 2.863e-05 [matmul_add_comm_reduction]: 3.639e-05 [allreduce_slice_to_reducescatter]: 8.10018e-07 [virtual_shard_identity]: 8.816e-05 [virtual_dataset]: 0.00014585 [get_grad_eliminate_]: 6.58099e-05 [virtual_output]: 9.31701e-05 [merge_forward]: 3.316e-05 [cell_reuse_recompute_pass]: 6.34999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00014216 [before_grad]: 8.56799e-05 [inplace_validation]: 2.96801e-05 [parallel_renormalize]: 0.00365693 [update_top_fg]: 1.49e-06 [cast_eliminate]: 8.912e-05 [meta_fg_expand]: 2.82701e-05 [inplace_validation_after_expand]: 4.731e-05 [flash_sp_send_recv_attached]: 5.944e-05 [receive_attached]: 2.431e-05 [after_resolve]: 7.647e-05 [a_after_grad]: 9.22399e-05 [special_op_eliminate]: 6.199e-05 [renormalize]: 3.99305e-08 [add_forward_monad_depend]: 1.218e-05 [auto_monad_grad]: 4.82006e-06 [auto_monad_eliminator]: 6.54e-05 [cse]: 0.00022132 [a_3]: 0.00068934 [Cycle 2]: 0.0111195, [46] [expand_dump_flag]: 4.21004e-06 [switch_simplify]: 6.935e-05 [loop_unroll]: 6.123e-05 [a_1]: 0.00141995 [recompute_prepare]: 7.618e-05 [updatestate_depend_eliminate]: 4.232e-05 [updatestate_assign_eliminate]: 2.45899e-05 [updatestate_loads_eliminate]: 2.35001e-05 [parameter_eliminate]: 7.15e-06 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.270.290 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [a_2]: 0.0014579 [accelerated_algorithm]: 0.000167 [shard]: 4.18499e-05 [meta_shard_fg_expand]: 1.94099e-05 [shard_inline]: 6.46299e-05 [auto_parallel]: 8.00099e-05 [parallel]: 1.825e-05 [flash_sp]: 4.264e-05 [merge_comm]: 3.3e-05 [allreduce_fusion]: 2.613e-05 [matmul_add_comm_reduction]: 2.771e-05 [allreduce_slice_to_reducescatter]: 8.30041e-07 [virtual_shard_identity]: 7.249e-05 [virtual_dataset]: 6.276e-05 [get_grad_eliminate_]: 5.95399e-05 [virtual_output]: 5.992e-05 [merge_forward]: 3.002e-05 [cell_reuse_recompute_pass]: 8.27003e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00014519 [before_grad]: 8.7e-05 [inplace_validation]: 2.802e-05 [parallel_renormalize]: 4.00003e-07 [update_top_fg]: 9.00007e-07 [cast_eliminate]: 6.64099e-05 [meta_fg_expand]: 3.06499e-05 [inplace_validation_after_expand]: 4.001e-05 [flash_sp_send_recv_attached]: 5.81006e-06 [receive_attached]: 3.41993e-06 [after_resolve]: 7.49499e-05 [a_after_grad]: 8.901e-05 [special_op_eliminate]: 6.177e-05 [renormalize]: 0.00286333 [add_forward_monad_depend]: 1.093e-05 [auto_monad_grad]: 4.09002e-06 [auto_monad_eliminator]: 6.198e-05 [cse]: 0.00017029 [a_3]: 0.00072126 [Cycle 3]: 0.00753778, [46] [expand_dump_flag]: 3.51993e-06 [switch_simplify]: 6.439e-05 [loop_unroll]: 5.721e-05 [a_1]: 0.0012369 [recompute_prepare]: 6.763e-05 [updatestate_depend_eliminate]: 4.008e-05 [updatestate_assign_eliminate]: 2.681e-05 [updatestate_loads_eliminate]: 2.50801e-05 [parameter_eliminate]: 7.91997e-06 [a_2]: 0.00130031 [accelerated_algorithm]: 8.02099e-05 [shard]: 0.0001631 [meta_shard_fg_expand]: 2.154e-05 [shard_inline]: 6.25199e-05 [auto_parallel]: 8.199e-05 [parallel]: 1.699e-05 [flash_sp]: 2.21992e-06 [merge_comm]: 3.43099e-05 [allreduce_fusion]: 2.76699e-05 [matmul_add_comm_reduction]: 3.577e-05 [allreduce_slice_to_reducescatter]: 9.69972e-07 [virtual_shard_identity]: 6.786e-05 [virtual_dataset]: 0.00012026 [get_grad_eliminate_]: 0.00012207 [virtual_output]: 5.534e-05 [merge_forward]: 3.088e-05 [cell_reuse_recompute_pass]: 7.13009e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00013053 [before_grad]: 8.60001e-05 [inplace_validation]: 2.756e-05 [parallel_renormalize]: 1.30036e-07 [update_top_fg]: 1.55997e-06 [cast_eliminate]: 6.141e-05 [meta_fg_expand]: 2.96299e-05 [inplace_validation_after_expand]: 3.649e-05 [flash_sp_send_recv_attached]: 4.82006e-06 [receive_attached]: 2.78e-06 [after_resolve]: 6.818e-05 [a_after_grad]: 8.637e-05 [special_op_eliminate]: 5.76399e-05 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.42998e-06 [auto_monad_grad]: 4.71994e-06 [auto_monad_eliminator]: 5.554e-05 [cse]: 0.0001691 [a_3]: 0.00058941 [py_interpret_to_execute_after_opt_a]: 8.69901e-05 [slice_cell_reuse_recomputed_activation]: 3.41299e-05 [rewriter_after_opt_a]: 0.00044985 [convert_after_rewriter]: 7.23e-05 [order_py_execute_after_rewriter]: 7.061e-05 [opt_b]: 0.00250404, [1] [Cycle 1]: 0.0024603, [7] [b_1]: 0.00186244 [b_2]: 6.394e-05 [updatestate_depend_eliminate]: 3.101e-05 [updatestate_assign_eliminate]: 2.404e-05 [updatestate_loads_eliminate]: 2.36999e-05 [renormalize]: 1.01991e-06 [cse]: 0.00014228 [optimize_parallel_all_gather_comm]: 7.753e-05 [overlap_param_gather]: 3.308e-05 [cconv]: 8.63801e-05 [loop_unroll]: 0.00100321 [opt_after_cconv]: 0.00077763, [1] [Cycle 1]: 0.00073991, [7] [c_1]: 0.00022424 [parameter_eliminate]: 5.25999e-06 [updatestate_depend_eliminate]: 3.179e-05 [updatestate_assign_eliminate]: 2.36999e-05 [updatestate_loads_eliminate]: 2.192e-05 [cse]: 0.00014009 [renormalize]: 9.49949e-07 [remove_dup_value]: 0.00033806 [tuple_transform]: 0.00042447, [1] [Cycle 1]: 0.00038478, [2] [d_1]: 0.00029511 [renormalize]: 5.50062e-07 [partial_unused_args_eliminate]: 3.541e-05 [add_cache_embedding]: 9.72899e-05 [add_recomputation]: 0.0002308 [cse_after_recomputation]: 0.00017026, [1] [Cycle 1]: 0.00012962, [1] [cse]: 8.038e-05 [environ_conv]: 8.582e-05 [swap_dp_allreduce_reducescatter]: 5.594e-05 [bias_add_comm_swap]: 3.231e-05 [label_micro_interleaved_index]: 2.94399e-05 [label_fine_grained_interleaved_index]: 3.146e-05 [merge_cast_opt]: 2.753e-05 [slice_recompute_activation]: 5.771e-05 [micro_interleaved_order_control]: 3.027e-05 [assign_add_opt]: 0.00021283 [ForceFp32Comm]: 3.40199e-05 [remove_cast_before_assign_add]: 5.22101e-05 [full_micro_interleaved_order_control]: 3.48e-05 [reorder_send_recv_between_fp_bp]: 3.227e-05 [comm_op_add_attrs]: 9.24601e-05 [add_comm_op_reuse_tag]: 9.87899e-05 [interleave_split_concat_branches]: 3.30199e-05 [interleave_parallel_branches]: 3.132e-05 [overlap_opt_shard_in_pipeline]: 7.26701e-05 [overlap_opt_shard_grad_in_pipeline]: 4.43601e-05 [control_data_broadcast_order]: 3.09e-05 [grouped_pairwise_exchange_alltoall]: 4.428e-05 [offloading_packed_experts]: 0.00014433 [overlap_recompute_and_grad_model_parallel]: 3.14e-05 [overlap_grad_matmul_and_grad_allreduce]: 2.908e-05 [overlap_recompute_allgather_and_fa_grad]: 4.572e-05 [overlap_grad_ring_attention]: 7.309e-05 [overlap_grad_flash_sp]: 6.181e-05 [begin_end_overlap_inline]: 4.23701e-05 [split_matmul_comm_elemetwise]: 3.27601e-05 [split_layernorm_comm]: 2.98599e-05 [handle_group_info]: 3.469e-05 [symbol_engine_optimizer]: 0.00058287, [1] [Cycle 1]: 0.00054531, [6] [build]: 4.07699e-05 [elim_shapecalc]: 6.161e-05 [elim_not_effective]: 7.14699e-05 [opt_reshape]: 5.00201e-05 [fold_const_symbol]: 6.91899e-05 [renormalize]: 4.69969e-07 [pipeline_parallel_scheduler]: 5.078e-05 [auto_monad_reorder]: 0.00012854 [get_jit_bprop_graph]: 4.97199e-05 [rewriter_after_jit_bprop_graph]: 4.29701e-05 [eliminate_special_op_node]: 0.00105498 [distribtued_split]: 0.00028448 [validate]: 0.00017382 [task_emit]: 1.73357 [execute]: 6.022e-05 Sums bootstrap : 0.001549s : 0.09% type_inference : 0.017296s : 0.96% auto_monad : 0.000252s : 0.01% graph_reusing : 0.000058s : 0.00% inline.rewriter_before_opt_a : 0.000123s : 0.01% inline.a1a2.expand_dump_flag : 0.000009s : 0.00% inline.a1a2.switch_simplify : 0.000084s : 0.00% inline.a1a2.loop_unroll : 0.000050s : 0.00% inline.a1a2.a_1 : 0.000496s : 0.03% inline.a1a2.recompute_prepare : 0.000050s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000023s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000009s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000008s : 0.00% inline.a1a2.parameter_eliminate : 0.000016s : 0.00% inline.a1a2.a_2 : 0.001344s : 0.07% inline.a1a2.parallel_inline_pass : 0.000054s : 0.00% parallel-infer-symbol : 0.000139s : 0.01% pre_auto_parallel : 0.000111s : 0.01% insert-virtual-dataset : 0.001351s : 0.08% parallel-infer-symbol-second : 0.000050s : 0.00% dataset_repeat_opt : 0.000364s : 0.02% pipeline_split : 0.000126s : 0.01% optimize.py_interpret_to_execute : 0.000063s : 0.00% optimize.rewriter_before_opt_a : 0.000081s : 0.00% optimize.opt_a.expand_dump_flag : 0.000011s : 0.00% optimize.opt_a.switch_simplify : 0.000176s : 0.01% optimize.opt_a.loop_unroll : 0.000146s : 0.01% optimize.opt_a.a_1 : 0.002984s : 0.17% optimize.opt_a.recompute_prepare : 0.000179s : 0.01% optimize.opt_a.updatestate_depend_eliminate : 0.000096s : 0.01% optimize.opt_a.updatestate_assign_eliminate : 0.000058s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000055s : 0.00% optimize.opt_a.parameter_eliminate : 0.000023s : 0.00% optimize.opt_a.a_2 : 0.003506s : 0.19% optimize.opt_a.accelerated_algorithm : 0.000281s : 0.02% optimize.opt_a.shard : 0.000241s : 0.01% optimize.opt_a.meta_shard_fg_expand : 0.000048s : 0.00% optimize.opt_a.shard_inline : 0.000159s : 0.01% optimize.opt_a.auto_parallel : 0.000219s : 0.01% optimize.opt_a.parallel : 0.012297s : 0.68% optimize.opt_a.flash_sp : 0.000097s : 0.01% optimize.opt_a.merge_comm : 0.000105s : 0.01% optimize.opt_a.allreduce_fusion : 0.000082s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000100s : 0.01% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000003s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000229s : 0.01% optimize.opt_a.virtual_dataset : 0.000329s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000247s : 0.01% optimize.opt_a.virtual_output : 0.000208s : 0.01% optimize.opt_a.merge_forward : 0.000094s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000022s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000418s : 0.02% optimize.opt_a.before_grad : 0.000259s : 0.01% optimize.opt_a.inplace_validation : 0.000085s : 0.00% optimize.opt_a.parallel_renormalize : 0.003657s : 0.20% optimize.opt_a.update_top_fg : 0.000004s : 0.00% optimize.opt_a.cast_eliminate : 0.000217s : 0.01% optimize.opt_a.meta_fg_expand : 0.000089s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000124s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000070s : 0.00% optimize.opt_a.receive_attached : 0.000031s : 0.00% optimize.opt_a.after_resolve : 0.000220s : 0.01% optimize.opt_a.a_after_grad : 0.000268s : 0.01% optimize.opt_a.special_op_eliminate : 0.000181s : 0.01% optimize.opt_a.renormalize : 0.002863s : 0.16% optimize.opt_a.add_forward_monad_depend : 0.000031s : 0.00% optimize.opt_a.auto_monad_grad : 0.000014s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000183s : 0.01% optimize.opt_a.cse : 0.000561s : 0.03% optimize.opt_a.a_3 : 0.002000s : 0.11% optimize.py_interpret_to_execute_after_opt_a : 0.000087s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000034s : 0.00% optimize.rewriter_after_opt_a : 0.000450s : 0.02% optimize.convert_after_rewriter : 0.000072s : 0.00% optimize.order_py_execute_after_rewriter : 0.000071s : 0.00% optimize.opt_b.b_1 : 0.001862s : 0.10% optimize.opt_b.b_2 : 0.000064s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000031s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000024s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000024s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000142s : 0.01% optimize.optimize_parallel_all_gather_comm : 0.000078s : 0.00% optimize.overlap_param_gather : 0.000033s : 0.00% optimize.cconv : 0.000086s : 0.00% optimize.loop_unroll : 0.001003s : 0.06% optimize.opt_after_cconv.c_1 : 0.000224s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000032s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000024s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000022s : 0.00% optimize.opt_after_cconv.cse : 0.000140s : 0.01% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000338s : 0.02% optimize.tuple_transform.d_1 : 0.000295s : 0.02% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000035s : 0.00% optimize.add_cache_embedding : 0.000097s : 0.01% optimize.add_recomputation : 0.000231s : 0.01% optimize.cse_after_recomputation.cse : 0.000080s : 0.00% optimize.environ_conv : 0.000086s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000056s : 0.00% optimize.bias_add_comm_swap : 0.000032s : 0.00% optimize.label_micro_interleaved_index : 0.000029s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000031s : 0.00% optimize.merge_cast_opt : 0.000028s : 0.00% optimize.slice_recompute_activation : 0.000058s : 0.00% optimize.micro_interleaved_order_control : 0.000030s : 0.00% optimize.assign_add_opt : 0.000213s : 0.01% optimize.ForceFp32Comm : 0.000034s : 0.00% optimize.remove_cast_before_assign_add : 0.000052s : 0.00% optimize.full_micro_interleaved_order_control : 0.000035s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000032s : 0.00% optimize.comm_op_add_attrs : 0.000092s : 0.01% optimize.add_comm_op_reuse_tag : 0.000099s : 0.01% optimize.interleave_split_concat_branches : 0.000033s : 0.00% optimize.interleave_parallel_branches : 0.000031s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000073s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000044s : 0.00% optimize.control_data_broadcast_order : 0.000031s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000044s : 0.00% optimize.offloading_packed_experts : 0.000144s : 0.01% optimize.overlap_recompute_and_grad_model_parallel : 0.000031s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000029s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000046s : 0.00% optimize.overlap_grad_ring_attention : 0.000073s : 0.00% optimize.overlap_grad_flash_sp : 0.000062s : 0.00% optimize.begin_end_overlap_inline : 0.000042s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000033s : 0.00% optimize.split_layernorm_comm : 0.000030s : 0.00% optimize.handle_group_info : 0.000035s : 0.00% optimize.symbol_engine_optimizer.build : 0.000041s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000062s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000071s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000050s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000069s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000051s : 0.00% auto_monad_reorder : 0.000129s : 0.01% get_jit_bprop_graph : 0.000050s : 0.00% rewriter_after_jit_bprop_graph : 0.000043s : 0.00% eliminate_special_op_node : 0.001055s : 0.06% distribtued_split : 0.000284s : 0.02% validate : 0.000174s : 0.01% task_emit : 1.733572s : 96.31% execute : 0.000060s : 0.00% [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.460 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1605] CompileGraphFromSegment] Compile cut segment, the cut node: @4_3_1___main___Net_construct_20:ValueNode_64{[0]: ValueNode Return, [1]: CNode_22} [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.504 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1129] CompileGraphs] [PROF]CompileSubGraph costs 196.142 msec. [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.530 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:976] ExportCompileCacheKBK] Compile cache: disable by front compile cache config. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.270.539 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.270.577 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.593 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1143] CompileGraphs] Status record: construct the graph compiler info. [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:06.270.624 [mindspore/ccsrc/transform/acl_ir/op_api_exec.cc:145] GetAscendDefaultCustomPath] Add path [/usr/local/Ascend/latest/opp/vendors/customize/op_api/lib/libcust_opapi.so to custom opapi paths. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.650 [mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc:1001] Parse] Control node parser is not inited. Time group info: ------[substitution.] 0.000756 352 15.27% : 0.000115s : 10: substitution.arithmetic_simplify 0.96% : 0.000007s : 21: substitution.elim_not_effective 3.36% : 0.000025s : 14: substitution.float_tuple_getitem_switch 0.93% : 0.000007s : 21: substitution.fold_const_symbol 3.09% : 0.000023s : 29: substitution.graph_param_transform 12.98% : 0.000098s : 1: substitution.inline 4.35% : 0.000033s : 66: substitution.j_node_and_user_rematch 6.87% : 0.000052s : 4: substitution.less_batch_normalization 2.18% : 0.000016s : 10: substitution.minmaximum_grad 5.56% : 0.000042s : 66: substitution.remove_not_recompute_node 1.84% : 0.000014s : 6: substitution.replace_old_param 7.80% : 0.000059s : 18: substitution.tuple_list_convert_item_index_to_positive 5.50% : 0.000042s : 18: substitution.tuple_list_get_item_const_eliminator 4.71% : 0.000036s : 18: substitution.tuple_list_get_item_depend_reorder 16.05% : 0.000121s : 30: substitution.tuple_list_get_item_eliminator 5.02% : 0.000038s : 18: substitution.tuple_list_get_set_item_eliminator 3.01% : 0.000023s : 1: substitution.virtual_dataset_eliminate 0.54% : 0.000004s : 1: substitution.virtual_output_eliminate ------[type_inference.] 0.017150 2 96.84% : 0.016607s : 1: type_inference.infer 3.16% : 0.000543s : 1: type_inference.specialize ------[replace.] 0.000131 5 13.09% : 0.000017s : 1: replace.inline 44.05% : 0.000058s : 2: replace.tuple_list_get_item_eliminator 31.51% : 0.000041s : 1: replace.virtual_dataset_eliminate 11.35% : 0.000015s : 1: replace.virtual_output_eliminate ------[match.] 0.000130 5 74.56% : 0.000097s : 1: match.inline 6.86% : 0.000009s : 2: match.tuple_list_get_item_eliminator 16.24% : 0.000021s : 1: match.virtual_dataset_eliminate 2.33% : 0.000003s : 1: match.virtual_output_eliminate ------[predicate.] 0.001574 11225 0.76% : 0.000012s : 100: predicate.accumulaten_eliminater 0.50% : 0.000008s : 29: predicate.ad_related_special_op_eliminate 4.01% : 0.000063s : 97: predicate.addn_check_dump 0.74% : 0.000012s : 100: predicate.addn_zero_filter 0.69% : 0.000011s : 100: predicate.adjust_all_reduce_mul_add 2.49% : 0.000039s : 197: predicate.arithmetic_simplify 2.13% : 0.000034s : 226: predicate.cast_eliminate 1.09% : 0.000017s : 126: predicate.check_bprop_eliminate 0.84% : 0.000013s : 97: predicate.compare_switch_simplify 0.22% : 0.000003s : 41: predicate.const_output_eliminate 0.28% : 0.000004s : 29: predicate.convert_tensor_all_eliminate 1.21% : 0.000019s : 102: predicate.convert_tensor_eliminate 0.87% : 0.000014s : 97: predicate.depend_value_elim 0.76% : 0.000012s : 100: predicate.dict_get_item_const_eliminator 0.86% : 0.000013s : 100: predicate.dict_get_item_eliminator 0.80% : 0.000013s : 100: predicate.dict_set_item_eliminator 0.18% : 0.000003s : 29: predicate.elim_not_effective 0.32% : 0.000005s : 29: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000018s : 141: predicate.environ_add_const_eliminate 1.02% : 0.000016s : 141: predicate.environ_get_add_eliminate 1.02% : 0.000016s : 141: predicate.environ_get_depend_swap 2.07% : 0.000033s : 238: predicate.environ_get_eliminate 1.05% : 0.000016s : 141: predicate.environ_get_set_eliminate 0.74% : 0.000012s : 103: predicate.exchange_switch_depend_value 1.24% : 0.000020s : 103: predicate.float_depend_g_call 0.87% : 0.000014s : 97: predicate.float_environ_get_switch 1.26% : 0.000020s : 138: predicate.float_tuple_getitem_switch 0.16% : 0.000003s : 29: predicate.fold_const_symbol 1.18% : 0.000019s : 127: predicate.get_grad_eliminate 0.18% : 0.000003s : 29: predicate.graph_param_transform 0.87% : 0.000014s : 97: predicate.incorporate_call 0.74% : 0.000012s : 97: predicate.incorporate_call_switch 5.14% : 0.000081s : 479: predicate.inline 1.42% : 0.000022s : 126: predicate.inline_without_move 0.59% : 0.000009s : 126: predicate.j_node_and_user_rematch 1.02% : 0.000016s : 89: predicate.less_batch_normalization 1.45% : 0.000023s : 172: predicate.list_to_tuple_eliminator_ 2.07% : 0.000033s : 284: predicate.load_eliminater 0.62% : 0.000010s : 41: predicate.loop_unroll_after_grad 1.13% : 0.000018s : 105: predicate.loop_unroll_before_grad 1.48% : 0.000023s : 182: predicate.make_slice_get_slice_eliminator 0.87% : 0.000014s : 97: predicate.merge_addn 1.01% : 0.000016s : 126: predicate.micro_step_allgather_replace 1.03% : 0.000016s : 126: predicate.mini_step_allgather_replace 0.74% : 0.000012s : 100: predicate.minmaximum_grad 0.37% : 0.000006s : 29: predicate.mutable_eliminate 0.30% : 0.000005s : 29: predicate.opt_reshape 0.35% : 0.000006s : 41: predicate.parallel_virtual_node 1.12% : 0.000018s : 103: predicate.partial_defer_inline 1.14% : 0.000018s : 143: predicate.partial_eliminate 0.71% : 0.000011s : 100: predicate.print_const_string_wrapper 0.89% : 0.000014s : 97: predicate.reduce_all_const_elim 0.89% : 0.000014s : 100: predicate.reduce_eliminate 0.67% : 0.000011s : 126: predicate.remove_not_recompute_node 1.51% : 0.000024s : 228: predicate.replace_applicator 0.74% : 0.000012s : 126: predicate.replace_old_param 0.24% : 0.000004s : 41: predicate.reset_defer_inline 0.73% : 0.000012s : 100: predicate.reshape_eliminate 1.09% : 0.000017s : 126: predicate.row_tensor_add_zeros_like 0.38% : 0.000006s : 41: predicate.row_tensor_eliminate 1.27% : 0.000020s : 126: predicate.same_eliminate 0.62% : 0.000010s : 97: predicate.set_cell_output_no_recompute 1.28% : 0.000020s : 127: predicate.shard_identity_eliminate 1.60% : 0.000025s : 167: predicate.special_op_eliminate 1.15% : 0.000018s : 97: predicate.specialize_transform 1.19% : 0.000019s : 126: predicate.split_environ_get_set_with_tuple_value 1.24% : 0.000020s : 126: predicate.stack_unstack_eliminate 2.08% : 0.000033s : 284: predicate.stopgrad_eliminater 0.31% : 0.000005s : 41: predicate.switch_call_monad_eliminater 0.79% : 0.000012s : 103: predicate.switch_defer_inline 1.91% : 0.000030s : 229: predicate.switch_layer_defer_inline 3.22% : 0.000051s : 305: predicate.switch_simplify 0.72% : 0.000011s : 100: predicate.tile_eliminate 0.73% : 0.000012s : 100: predicate.transpose_eliminate 1.43% : 0.000022s : 170: predicate.tuple_list_convert_item_index_to_positive 1.42% : 0.000022s : 170: predicate.tuple_list_get_item_const_eliminator 1.26% : 0.000020s : 170: predicate.tuple_list_get_item_depend_reorder 2.53% : 0.000040s : 269: predicate.tuple_list_get_item_eliminator 1.33% : 0.000021s : 170: predicate.tuple_list_get_set_item_eliminator 2.34% : 0.000037s : 267: predicate.tuple_list_set_item_eliminator 1.40% : 0.000022s : 172: predicate.tuple_to_list_eliminator_ 2.03% : 0.000032s : 284: predicate.updatestate_pure_node_eliminater 2.99% : 0.000047s : 381: predicate.updatestate_useless_node_eliminater 0.41% : 0.000007s : 41: predicate.value_based_eliminate 1.25% : 0.000020s : 130: predicate.virtual_dataset_eliminate 1.17% : 0.000018s : 128: predicate.virtual_output_eliminate 0.36% : 0.000006s : 41: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000573 5 6.02% : 0.000034s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.98% : 0.000538s : 4: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 1.904026 283 0.00% : 0.000038s : 1: ForceFp32Comm 0.78% : 0.014776s : 1: a1a2 0.01% : 0.000103s : 1: add_cache_embedding 0.01% : 0.000104s : 1: add_comm_op_reuse_tag 0.01% : 0.000238s : 1: add_recomputation 0.01% : 0.000219s : 1: assign_add_opt 0.01% : 0.000266s : 1: auto_monad 0.01% : 0.000137s : 1: auto_monad_reorder 0.00% : 0.000048s : 1: begin_end_overlap_inline 0.00% : 0.000037s : 1: bias_add_comm_swap 0.08% : 0.001588s : 1: bootstrap 0.00% : 0.000092s : 1: cconv 0.01% : 0.000098s : 1: comm_op_add_attrs 0.00% : 0.000035s : 1: control_data_broadcast_order 0.00% : 0.000079s : 1: convert_after_rewriter 0.01% : 0.000176s : 1: cse_after_recomputation 0.02% : 0.000378s : 1: dataset_repeat_opt 0.02% : 0.000296s : 1: distribtued_split 0.06% : 0.001068s : 1: eliminate_special_op_node 0.00% : 0.000091s : 1: environ_conv 0.00% : 0.000070s : 1: execute 0.00% : 0.000038s : 1: full_micro_interleaved_order_control 0.00% : 0.000056s : 1: get_jit_bprop_graph 0.00% : 0.000067s : 1: graph_reusing 0.00% : 0.000049s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000039s : 1: handle_group_info 0.79% : 0.015044s : 1: inline 0.07% : 0.001378s : 1: insert-virtual-dataset 0.00% : 0.000036s : 1: interleave_parallel_branches 0.00% : 0.000037s : 1: interleave_split_concat_branches 0.00% : 0.000036s : 1: label_fine_grained_interleaved_index 0.00% : 0.000034s : 1: label_micro_interleaved_index 0.05% : 0.001011s : 1: loop_unroll 0.00% : 0.000031s : 1: merge_cast_opt 0.00% : 0.000034s : 1: micro_interleaved_order_control 0.01% : 0.000149s : 1: offloading_packed_experts 0.04% : 0.000733s : 44: opt.transform.a1a2 0.00% : 0.000059s : 1: opt.transform.loop_unroll_optimizer 0.41% : 0.007827s : 123: opt.transform.opt_a 0.01% : 0.000208s : 1: opt.transform.opt_after_cconv 0.05% : 0.001036s : 27: opt.transform.opt_b 0.01% : 0.000277s : 1: opt.transform.opt_trans_graph 0.01% : 0.000116s : 3: opt.transform.special_op_eliminate 0.01% : 0.000181s : 4: opt.transform.symbol_engine_opt 2.14% : 0.040737s : 1: opt_a 0.04% : 0.000783s : 1: opt_after_cconv 0.13% : 0.002509s : 1: opt_b 2.63% : 0.050094s : 1: optimize 0.00% : 0.000084s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000077s : 1: order_py_execute_after_rewriter 0.00% : 0.000066s : 1: overlap_grad_flash_sp 0.00% : 0.000033s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000078s : 1: overlap_grad_ring_attention 0.00% : 0.000049s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000077s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000037s : 1: overlap_param_gather 0.00% : 0.000050s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000035s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000150s : 1: parallel-infer-symbol 0.00% : 0.000060s : 1: parallel-infer-symbol-second 0.00% : 0.000040s : 1: partial_unused_args_eliminate 0.00% : 0.000058s : 1: pipeline_parallel_scheduler 0.01% : 0.000135s : 1: pipeline_split 0.01% : 0.000121s : 1: pre_auto_parallel 0.00% : 0.000071s : 1: py_interpret_to_execute 0.00% : 0.000093s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000057s : 1: remove_cast_before_assign_add 0.02% : 0.000346s : 1: remove_dup_value 0.23% : 0.004388s : 2: renormalize.infer 0.11% : 0.002097s : 2: renormalize.specialize 0.00% : 0.000037s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000049s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000459s [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.270.681 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_pool.cc:423] BestFitAscendMemoryPool] BestFitAscendMemoryPool constructed, older memory allocator is enabled. : 1: rewriter_after_opt_a 0.01% : 0.000217s : 2: rewriter_before_opt_a 0.00% : 0.000039s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000062s : 1: slice_recompute_activation 0.00% : 0.000034s : 1: split_layernorm_comm 0.00% : 0.000037s : 1: split_matmul_comm_elemetwise 0.00% : 0.000060s : 1: swap_dp_allreduce_reducescatter 0.03% : 0.000587s : 1: symbol_engine_optimizer 91.05% : 1.733614s : 1: task_emit 0.02% : 0.000429s : 1: tuple_transform 0.91% : 0.017320s : 1: type_inference 0.02% : 0.000349s : 1: validate [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.712 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:859] Transform] Graph(kernel_graph_0) transforms actor begin, strategy:pipeline [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.270.736 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:167] Initialize] Skip initialization of memory pool since init size is not configured. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.270.754 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1785] Run] End [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.270.779 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.270.859 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:239] SavePassesConfig] Running_passes: ['a1a2.r1.a_1', 'a1a2.r1.a_1.inline', 'opt_a.r1.auto_parallel', 'opt_a.r1.flash_sp', 'opt_a.r1.flash_sp_send_recv_attached', 'opt_a.r1.parallel', 'opt_a.r1.parallel_renormalize', 'opt_a.r1.receive_attached', 'opt_a.r1.virtual_dataset', 'opt_a.r1.virtual_dataset.virtual_dataset_eliminate', 'opt_a.r1.virtual_output', 'opt_a.r1.virtual_output.virtual_output_eliminate', 'opt_a.r2.a_1', 'opt_a.r2.a_1.tuple_list_get_item_eliminator', 'opt_a.r2.accelerated_algorithm', 'opt_a.r2.accelerated_algorithm.less_batch_normalization', 'opt_a.r2.auto_parallel', 'opt_a.r2.flash_sp', 'opt_a.r2.renormalize', 'opt_a.r3.auto_parallel'] [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.870 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:480] InitGraphParameterStore] Init graph parameter store: kernel_graph_0, outer size: 2 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.270.911 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1047] SaveCompiledGraph] Save compiled func graph(4_3_1___main___Net_construct_20) phase(train.1738915084260222464.281470854288784.0..)! [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.913 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 0, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_x [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.270.942 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 1, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_y [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.270.949 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1065] SaveCompiledGraph] End save compiled func graph! [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.270.987 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1326] CompileInner] [PROF]ParallelPostProcess costs 0.011 msec. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.001 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x32539dd0 for node:ValueNode (0, 0, 0) node addr:0x30c742d0 device type:2 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.271.006 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1154] CleanCompileRes] Clean compile resource start [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.045 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x32539dd0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.084 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3253a0e0 for node:ValueNode 2 node addr:0x30c71eb0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.099 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253a0e0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.119 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3253a490 for node:ValueNode 0 node addr:0x30c71d80 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.133 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253a490 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.154 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3253abc0 for node:ValueNode (1, 1, 1) node addr:0x30c735e0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.168 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253abc0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.188 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3253b3b0 for node:ValueNode (2, 2, 4) node addr:0x30c73290 device type:2 [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.271.170 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1168] CreateDeviceAddress] Status record: end create device address. graph id: 0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.204 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253b3b0 [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.271.219 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1123] CompileGraphImpl] [PROF]CreateDeviceAddress costs 1.121 msec. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.223 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3253bba0 for node:ValueNode (0, 0, 2) node addr:0x30c73160 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.249 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253bba0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.271 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3253c390 for node:ValueNode (2, 4, 4) node addr:0x30c75970 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.287 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253c390 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.305 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3253c870 for node:ValueNode 1 node addr:0x30c716f0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.319 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253c870 [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.271.337 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1177] CacheGraphOutputToFrontNodeWithIndex] Get graph backend output nodes. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.271.374 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1185] CacheGraphOutputToFrontNodeWithIndex] Get graph front output nodes. [INFO] SESSION(187742,ffffa187dc10,python):2025-02-07-15:58:06.271.442 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1203] CacheGraphOutputToFrontNodeWithIndex] Backend output: Default/AllGather-op2 debug string: @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} node ptr:0x3e9cc620 with index: 0 map to front node: Default/AllGather-op2 debug string: @4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} node ptr: 0x3e97fa60 with index: 0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.454 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:442] ChangeGraphMode] Enable kbk subgraph execute and set run mode for graph: 0 to GraphMode. [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.473 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:462] TryEnableKbkSubGraphExecMode] Enable kbk subgraph execute mode for actor set: kernel_graph_0 [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.271.484 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:766] CompileGraph] Status record: end compile graph. graph id: 0 [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.527 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:391] TryEnableInputOptimize] Enable input optimize for actor set: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.572 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_y for host data source actor. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.630 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_y for front node:@4_3_1___main___Net_construct_20:param_y index:0 position:1 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.656 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_x for host data source actor. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.687 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_x for front node:@4_3_1___main___Net_construct_20:param_x index:0 position:0 [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.271.689 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1605] CompileGraphFromSegment] Compile cut segment, the cut node: @4_3_1___main___Net_construct_20:ValueNode_64{[0]: ValueNode Return, [1]: CNode_22} [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.731 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2268] BuildDataPrepareActorForGraphParameterStore] Create data prepare actor: kernel_graph_0_DataPrepareActor [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.271.731 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1129] CompileGraphs] [PROF]CompileSubGraph costs 197.087 msec. [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.271.757 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:976] ExportCompileCacheKBK] Compile cache: disable by front compile cache config. [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.271.831 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1143] CompileGraphs] Status record: construct the graph compiler info. [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.853 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2224] BuildLoopCountActor] Create loop count actor: kernel_graph_0_LoopCountActor [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.271.872 [mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc:1001] Parse] Control node parser is not inited. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.271.866 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.881 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2257] BuildOutputActor] Create output actor: kernel_graph_0_OutputActor [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.271.936 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:859] Transform] Graph(kernel_graph_0) transforms actor begin, strategy:pipeline [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.271.969 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1509] CacheGraphOutputToActor] Cache graph 0 output node:Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} with index:0 to actor:kernel_graph0_SuperKernelActor, from front node:Default/AllGather-op2 debug string:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} with index:0 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.050 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.079 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.088 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:480] InitGraphParameterStore] Init graph parameter store: kernel_graph_0, outer size: 2 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.107 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.130 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 0, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_x [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.144 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.158 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 1, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_y [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.170 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x32538f80 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.199 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.217 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x32539350 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.219 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x4024e580 for node:ValueNode (2, 2, 2) node addr:0x3e987c80 device type:2 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.248 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.258 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x4024e580 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.274 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.302 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x4024e9d0 for node:ValueNode (2, 4, 4) node addr:0x3e98a360 device type:2 [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.307 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1711] Link] [PROF]GraphSchedulerLinkSinkMode costs 0.19 msec. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.302 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.317 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x4024e9d0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.337 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x4024ee20 for node:ValueNode 0 node addr:0x3e986770 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.339 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.351 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x4024ee20 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.362 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph0_SuperKernelActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.370 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x4024f2e0 for node:ValueNode 2 node addr:0x3e9868a0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.376 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_LoopCountActor@ to actor:kernel_graph_0_OutputActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.383 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x4024f2e0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.393 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_OutputActor@ to actor:kernel_graph_0_DataPrepareActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.400 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x4024f7c0 for node:ValueNode 1 node addr:0x3e9860e0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.407 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:3713] LinkControlArrowForCopyActor] Link control arrow for copy actor start, copy actor size:0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.412 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x4024f7c0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.436 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x325413e0 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.431 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.432 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x4024fff0 for node:ValueNode (0, 0, 0) node addr:0x3e987b50 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.459 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x4024fff0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.457 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.472 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:556] AddResultArrow] Add result arrow from actor:kernel_graph0_SuperKernelActor to actor:kernel_graph_0_OutputActor@ from kernel@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} device address:0x325413e0 original ref count:18446744073709551615 ref count:18446744073709551615 dynamic ref count:2147483647 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.478 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x402507e0 for node:ValueNode (2, 2, 4) node addr:0x3e988f50 device type:2 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.485 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.495 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x402507e0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.514 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x40250fd0 for node:ValueNode (1, 1, 1) node addr:0x3e987fd0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.527 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x40250fd0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.560 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.604 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.606 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.648 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.656 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.681 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.695 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:442] ChangeGraphMode] Enable kbk subgraph execute and set run mode for graph: 0 to GraphMode. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.695 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.717 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:462] TryEnableKbkSubGraphExecMode] Enable kbk subgraph execute mode for actor set: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.742 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.772 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:391] TryEnableInputOptimize] Enable input optimize for actor set: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.776 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.817 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_y for host data source actor. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.812 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.272.837 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.875 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_y for front node:@4_3_1___main___Net_construct_20:param_y index:0 position:1 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.901 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_x for host data source actor. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.932 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_x for front node:@4_3_1___main___Net_construct_20:param_x index:0 position:0 [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.272.973 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2268] BuildDataPrepareActorForGraphParameterStore] Create data prepare actor: kernel_graph_0_DataPrepareActor [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.272.989 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.039 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.093 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.096 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2224] BuildLoopCountActor] Create loop count actor: kernel_graph_0_LoopCountActor [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.125 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2257] BuildOutputActor] Create output actor: kernel_graph_0_OutputActor [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.131 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.273.152 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1175] CleanCompileRes] Clean compile resource end [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.273.198 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] End compiling 'Net.construct'. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.199 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.211 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1509] CacheGraphOutputToActor] Cache graph 0 output node:Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} with index:0 to actor:kernel_graph0_SuperKernelActor, from front node:Default/AllGather-op2 debug string:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} with index:0 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.273.228 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1334] CompileInner] [PROF]CleanCompileRes costs 2.214 msec. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.273.247 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1335] CompileInner] Finish compiling. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.244 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.273.232 [mindspore/ccsrc/transform/acl_ir/op_api_exec.cc:145] GetAscendDefaultCustomPath] Add path [/usr/local/Ascend/latest/opp/vendors/customize/op_api/lib/libcust_opapi.so to custom opapi paths. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.273.266 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1336] CompileInner] [PROF]compile_graph costs 2008.95 msec. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.295 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.335 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.389 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.405 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.426 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x4024d6f0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.442 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.459 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.476 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x4024db00 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.491 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.529 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.559 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.567 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1711] Link] [PROF]GraphSchedulerLinkSinkMode costs 0.205 msec. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.591 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.598 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.620 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph0_SuperKernelActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.627 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.635 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_LoopCountActor@ to actor:kernel_graph_0_OutputActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.653 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_OutputActor@ to actor:kernel_graph_0_DataPrepareActor@ [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.661 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.669 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:3713] LinkControlArrowForCopyActor] Link control arrow for copy actor start, copy actor size:0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.681 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 4, send_actor : 0x30cbe090, recv_actor : 0x30cbe8a0. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.699 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x40255b50 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.697 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 8, send_actor : 0x3254f940, recv_actor : 0x32550150. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.723 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 3, send_actor : 0x30cbd070, recv_actor : 0x30cbd880. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.734 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:556] AddResultArrow] Add result arrow from actor:kernel_graph0_SuperKernelActor to actor:kernel_graph_0_OutputActor@ from kernel@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} device address:0x40255b50 original ref count:18446744073709551615 ref count:18446744073709551615 dynamic ref count:2147483647 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.737 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 11, send_actor : 0x325538f0, recv_actor : 0x32554330. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.273.737 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_x, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.752 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 10, send_actor : 0x32552470, recv_actor : 0x32552eb0. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.766 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 1, send_actor : 0x325480a0, recv_actor : 0x325489e0. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.779 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 5, send_actor : 0x30cbf2e0, recv_actor : 0x30cbfd20. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.793 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 6, send_actor : 0x32546240, recv_actor : 0x32546c10. [INFO] PARALLEL(187775,ffffba4dbc10,python):2025-02-07-15:58:06.273.790 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_y, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.807 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 7, send_actor : 0x3254a6f0, recv_actor : 0x3254b030. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.819 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 9, send_actor : 0x32551170, recv_actor : 0x32551a30. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.818 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.273.832 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 2, send_actor : 0x3254c320, recv_actor : 0x3254d180. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.861 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.896 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.927 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.968 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.273.997 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.274.110 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op0 addr:0x32539c70 type:48, kernel tensor addr:0x32539a00, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.209 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.274.222 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op1 addr:0x3253cd80 type:48, kernel tensor addr:0x3253cb10, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.250 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.274.289 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.313 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.348 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [INFO] UTILS(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.344 [mindspore/ccsrc/utils/dynamic_obfuscation/registry_opaque_predicate.cc:112] init_calling_count] calling_count_ has been initialized to 0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.274.382 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op2 addr:0x3253d290 type:48, kernel tensor addr:0x3253d020, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.415 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.453 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.274.451 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.500 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.274.532 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op3 addr:0x3253d7a0 type:48, kernel tensor addr:0x3253d530, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.536 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.526 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1893] RunGraph] Status record: start run actor: kernel_graph_0 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.584 [mindspore/ccsrc/runtime/device/pre_launch_comm.cc:200] PreLaunchCommKernel] No hccl kernel to pre launch [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.603 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.613 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1918] RunGraph] [PROF]PreLaunchCommKernel costs 0.039 msec. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.274.626 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op4 addr:0x3253dcb0 type:48, kernel tensor addr:0x3253da40, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.639 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.657 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:966] SpawnMultiPipelineActor] Enable runtime asynchronously launch kernel, default actor thread num 5, current actor thread num: 5 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.274.685 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.688 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.692 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.720 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.753 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.751 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.274.771 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op5 addr:0x3253e1c0 type:48, kernel tensor addr:0x3253df50, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.783 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.795 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:394] operator()] Init defrag memory step freq. [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.814 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:396] operator()] Config defrag memory step freq : . [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.816 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.274.825 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.828 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:405] operator()] Defrag memory step freq : 100. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.847 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.866 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 4, send_actor : 0x3e9d2a90, recv_actor : 0x3e9d32a0. [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.877 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:638] PrepareDataForDeviceTensorStore] Prepare store data, input tensor size: 0, arg size: 2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.880 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 8, send_actor : 0x40263fe0, recv_actor : 0x402647f0. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.898 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:835] AllocGEFixMemory] Start AllocGEFixMemory [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.905 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 3, send_actor : 0x3e9d1a70, recv_actor : 0x3e9d2280. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.919 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 11, send_actor : 0x40267f90, recv_actor : 0x402689d0. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.931 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 10, send_actor : 0x40266b10, recv_actor : 0x40267550. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.944 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 1, send_actor : 0x4025c740, recv_actor : 0x4025d080. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.946 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:652] PrepareDataForDeviceTensorStore] prepare data for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.957 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 5, send_actor : 0x3e9d3bd0, recv_actor : 0x3e9d4610. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.274.959 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Mul-op0 addr:0x3253e6d0 type:48, kernel tensor addr:0x3253e460, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.971 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 6, send_actor : 0x4025a910, recv_actor : 0x4025b2b0. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.274.979 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-0, debug name:ValueNode 0, front node:ValueNode 0 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.984 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 7, send_actor : 0x4025ed90, recv_actor : 0x4025f6d0. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.274.997 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 9, send_actor : 0x40265810, recv_actor : 0x402660d0. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.275.004 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 0 front node:ValueNode 0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.275.009 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 2, send_actor : 0x402609c0, recv_actor : 0x40261820. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.009 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.275.025 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3902e070 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.275.041 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.080 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op0 addr:0x3253ebe0 type:48, kernel tensor addr:0x3253e970, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 2)) [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:06.275.086 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.121 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.195 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op0 addr:0x3253f160 type:48, kernel tensor addr:0x3253ef80, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.231 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op0 addr:0x3253f650 type:48, kernel tensor addr:0x3253f470, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.270 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.275.283 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op0 addr:0x4024e400 type:48, kernel tensor addr:0x4024e190, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.339 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op0 addr:0x3253fad0 type:48, kernel tensor addr:0x3253f860, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.384 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.275.396 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op1 addr:0x402514c0 type:48, kernel tensor addr:0x40251250, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.448 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op1 addr:0x3253ffe0 type:48, kernel tensor addr:0x3253fd70, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.275.463 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.486 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.556 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op1 addr:0x32540560 type:48, kernel tensor addr:0x32540380, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.275.558 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op2 addr:0x402519d0 type:48, kernel tensor addr:0x40251760, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.581 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op1 addr:0x32540a50 type:48, kernel tensor addr:0x32540870, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.618 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.275.628 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.683 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op1 addr:0x32540ed0 type:48, kernel tensor addr:0x32540c60, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.275.704 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op3 addr:0x40251ee0 type:48, kernel tensor addr:0x40251c70, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.275.718 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-1, debug name:ValueNode 2, front node:ValueNode 2 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.726 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.275.754 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 2 front node:ValueNode 2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.275.774 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x390414c0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.275.790 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.275.795 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op4 addr:0x402523f0 type:48, kernel tensor addr:0x40252180, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.783 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op2 addr:0x325413e0 type:48, kernel tensor addr:0x32541170, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.836 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.275.853 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.275.941 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op5 addr:0x40252900 type:48, kernel tensor addr:0x40252690, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.275.995 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.275.997 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op1 input kernel:Default/StridedSlice-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.024 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x32539c70 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.037 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-2, debug name:ValueNode (0, 0, 0), front node:ValueNode (0, 0, 0) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.065 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op2 input kernel:Default/StridedSlice-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.085 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3253cd80 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.070 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 0, 0) front node:ValueNode (0, 0, 0) [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.108 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x39041900 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.126 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 0, 0) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.130 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op4 input kernel:Default/StridedSlice-op3 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.129 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Mul-op0 addr:0x40252e10 type:48, kernel tensor addr:0x40252ba0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.151 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3253d7a0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.178 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.186 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op5 input kernel:Default/StridedSlice-op4 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.206 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3253dcb0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.240 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op2 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.239 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op0 addr:0x40253320 type:48, kernel tensor addr:0x402530b0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.259 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3253d290 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.277 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.289 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op5 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.308 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3253e1c0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.333 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op0 input kernel:Default/Mul-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.348 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op0 addr:0x402538a0 type:48, kernel tensor addr:0x402536c0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.365 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3253e6d0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.361 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-3, debug name:ValueNode 1, front node:ValueNode 1 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.385 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op0 addr:0x40253d90 type:48, kernel tensor addr:0x40253bb0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.391 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op0 input kernel:Default/AllGather-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.390 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 1 front node:ValueNode 1 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.409 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3253ebe0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.423 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.408 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x39041d20 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.424 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 1 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.432 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.446 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3253f160 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.464 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.478 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3253f650 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.493 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op0 addr:0x40254210 type:48, kernel tensor addr:0x40253fa0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.501 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op1 input kernel:Default/Concat-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.516 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3253fad0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.538 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op1 input kernel:Default/AllGather-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.536 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.554 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3253ffe0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.574 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.589 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x32540560 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.593 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op1 addr:0x40254720 type:48, kernel tensor addr:0x402544b0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.607 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.637 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.621 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x32540a50 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.652 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op2 input kernel:Default/Concat-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.665 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-4, debug name:ValueNode (1, 1, 1), front node:ValueNode (1, 1, 1) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.684 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x32540ed0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.700 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (1, 1, 1) front node:ValueNode (1, 1, 1) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.707 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op1 addr:0x40254ca0 type:48, kernel tensor addr:0x40254ac0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.718 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x39042510 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.727 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[y] debug_name: @kernel_graph0:param_y use count is: 1 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.733 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op1 addr:0x402551c0 type:48, kernel tensor addr:0x40254fe0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.736 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (1, 1, 1) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.754 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[x] debug_name: @kernel_graph0:param_x use count is: 1 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.771 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.776 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1603] AddControlArrowForNoInputActor] Add control arrow for no input arrow actor: kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.795 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.832 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op1 addr:0x40255640 type:48, kernel tensor addr:0x402553d0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.881 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1} is thread safe. [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.898 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:887] Transform] [PROF]GraphSchedulerLink costs 4.886 msec. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.936 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op2 addr:0x40255b50 type:48, kernel tensor addr:0x402558e0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.276.969 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-5, debug name:ValueNode (2, 2, 4), front node:ValueNode (2, 2, 4) for graph:kernel_graph0 [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.276.982 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 1_actor_set_kernel_graph_0_invalid_data_arrow_elimination in 1.67999 us [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.276.987 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.000 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 2, 4) front node:ValueNode (2, 2, 4) [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.020 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x39042d00 [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.024 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 2_actor_set_kernel_graph_0_multi_actor_fusion in 16.6999 us [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.038 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 2, 4) [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.047 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 3_actor_set_kernel_graph_0_batch_data_arrow_fusion in 1.10001 us [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.070 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:904] Transform] Graph(kernel_graph_0) transforms actor end. [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.140 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:354] Init] kernel_graph_0 has the parameter input num: 2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.136 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op1 input kernel:Default/StridedSlice-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.161 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x4024e400 origin ref count:2 [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.193 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1153] CompileGraphs] [PROF]GraphScheduler costs 6.561 msec. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.198 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op2 input kernel:Default/StridedSlice-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.217 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x402514c0 origin ref count:2 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.221 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:29] operator()] Create MultiStreamController. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.243 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:40] Refresh] Stream manager initialize, device_context : 0x1dee35c0, stream_size : 5. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.257 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op4 input kernel:Default/StridedSlice-op3 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.261 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:214] Resize] Task id on stream manager initialize : 0, stream_size : 5. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.276 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x40251ee0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.270 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-6, debug name:ValueNode (0, 2, 0), front node:ValueNode (0, 2, 0) for graph:kernel_graph0 [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.287 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1166] CompileGraphs] [PROF]compile_backend_graph costs 1652.02 msec. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.311 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op5 input kernel:Default/StridedSlice-op4 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.314 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 2, 0) front node:ValueNode (0, 2, 0) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.328 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x402523f0 origin ref count:2 [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.319 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1169] CompileGraphs] Status record: end compile function graph: 4_3_1___main___Net_construct_20, produce actor: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.333 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x390434f0 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.358 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end task_emit action. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.359 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op2 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.351 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 2, 0) [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.377 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.377 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x402519d0 origin ref count:2 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.408 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:281] SetLoopCount] Change vm_loop_flag to 0, set loop_size to 1 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.404 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op5 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.419 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x40252900 origin ref count:2 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.440 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start execute action. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.441 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op0 input kernel:Default/Mul-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.467 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x40252e10 origin ref count:2 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.470 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end execute action. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.277.487 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.491 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op0 input kernel:Default/AllGather-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.507 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x40253320 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.528 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.542 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x402538a0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.558 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.571 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x40253d90 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.583 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-7, debug name:ValueNode (2, 4, 4), front node:ValueNode (2, 4, 4) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.593 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op1 input kernel:Default/Concat-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.608 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x40254210 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.615 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 4, 4) front node:ValueNode (2, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.629 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op1 input kernel:Default/AllGather-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.633 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x39043ce0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.644 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x40254720 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.652 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.663 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.677 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x40254ca0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.695 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.708 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x402551c0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.730 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op2 input kernel:Default/Concat-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.751 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x40255640 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.791 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[y] debug_name: @kernel_graph0:param_y use count is: 1 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.811 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[x] debug_name: @kernel_graph0:param_x use count is: 1 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.831 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1603] AddControlArrowForNoInputActor] Add control arrow for no input arrow actor: kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.848 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.884 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-8, debug name:ValueNode (0, 0, 2), front node:ValueNode (0, 0, 2) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.915 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 0, 2) front node:ValueNode (0, 0, 2) [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.931 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x390444d0 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.277.948 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 0, 2) [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.277.952 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:887] Transform] [PROF]GraphSchedulerLink costs 4.696 msec. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.031 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 1_actor_set_kernel_graph_0_invalid_data_arrow_elimination in 1.32003 us [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.071 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 2_actor_set_kernel_graph_0_multi_actor_fusion in 17.49 us [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.090 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 3_actor_set_kernel_graph_0_batch_data_arrow_fusion in 1.01002 us [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.112 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:904] Transform] Graph(kernel_graph_0) transforms actor end. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.184 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:354] Init] kernel_graph_0 has the parameter input num: 2 [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.278.194 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_y front node:@4_3_1___main___Net_construct_20:param_y backend is weight:0 front is weight:0 [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.234 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1153] CompileGraphs] [PROF]GraphScheduler costs 6.377 msec. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:06.278.235 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_x front node:@4_3_1___main___Net_construct_20:param_x backend is weight:0 front is weight:0 TotalTime = 1.7314, [21] [bootstrap]: 0.00136427 [type_inference]: 0.0169255 [auto_monad]: 0.00035846 [graph_reusing]: 6.01399e-05 [inline]: 0.0136936, [2] [rewriter_before_opt_a]: 0.00010853 [a1a2]: 0.0134685, [2] [Cycle 1]: 0.00174735, [11] [expand_dump_flag]: 5.64009e-06 [switch_simplify]: 5.71101e-05 [loop_unroll]: 3.029e-05 [a_1]: 0.00032219 [recompute_prepare]: 2.434e-05 [updatestate_depend_eliminate]: 1.243e-05 [updatestate_assign_eliminate]: 4.41005e-06 [updatestate_loads_eliminate]: 3.20002e-06 [parameter_eliminate]: 8.15e-06 [a_2]: 0.00061033 [parallel_inline_pass]: 2.07199e-05 [Cycle 2]: 0.00116033, [11] [expand_dump_flag]: 1.16997e-06 [switch_simplify]: 1.975e-05 [loop_unroll]: 1.956e-05 [a_1]: 0.00013487 [recompute_prepare]: 1.97301e-05 [updatestate_depend_eliminate]: 4.54998e-06 [updatestate_assign_eliminate]: 3.16999e-06 [updatestate_loads_eliminate]: 2.84007e-06 [parameter_eliminate]: 2.40002e-06 [a_2]: 0.0005546 [parallel_inline_pass]: 1.999e-05 [parallel-infer-symbol]: 0.00012441 [pre_auto_parallel]: 9.41e-05 [insert-virtual-dataset]: 0.00105694 [parallel-infer-symbol-second]: 4.93299e-05 [dataset_repeat_opt]: 0.00032906 [pipeline_split]: 0.0001126 [optimize]: 0.0420821, [52] [py_interpret_to_execute]: 5.093e-05 [rewriter_before_opt_a]: 0.0001452 [opt_a]: 0.0321773, [3] [Cycle 1]: 0.0169311, [46] [expand_dump_flag]: 2.23995e-06 [switch_simplify]: 3.253e-05 [loop_unroll]: 3.489e-05 [a_1]: 0.00030458 [recompute_prepare]: 2.39901e-05 [updatestate_depend_eliminate]: 9.00996e-06 [updatestate_assign_eliminate]: 6.03008e-06 [updatestate_loads_eliminate]: 5.23007e-06 [parameter_eliminate]: 3.75998e-06 [a_2]: 0.00064119 [accelerated_algorithm]: 2.487e-05 [shard]: 3.092e-05 [meta_shard_fg_expand]: 4.11994e-06 [shard_inline]: 2.39301e-05 [auto_parallel]: 3.506e-05 [parallel]: 0.0090917 [flash_sp]: 4.313e-05 [merge_comm]: 3.392e-05 [allreduce_fusion]: 2.907e-05 [matmul_add_comm_reduction]: 3.427e-05 [allreduce_slice_to_reducescatter]: 1.19e-06 [virtual_shard_identity]: 7.601e-05 [virtual_dataset]: 0.00010991 [get_grad_eliminate_]: 6.464e-05 [virtual_output]: 8.156e-05 [merge_forward]: 2.923e-05 [cell_reuse_recompute_pass]: 5.00993e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00012639 [before_grad]: 8.61801e-05 [inplace_validation]: 2.705e-05 [parallel_renormalize]: 0.00277506 [update_top_fg]: 8.10018e-07 [cast_eliminate]: 7.147e-05 [meta_fg_expand]: 2.686e-05 [inplace_validation_after_expand]: 3.828e-05 [flash_sp_send_recv_attached]: 4.493e-05 [receive_attached]: 4.70993e-06 [after_resolve]: 7.947e-05 [a_after_grad]: 8.84499e-05 [special_op_eliminate]: 6.11e-05 [renormalize]: 1.49943e-07 [add_forward_monad_depend]: 6.02007e-06 [auto_monad_grad]: 4.03007e-06 [auto_monad_eliminator]: 4.79099e-05 [cse]: 0.00016818 [a_3]: 0.00057853 [Cycle 2]: 0.00878375, [46] [expand_dump_flag]: 2.16998e-06 [switch_simplify]: 5.95e-05 [loop_unroll]: 5.773e-05 [a_1]: 0.00137095 [recompute_prepare]: 5.764e-05 [updatestate_depend_eliminate]: 2.877e-05 [updatestate_assign_eliminate]: 2.386e-05 [updatestate_loads_eliminate]: 2.325e-05 [parameter_eliminate]: 3.94997e-06 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.260 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:29] operator()] Create MultiStreamController. [a_2]: 0.00128626 [accelerated_algorithm]: 0.00011934 [shard]: 2.96501e-05 [meta_shard_fg_expand]: 1.06799e-05 [shard_inline]: 6.05e-05 [auto_parallel]: 6.15399e-05 [parallel]: 1.12e-05 [flash_sp]: 3.558e-05 [merge_comm]: 3.112e-05 [allreduce_fusion]: 2.693e-05 [matmul_add_comm_reduction]: 2.663e-05 [allreduce_slice_to_reducescatter]: 8.29925e-07 [virtual_shard_identity]: 5.89901e-05 [virtual_dataset]: 5.66799e-05 [get_grad_eliminate_]: 5.48899e-05 [virtual_output]: 5.46201e-05 [merge_forward]: 2.424e-05 [cell_reuse_recompute_pass]: 3.76999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00011221 [before_grad]: 7.662e-05 [inplace_validation]: 2.486e-05 [parallel_renormalize]: 3.69968e-07 [update_top_fg]: 5.10016e-07 [cast_eliminate]: 5.548e-05 [meta_fg_expand]: 2.357e-05 [inplace_validation_after_expand]: 3.178e-05 [flash_sp_send_recv_attached]: 2.56998e-06 [receive_attached]: 1.76998e-06 [after_resolve]: 6.064e-05 [a_after_grad]: 8.08999e-05 [special_op_eliminate]: 5.44201e-05 [renormalize]: 0.00209655 [add_forward_monad_depend]: 5.18002e-06 [auto_monad_grad]: 2.49001e-06 [auto_monad_eliminator]: 4.236e-05 [cse]: 0.00014403 [a_3]: 0.00055002 [Cycle 3]: 0.0064085, [46] [expand_dump_flag]: 1.95997e-06 [switch_simplify]: 5.706e-05 [loop_unroll]: 5.53e-05 [a_1]: 0.00121607 [recompute_prepare]: 5.73e-05 [updatestate_depend_eliminate]: 2.84699e-05 [updatestate_assign_eliminate]: 2.412e-05 [updatestate_loads_eliminate]: 2.25899e-05 [parameter_eliminate]: 3.01993e-06 [a_2]: 0.00121629 [accelerated_algorithm]: 6.557e-05 [shard]: 3.288e-05 [meta_shard_fg_expand]: 1.05799e-05 [shard_inline]: 5.63e-05 [auto_parallel]: 6.209e-05 [parallel]: 9.70007e-06 [flash_sp]: 1.65997e-06 [merge_comm]: 3.15401e-05 [allreduce_fusion]: 2.811e-05 [matmul_add_comm_reduction]: 3.028e-05 [allreduce_slice_to_reducescatter]: 6.39935e-07 [virtual_shard_identity]: 5.99001e-05 [virtual_dataset]: 5.61001e-05 [get_grad_eliminate_]: 5.482e-05 [virtual_output]: 5.471e-05 [merge_forward]: 2.41001e-05 [cell_reuse_recompute_pass]: 4.5799e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.0001219 [before_grad]: 7.925e-05 [inplace_validation]: 2.48e-05 [parallel_renormalize]: 1.70083e-07 [update_top_fg]: 7.10017e-07 [cast_eliminate]: 5.756e-05 [meta_fg_expand]: 2.44901e-05 [inplace_validation_after_expand]: 3.22199e-05 [flash_sp_send_recv_attached]: 1.89e-06 [receive_attached]: 1.90001e-06 [after_resolve]: 7.017e-05 [a_after_grad]: 8.06201e-05 [special_op_eliminate]: 5.667e-05 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 4.91994e-06 [auto_monad_grad]: 3.26999e-06 [auto_monad_eliminator]: 3.856e-05 [cse]: 0.00013397 [a_3]: 0.00062516 [py_interpret_to_execute_after_opt_a]: 7.53701e-05 [slice_cell_reuse_recomputed_activation]: 3.21601e-05 [rewriter_after_opt_a]: 0.00039307 [convert_after_rewriter]: 8.19899e-05 [order_py_execute_after_rewriter]: 6.106e-05 [opt_b]: 0.00268473, [1] [Cycle 1]: 0.00263901, [7] [b_1]: 0.00201459 [b_2]: 6.48099e-05 [updatestate_depend_eliminate]: 2.92e-05 [updatestate_ass[INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.284 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:40] Refresh] Stream manager initialize, device_context : 0x2bc02470, stream_size : 5. ign_eliminate]: 2.306e-05 [updatestate_loads_eliminate]: 2.34699e-05 [renormalize]: 9.79984e-07 [cse]: 0.00013988 [optimize_parallel_all_gather_comm]: 7.995e-05 [overlap_param_gather]: 3.376e-05 [cconv]: 8.83701e-05 [loop_unroll]: 0.00111099 [opt_after_cconv]: 0.00080636, [1] [Cycle 1]: 0.00076946, [7] [c_1]: 0.00022857 [parameter_eliminate]: 6.78003e-06 [updatestate_depend_eliminate]: 3.109e-05 [updatestate_assign_eliminate]: 2.36599e-05 [updatestate_loads_eliminate]: 2.24201e-05 [cse]: 0.00014624 [renormalize]: 1.30001e-06 [remove_dup_value]: 0.00034175 [tuple_transform]: 0.00043039, [1] [Cycle 1]: 0.00039041, [2] [d_1]: 0.00029386 [renormalize]: 5.10016e-07 [partial_unused_args_eliminate]: 3.724e-05 [add_cache_embedding]: 0.00012653 [add_recomputation]: 0.00023925 [cse_after_recomputation]: 0.00018474, [1] [Cycle 1]: 0.00014439, [1] [cse]: 9.022e-05 [environ_conv]: 8.56699e-05 [swap_dp_allreduce_reducescatter]: 6.058e-05 [bias_add_comm_swap]: 3.417e-05 [label_micro_interleaved_index]: 3.19399e-05 [label_fine_grained_interleaved_index]: 3.76101e-05 [merge_cast_opt]: 2.932e-05 [slice_recompute_activation]: 6.61e-05 [micro_interleaved_order_control]: 4.657e-05 [assign_add_opt]: 0.00021724 [ForceFp32Comm]: 3.469e-05 [remove_cast_before_assign_add]: 0.00011346 [full_micro_interleaved_order_control]: 3.307e-05 [reorder_send_recv_between_fp_bp]: 3.43199e-05 [comm_op_add_attrs]: 0.00010332 [add_comm_op_reuse_tag]: 0.00010591 [interleave_split_concat_branches]: 3.586e-05 [interleave_parallel_branches]: 3.292e-05 [overlap_opt_shard_in_pipeline]: 7.43499e-05 [overlap_opt_shard_grad_in_pipeline]: 4.598e-05 [control_data_broadcast_order]: 3.14299e-05 [grouped_pairwise_exchange_alltoall]: 4.819e-05 [offloading_packed_experts]: 7.457e-05 [overlap_recompute_and_grad_model_parallel]: 3.16399e-05 [overlap_grad_matmul_and_grad_allreduce]: 2.85499e-05 [overlap_recompute_allgather_and_fa_grad]: 4.666e-05 [overlap_grad_ring_attention]: 8.617e-05 [overlap_grad_flash_sp]: 6.845e-05 [begin_end_overlap_inline]: 5.00401e-05 [split_matmul_comm_elemetwise]: 3.19299e-05 [split_layernorm_comm]: 3.071e-05 [handle_group_info]: 4.091e-05 [symbol_engine_optimizer]: 0.00063001, [1] [Cycle 1]: 0.000589, [6] [build]: 4.34801e-05 [elim_shapecalc]: 6.20501e-05 [elim_not_effective]: 7.474e-05 [opt_reshape]: 4.938e-05 [fold_const_symbol]: 8.404e-05 [renormalize]: 7.69971e-07 [pipeline_parallel_scheduler]: 5.207e-05 [auto_monad_reorder]: 0.00014336 [get_jit_bprop_graph]: 4.678e-05 [rewriter_after_jit_bprop_graph]: 4.27001e-05 [eliminate_special_op_node]: 0.00111246 [distribtued_split]: 0.00030898 [validate]: 0.00018795 [task_emit]: 1.65262 [execute]: 6.06399e-05 Sums bootstrap : 0.001364s : 0.08% type_inference : 0.016925s : 0.99% auto_monad : 0.000358s : 0.02% graph_reusing : 0.000060s : 0.00% inline.rewriter_before_opt_a : 0.000109s : 0.01% inline.a1a2.expand_dump_flag : 0.000007s : 0.00% inline.a1a2.switch_simplify : 0.000077s : 0.00% inline.a1a2.loop_unroll : 0.000050s : 0.00% inline.a1a2.a_1 : 0.000457s : 0.03% inline.a1a2.recompute_prepare : 0.000044s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000017s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000008s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000006s : 0.00% inline.a1a2.parameter_eliminate : 0.000011s : 0.00% inline.a1a2.a_2 : 0.0011[INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.302 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:214] Resize] Task id on stream manager initialize : 0, stream_size : 5. 65s : 0.07% inline.a1a2.parallel_inline_pass : 0.000041s : 0.00% parallel-infer-symbol : 0.000124s : 0.01% pre_auto_parallel : 0.000094s : 0.01% insert-virtual-dataset : 0.001057s : 0.06% parallel-infer-symbol-second : 0.000049s : 0.00% dataset_repeat_opt : 0.000329s : 0.02% pipeline_split : 0.000113s : 0.01% optimize.py_interpret_to_execute : 0.000051s : 0.00% optimize.rewriter_before_opt_a : 0.000145s : 0.01% optimize.opt_a.expand_dump_flag : 0.000006s : 0.00% optimize.opt_a.switch_simplify : 0.000149s : 0.01% optimize.opt_a.loop_unroll : 0.000148s : 0.01% optimize.opt_a.a_1 : 0.002892s : 0.17% optimize.opt_a.recompute_prepare : 0.000139s : 0.01% optimize.opt_a.updatestate_depend_eliminate : 0.000066s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000054s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000051s : 0.00% optimize.opt_a.parameter_eliminate : 0.000011s : 0.00% optimize.opt_a.a_2 : 0.003144s : 0.18% optimize.opt_a.accelerated_algorithm : 0.000210s : 0.01% optimize.opt_a.shard : 0.000093s : 0.01% optimize.opt_a.meta_shard_fg_expand : 0.000025s : 0.00% optimize.opt_a.shard_inline : 0.000141s : 0.01% optimize.opt_a.auto_parallel : 0.000159s : 0.01% optimize.opt_a.parallel : 0.009113s : 0.53% optimize.opt_a.flash_sp : 0.000080s : 0.00% optimize.opt_a.merge_comm : 0.000097s : 0.01% optimize.opt_a.allreduce_fusion : 0.000084s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000091s : 0.01% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000003s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000195s : 0.01% optimize.opt_a.virtual_dataset : 0.000223s : 0.01% optimize.opt_a.get_grad_eliminate_ : 0.000174s : 0.01% optimize.opt_a.virtual_output : 0.000191s : 0.01% optimize.opt_a.merge_forward : 0.000078s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000013s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000360s : 0.02% optimize.opt_a.before_grad : 0.000242s : 0.01% optimize.opt_a.inplace_validation : 0.000077s : 0.00% optimize.opt_a.parallel_renormalize : 0.002776s : 0.16% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000185s : 0.01% optimize.opt_a.meta_fg_expand : 0.000075s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000102s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000049s : 0.00% optimize.opt_a.receive_attached : 0.000008s : 0.00% optimize.opt_a.after_resolve : 0.000210s : 0.01% optimize.opt_a.a_after_grad : 0.000250s : 0.01% optimize.opt_a.special_op_eliminate : 0.000172s : 0.01% optimize.opt_a.renormalize : 0.002097s : 0.12% optimize.opt_a.add_forward_monad_depend : 0.000016s : 0.00% optimize.opt_a.auto_monad_grad : 0.000010s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000129s : 0.01% optimize.opt_a.cse : 0.000446s : 0.03% optimize.opt_a.a_3 : 0.001754s : 0.10% optimize.py_interpret_to_execute_after_opt_a : 0.000075s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000032s : 0.00% optimize.rewriter_after_opt_a : 0.000393s : 0.02% optimize.convert_after_rewriter : 0.000082s : 0.00% optimize.order_py_execute_after_rewriter : 0.000061s : 0.00% optimize.opt_b.b_[INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.327 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1166] CompileGraphs] [PROF]compile_backend_graph costs 1958.12 msec. 1 : 0.002015s : 0.12% optimize.opt_b.b_2 : 0.000065s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000029s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000023s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000023s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000140s : 0.01% optimize.optimize_parallel_all_gather_comm : 0.000080s : 0.00% optimize.overlap_param_gather : 0.000034s : 0.00% optimize.cconv : 0.000088s : 0.01% optimize.loop_unroll : 0.001111s : 0.06% optimize.opt_after_cconv.c_1 : 0.000229s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000007s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000031s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000024s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000022s : 0.00% optimize.opt_after_cconv.cse : 0.000146s : 0.01% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000342s : 0.02% optimize.tuple_transform.d_1 : 0.000294s : 0.02% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000037s : 0.00% optimize.add_cache_embedding : 0.000127s : 0.01% optimize.add_recomputation : 0.000239s : 0.01% optimize.cse_after_recomputation.cse : 0.000090s : 0.01% optimize.environ_conv : 0.000086s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000061s : 0.00% optimize.bias_add_comm_swap : 0.000034s : 0.00% optimize.label_micro_interleaved_index : 0.000032s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000038s : 0.00% optimize.merge_cast_opt : 0.000029s : 0.00% optimize.slice_recompute_activation : 0.000066s : 0.00% optimize.micro_interleaved_order_control : 0.000047s : 0.00% optimize.assign_add_opt : 0.000217s : 0.01% optimize.ForceFp32Comm : 0.000035s : 0.00% optimize.remove_cast_before_assign_add : 0.000113s : 0.01% optimize.full_micro_interleaved_order_control : 0.000033s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000034s : 0.00% optimize.comm_op_add_attrs : 0.000103s : 0.01% optimize.add_comm_op_reuse_tag : 0.000106s : 0.01% optimize.interleave_split_concat_branches : 0.000036s : 0.00% optimize.interleave_parallel_branches : 0.000033s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000074s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000046s : 0.00% optimize.control_data_broadcast_order : 0.000031s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000048s : 0.00% optimize.offloading_packed_experts : 0.000075s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000032s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000029s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000047s : 0.00% optimize.overlap_grad_ring_attention : 0.000086s : 0.01% optimize.overlap_grad_flash_sp : 0.000068s : 0.00% optimize.begin_end_overlap_inline : 0.000050s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000032s : 0.00% optimize.split_layernorm_comm : 0.000031s : 0.00% optimize.handle_group_info : 0.000041s : 0.00% optimize.symbol_engine_optimizer.build : 0.000043s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000062s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000075s : 0.00% optimize.s[DEBUG] RUNTIME_FRAMEWORK(187775,fffe8ffff0f0,python):2025-02-07-15:58:06.278.325 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:0, sequential num:2001075757 [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.356 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1169] CompileGraphs] Status record: end compile function graph: 4_3_1___main___Net_construct_20, produce actor: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.278.328 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph0_SuperKernelActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:1, sequential num:2001075757 ymbol_engine_optimizer.opt_reshape : 0.000049s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000084s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000052s : 0.00% auto_monad_reorder : 0.000143s : 0.01% get_jit_bprop_graph : 0.000047s : 0.00% rewriter_after_jit_bprop_graph : 0.000043s : 0.00% eliminate_special_op_node : 0.001112s : 0.06% distribtued_split : 0.000309s : 0.02% validate : 0.000188s : 0.01% task_emit : 1.652618s : 96.54% execute : 0.000061s : 0.00% [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.390 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end task_emit action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.410 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.441 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:281] SetLoopCount] Change vm_loop_flag to 0, set loop_size to 1 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.471 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start execute action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.497 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end execute action. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.278.512 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 41 [INFO] GE_ADPT(187775,fffe87fff0f0,python):2025-02-07-15:58:06.278.578 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 Time group info: ------[substitution.] 0.000606 351 11.52% : 0.000070s : 10: substitution.arithmetic_simplify 1.20% : 0.000007s : 21: substitution.elim_not_effective 3.07% : 0.000019s : 14: substitution.float_tuple_getitem_switch 1.17% : 0.000007s : 21: substitution.fold_const_symbol 3.73% : 0.000023s : 28: substitution.graph_param_transform 13.82% : 0.000084s : 1: substitution.inline 5.18% : 0.000031s : 66: substitution.j_node_and_user_rematch 5.68% : 0.000034s : 4: substitution.less_batch_normalization 2.29% : 0.000014s : 10: substitution.minmaximum_grad 6.91% : 0.000042s : 66: substitution.remove_not_recompute_node 1.41% : 0.000009s : 6: substitution.replace_old_param 9.10% : 0.000055s : 18: substitution.tuple_list_convert_item_index_to_positive 6.38% : 0.000039s : 18: substitution.tuple_list_get_item_const_eliminator 5.89% : 0.000036s : 18: substitution.tuple_list_get_item_depend_reorder 14.11% : 0.000086s : 30: substitution.tuple_list_get_item_eliminator 6.08% : 0.000037s : 18: substitution.tuple_list_get_set_item_eliminator 1.98% : 0.000012s : 1: substitution.virtual_dataset_eliminate 0.48% : 0.000003s : 1: substitution.virtual_output_eliminate ------[type_inference.] 0.016788 2 96.97% : 0.016279s : 1: type_inference.infer 3.03% : 0.000509s : 1: type_inference.specialize ------[replace.] 0.000078 5 20.12% : 0.000016s : 1: replace.inline 36.30% : 0.000028s : 2: replace.tuple_list_get_item_eliminator 29.42% : 0.000023s : 1: replace.virtual_dataset_eliminate 14.16% : 0.000011s : 1: replace.virtual_output_eliminate ------[match.] 0.000100 5 82.97% : 0.000083s : 1: match.inline 4.30% : 0.000004s : 2: match.tuple_list_get_item_eliminator 10.91% : 0.000011s : 1: match.virtual_dataset_eliminate 1.82% : 0.000002s : 1: match.virtual_output_eliminate ------[predicate.] 0.001393 11209 0.79% : 0.000011s : 100: predicate.accumulaten_eliminater 0.50% : 0.000007s : 28: predicate.ad_related_special_op_eliminate 0.89% : 0.000012s : 97: predicate.addn_check_dump 0.79% : 0.000011s : 100: predicate.addn_zero_filter 0.75% : 0.000010s : 100: predicate.adjust_all_reduce_mul_add 2.15% : 0.000030s : 197: predicate.arithmetic_simplify 2.10% : 0.000029s : 226: predicate.cast_eliminate 1.10% : 0.000015s : 126: predicate.check_bprop_eliminate 0.84% : 0.000012s : 97: predicate.compare_switch_simplify 0.28% : 0.000004s : 41: predicate.const_output_eliminate 0.32% : 0.000004s : 28: predicate.convert_tensor_all_eliminate 1.08% : 0.000015s : 102: predicate.convert_tensor_eliminate 0.88% : 0.000012s : 97: predicate.depend_value_elim 0.85% : 0.000012s : 100: predicate.dict_get_item_const_eliminator 0.89% : 0.000012s : 100: predicate.dict_get_item_eliminator 0.85% : 0.000012s : 100: predicate.dict_set_item_eliminator 0.18% : 0.000003s : 28: predicate.elim_not_effective 0.43% : 0.000006s : 28: predicate.elim_shapecalc_of_broadcastargs 1.17% : 0.000016s : 141: predicate.environ_add_const_eliminate 1.15% : 0.000016s : 141: predicate.environ_get_add_eliminate 1.11% : 0.000015s : 141: predicate.environ_get_depend_swap 2.03% : 0.000028s : 238: predicate.environ_get_eliminate 1.14% : 0.000016s : 141: predicate.environ_get_set_eliminate 0.81% : 0.000011s : 103: predicate.exchange_switch_depend_value 1.15% : 0.000016s : 103: predicate.float_depend_g_call 0.87% : 0.000012s : 97: predicate.float_environ_get_switch 1.30% : 0.000018s : 138: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 28: predicate.fold_const_symbol 1.19% : 0.000017s : 127: predicate.get_grad_eliminate 0.20% : 0.000003s : 28: predicate.graph_param_transform 0.88% : 0.000012s : 97: predicate.incorporate_call 0.83% : 0.000012s : 97: predicate.incorporate_call_switch 5.22% : 0.000073s : 479: predicate.inline 1.43% : 0.000020s : 126: predicate.inline_without_move 0.68% : 0.000010s : 126: predicate.j_node_and_user_rematch 0.88% : 0.000012s : 89: predicate.less_batch_normalization 1.50% : 0.000021s : 171: predicate.list_to_tuple_eliminator_ 2.26% : 0.000031s : 284: predicate.load_eliminater 0.59% : 0.000008s : 41: predicate.loop_unroll_after_grad 1.09% : 0.000015s : 105: predicate.loop_unroll_before_grad 1.52% : 0.000021s : 182: predicate.make_slice_get_slice_eliminator 0.90% : 0.000013s : 97: predicate.merge_addn 1.10% : 0.000015s : 126: predicate.micro_step_allgather_replace 1.08% : 0.000015s : 126: predicate.mini_step_allgather_replace 0.79% : 0.000011s : 100: predicate.minmaximum_grad 0.39% : 0.000005s : 28: predicate.mutable_eliminate 0.32% : 0.000004s : 28: predicate.opt_reshape 0.42% : 0.000006s : 41: predicate.parallel_virtual_node 1.03% : 0.000014s : 103: predicate.partial_defer_inline 1.26% : 0.000018s : 143: predicate.partial_eliminate 0.77% : 0.000011s : 100: predicate.print_const_string_wrapper 0.87% : 0.000012s : 97: predicate.reduce_all_const_elim 0.96% : 0.000013s : 100: predicate.reduce_eliminate 0.70% : 0.000010s : 126: predicate.remove_not_recompute_node 1.62% : 0.000023s : 228: predicate.replace_applicator 0.74% : 0.000010s : 126: predicate.replace_old_param 0.28% : 0.000004s : 41: predicate.reset_defer_inline 0.78% : 0.000011s : 100: predicate.reshape_eliminate 1.10% : 0.000015s : 126: predicate.row_tensor_add_zeros_like 0.40% : 0.000006s : 41: predicate.row_tensor_eliminate 1.24% : 0.000017s : 126: predicate.same_eliminate 0.63% : 0.000009s : 97: predicate.set_cell_output_no_recompute 1.27% : 0.000018s : 127: predicate.shard_identity_eliminate 1.60% : 0.000022s : 167: predicate.special_op_eliminate 1.02% : 0.000014s : 97: predicate.specialize_transform 1.25% : 0.000017s : 126: predicate.split_environ_get_set_with_tuple_value 1.29% : 0.000018s : 126: predicate.stack_unstack_eliminate 2.25% : 0.000031s : 284: predicate.stopgrad_eliminater 0.37% : 0.000005s : 41: predicate.switch_call_monad_eliminater 0.89% : 0.000012s : 103: predicate.switch_defer_inline 1.94% : 0.000027s : 229: predicate.switch_layer_defer_inline 3.27% : 0.000046s : 305: predicate.switch_simplify 0.78% : 0.000011s : 100: predicate.tile_eliminate 0.77% : 0.000011s : 100: predicate.transpose_eliminate 1.54% : 0.000021s : 169: predicate.tuple_list_convert_item_index_to_positive 1.54% : 0.000021s : 169: predicate.tuple_list_get_item_const_eliminator 1.37% : 0.000019s : 169: predicate.tuple_list_get_item_depend_reorder 2.62% : 0.000036s : 268: predicate.tuple_list_get_item_eliminator 1.43% : 0.000020s : 169: predicate.tuple_list_get_set_item_eliminator 2.47% : 0.000034s : 266: predicate.tuple_list_set_item_eliminator 1.46% : 0.000020s : 171: predicate.tuple_to_list_eliminator_ 2.21% : 0.000031s : 284: predicate.updatestate_pure_node_eliminater 3.21% : 0.000045s : 381: predicate.updatestate_useless_node_eliminater 0.39% : 0.000005s : 41: predicate.value_based_eliminate 1.24% : 0.000017s : 130: predicate.virtual_dataset_eliminate 1.18% : 0.000016s : 128: predicate.virtual_output_eliminate 0.43% : 0.000006s : 41: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000482 5 6.77% : 0.000033s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.23% : 0.000449s : 4: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 1.801543 283 0.00% : 0.000040s : 1: ForceFp32Comm 0.75% : 0.013475s : 1: a1a2 0.01% : 0.000133s : 1: add_cache_embedding 0.01% : 0.000114s : 1: add_comm_op_reuse_tag 0.01% : 0.000248s : 1: add_recomputation 0.01% : 0.000225s : 1: assign_add_opt 0.02% : 0.000374s : 1: auto_monad 0.01% : 0.000154s : 1: auto_monad_reorder 0.00% : 0.000056s : 1: begin_end_overlap_inline 0.00% : 0.000039s : 1: bias_add_comm_swap 0.08% : 0.001400s : 1: bootstrap 0.01% : 0.000095s : 1: cconv 0.01% : 0.000109s : 1: comm_op_add_attrs 0.00% : 0.000035s : 1: control_data_broadcast_order 0.00% : 0.000089s : 1: convert_after_rewriter 0.01% : 0.000191s : 1: cse_after_recomputation 0.02% : 0.000340s : 1: dataset_repeat_opt 0.02% : 0.000324s : 1: distribtued_split 0.06% : 0.001127s : 1: eliminate_special_op_node 0.01% : 0.000092s : 1: environ_conv 0.00% : 0.000070s : 1: execute 0.00% : 0.000038s : 1: full_micro_interleaved_order_control 0.00% : 0.000054s : 1: get_jit_bprop_graph 0.00% : 0.000070s : 1: graph_reusing 0.00% : 0.000053s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000045s : 1: handle_group_info 0.76% : 0.013708s : 1: inline 0.06% : 0.001075s : 1: insert-virtual-dataset 0.00% : 0.000037s : 1: interleave_parallel_branches 0.00% : 0.000040s : 1: interleave_split_concat_branches 0.00% : 0.000044s : 1: label_fine_grained_interleaved_index 0.00% : 0.000036s : 1: label_micro_interleaved_index 0.06% : 0.001121s : 1: loop_unroll 0.00% : 0.000034s : 1: merge_cast_opt 0.00% : 0.000052s : 1: micro_interleaved_order_control 0.00% : 0.000079s : 1: offloading_packed_experts 0.04% : 0.000653s : 44: opt.transform.a1a2 0.00% : 0.000059s : 1: opt.transform.loop_unroll_optimizer 0.40% : 0.007274s : 123: opt.transform.opt_a 0.01% : 0.000212s : 1: opt.transform.opt_after_cconv 0.06% : 0.001051s : 27: opt.transform.opt_b 0.02% : 0.000276s : 1: opt.transform.opt_trans_graph 0.01% : 0.000116s : 3: opt.transform.special_op_eliminate 0.01% : 0.000188s : 4: opt.transform.symbol_engine_opt 1.79% : 0.032185s : 1: opt_a 0.05% : 0.000814s : 1: opt_after_cconv 0.15% : 0.002691s : 1: opt_b 2.34% : 0.042096s : 1: optimize 0.00% : 0.000087s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000069s : 1: order_py_execute_after_rewriter 0.00% : 0.000074s : 1: overlap_grad_flash_sp 0.00% : 0.000033s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000093s : 1: overlap_grad_ring_attention 0.00% : 0.000051s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000080s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000039s : 1: overlap_param_gather 0.00% : 0.000051s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000036s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000135s : 1: parallel-infer-symbol 0.00% : 0.000059s : 1: parallel-infer-symbol-second 0.00% : 0.000042s : 1: partial_unused_args_eliminate 0.00% : 0.000060s : 1: pipeline_parallel_scheduler 0.01% : 0.000121s : 1: pipeline_split 0.01% : 0.000102s : 1: pre_auto_parallel 0.00% : 0.000058s : 1: py_interpret_to_execute 0.00% : 0.000083s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000119s : 1: remove_cast_before_assign_add 0.02% : 0.000350s : 1: remove_dup_value 0.17% : 0.003103s : 2: renormalize.infer 0.10% : 0.001751s : 2: renormalize.specialize 0.00% : 0.000039s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000049s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000403s : 1: rewriter_after_opt_a 0.01% : 0.000268s : 2: rewriter_before_opt_a 0.00% : 0.000038s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000072s : 1: slice_recompute_activation 0.00% : 0.000035s : 1: split_layernorm_comm 0.00% : 0.000037s : 1: split_matmul_comm_elemetwise 0.00% : 0.000066s : 1: swap_dp_allreduce_reducescatter 0.04% : 0.000635s : 1: symbol_engine_optimizer 91.74% : 1.652660s : 1: task_emit 0.02% : 0.000436s : 1: tuple_transform 0.94% : 0.016951s : 1: type_inference 0.02% : 0.000432s : 1: validate [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.278.702 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1785] Run] End [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.278.805 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:239] SavePassesConfig] Running_passes: ['a1a2.r1.a_1', 'a1a2.r1.a_1.inline', 'opt_a.r1.auto_parallel', 'opt_a.r1.flash_sp', 'opt_a.r1.flash_sp_send_recv_attached', 'opt_a.r1.parallel', 'opt_a.r1.parallel_renormalize', 'opt_a.r1.receive_attached', 'opt_a.r1.virtual_dataset', 'opt_a.r1.virtual_dataset.virtual_dataset_eliminate', 'opt_a.r1.virtual_output', 'opt_a.r1.virtual_output.virtual_output_eliminate', 'opt_a.r2.a_1', 'opt_a.r2.a_1.tuple_list_get_item_eliminator', 'opt_a.r2.accelerated_algorithm', 'opt_a.r2.accelerated_algorithm.less_batch_normalization', 'opt_a.r2.auto_parallel', 'opt_a.r2.flash_sp', 'opt_a.r2.renormalize', 'opt_a.r3.auto_parallel'] [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.278.850 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1047] SaveCompiledGraph] Save compiled func graph(4_3_1___main___Net_construct_20) phase(train.1738915084361932544.281470011721104.0..)! [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.278.862 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 6_Default/StridedSlice-op0, front node: @4_3_1___main___Net_construct_20:param_x, with index: 0, addr index: 0, device type: 2 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.278.888 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1065] SaveCompiledGraph] End save compiled func graph! [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.278.905 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 6_Default/StridedSlice-op0, outer index: 0, inner index:0, front node: @4_3_1___main___Net_construct_20:param_x [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.278.929 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1326] CompileInner] [PROF]ParallelPostProcess costs 0.011 msec. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.278.948 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1154] CleanCompileRes] Clean compile resource start [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.278.972 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 0, inner index: 0, dynamic is 0 [INFO] GE_ADPT(187775,fffeacff90f0,python):2025-02-07-15:58:06.278.987 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.186 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 0_Default/StreamSend-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.212 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.279.238 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 6_Default/StridedSlice-op0, input index: 0, device tensor: 0x39040870, ptr: 0x12c7fda01200, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.279.263 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor6_Default/StridedSlice-op0, actor input: 0, graph input: 1, device tensor: 0x39040870, ptr: 0x12c7fda01200, ref cnt: 1 TotalTime = 2.03631, [21] [bootstrap]: 0.00129115 [type_inference]: 0.0172955 [auto_monad]: 0.00034971 [graph_reusing]: 9.209e-05 [inline]: 0.0133478, [2] [rewriter_before_opt_a]: 0.00011476 [a1a2]: 0.0131069, [2] [Cycle 1]: 0.00165987, [11] [expand_dump_flag]: 3.73996e-06 [switch_simplify]: 4.407e-05 [loop_unroll]: 2.942e-05 [a_1]: 0.00029418 [recompute_prepare]: 2.26999e-05 [updatestate_depend_eliminate]: 9.47004e-06 [updatestate_assign_eliminate]: 3.91004e-06 [updatestate_loads_eliminate]: 3.29991e-06 [parameter_eliminate]: 5.09003e-06 [a_2]: 0.00069775 [parallel_inline_pass]: 2.18099e-05 [Cycle 2]: 0.00149623, [11] [expand_dump_flag]: 1.27999e-06 [switch_simplify]: 2.036e-05 [loop_unroll]: 2.058e-05 [a_1]: 0.00013516 [recompute_prepare]: 2.134e-05 [updatestate_depend_eliminate]: 3.99002e-06 [updatestate_assign_eliminate]: 2.90002e-06 [updatestate_loads_eliminate]: 2.64996e-06 [parameter_eliminate]: 1.76008e-06 [a_2]: 0.00057317 [parallel_inline_pass]: 2.1e-05 [parallel-infer-symbol]: 0.00012098 [pre_auto_parallel]: 9.455e-05 [insert-virtual-dataset]: 0.00104872 [parallel-infer-symbol-second]: 4.97e-05 [dataset_repeat_opt]: 0.00034002 [pipeline_split]: 0.00010595 [optimize]: 0.0410809, [52] [py_interpret_to_execute]: 4.871e-05 [rewriter_before_opt_a]: 7.011e-05 [opt_a]: 0.0316738, [3] [Cycle 1]: 0.0166454, [46] [expand_dump_flag]: 1.83994e-06 [switch_simplify]: 3.473e-05 [loop_unroll]: 3.39301e-05 [a_1]: 0.00028726 [recompute_prepare]: 2.49899e-05 [updatestate_depend_eliminate]: 8.54e-06 [updatestate_assign_eliminate]: 4.93997e-06 [updatestate_loads_eliminate]: 7.97992e-06 [parameter_eliminate]: 2.60992e-06 [a_2]: 0.00066917 [accelerated_algorithm]: 2.47101e-05 [shard]: 3.041e-05 [meta_shard_fg_expand]: 3.80003e-06 [shard_inline]: 2.23899e-05 [auto_parallel]: 3.367e-05 [parallel]: 0.00903639 [flash_sp]: 4.046e-05 [merge_comm]: 3.18501e-05 [allreduce_fusion]: 2.75e-05 [matmul_add_comm_reduction]: 3.243e-05 [allreduce_slice_to_reducescatter]: 7.3004e-07 [virtual_shard_identity]: 7.242e-05 [virtual_dataset]: 0.00010527 [get_grad_eliminate_]: 6.241e-05 [virtual_output]: 8.111e-05 [merge_forward]: 2.83801e-05 [cell_reuse_recompute_pass]: 3.99002e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.0001475 [before_grad]: 8.316e-05 [inplace_validation]: 2.642e-05 [parallel_renormalize]: 0.00260427 [update_top_fg]: 7.59959e-07 [cast_eliminate]: 7.04301e-05 [meta_fg_expand]: 2.57901e-05 [inplace_validation_after_expand]: 3.71899e-05 [flash_sp_send_recv_attached]: 4.282e-05 [receive_attached]: 4.31994e-06 [after_resolve]: 7.95301e-05 [a_after_grad]: 8.707e-05 [special_op_eliminate]: 5.848e-05 [renormalize]: 1.39931e-07 [add_forward_monad_depend]: 5.56e-06 [auto_monad_grad]: 3.75009e-06 [auto_monad_eliminator]: 4.28901e-05 [cse]: 0.00026199 [a_3]: 0.00057152 [Cycle 2]: 0.00843611, [46] [expand_dump_flag]: 2.04006e-06 [switch_simplify]: 5.85399e-05 [loop_unroll]: 5.72e-05 [a_1]: 0.0013555 [recompute_prepare]: 5.791e-05 [updatestate_depend_eliminate]: 3.057e-05 [updatestate_assign_eliminate]: 2.325e-05 [updatestate_loads_eliminate]: 2.245e-05 [parameter_eliminate]: 3.04997e-06 [a_2]: 0.00114296 [accelerated_algorithm]: 0.00019874 [shard]: 3.076e-05 [meta_shard_fg_expand]: 1.051e-05 [shard_inline]: 5.69901e-05 [auto_parallel]: 5.233e-05 [parallel]: 9.64e-06 [flash_sp]: 3.564e-05 [merge_comm]: 2.83801e-05 [allreduce_fusion]: 2.477e-05 [matmul_add_comm_reduction]: 2.47901e-05 [allreduce_slice_to_reducescatter]: 5.79981e-07 [virtual_shard_identity]: 5.703e-05 [virtual_dataset]: 5.64799e-05 [get_grad_eliminate_]: 5.45699e-05 [virtual_output]: 5.54899e-05 [merge_forward]: 2.441e-05 [cell_reuse_recompute_pass]: 2.94996e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00010944 [before_grad]: 7.464e-05 [inplace_validation]: 2.298e-05 [parallel_renormalize]: 1.10012e-07 [update_top_fg]: 3.89991e-07 [cast_eliminate]: 5.474e-05 [meta_fg_expand]: 2.309e-05 [inplace_validation_after_expand]: 2.912e-05 [flash_sp_send_recv_attached]: 1.45007e-06 [receive_attached]: 1.56998e-06 [after_resolve]: 5.953e-05 [a_after_grad]: 8.018e-05 [special_op_eliminate]: 5.508e-05 [renormalize]: 0.00197919 [add_forward_monad_depend]: 5.24998e-06 [auto_monad_grad]: 2.61993e-06 [auto_monad_eliminator]: 4.205e-05 [cse]: 0.00013938 [a_3]: 0.00055461 [Cycle 3]: 0.006535, [46] [expand_dump_flag]: 1.93994e-06 [switch_simplify]: 5.75801e-05 [loop_unroll]: 5.523e-05 [a_1]: 0.00121076 [recompute_prepare]: 6.06599e-05 [updatestate_depend_eliminate]: 2.91499e-05 [updatestate_assign_eliminate]: 2.33001e-05 [updatestate_loads_eliminate]: 2.223e-05 [parameter_eliminate]: 5.60004e-06 [a_2]: 0.00124029 [accelerated_algorithm]: 6.862e-05 [shard]: 3.342e-05 [meta_shard_fg_expand]: 1.26699e-05 [shard_inline]: 5.81701e-05 [auto_parallel]: 6.785e-05 [parallel]: 1.393e-05 [flash_sp]: 1.71992e-06 [merge_comm]: 3.168e-05 [allreduce_fusion]: 2.718e-05 [matmul_add_comm_reduction]: 3.428e-05 [allreduce_slice_to_reducescatter]: 7.20029e-07 [virtual_shard_identity]: 6.50199e-05 [virtual_dataset]: 5.563e-05 [get_grad_eliminate_]: 5.468e-05 [virtual_output]: 5.702e-05 [merge_forward]: 2.535e-05 [cell_reuse_recompute_pass]: 5.20004e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00012958 [before_grad]: 7.82501e-05 [inplace_validation]: 2.49801e-05 [parallel_renormalize]: 6.00703e-08 [update_top_fg]: 6.70087e-07 [cast_eliminate]: 5.71001e-05 [meta_fg_expand]: 2.47001e-05 [inplace_validation_after_expand]: 3.195e-05 [flash_sp_send_recv_attached]: 2.51993e-06 [receive_attached]: 1.83005e-06 [after_resolve]: 7.41e-05 [a_after_grad]: 8.195e-05 [special_op_eliminate]: 5.51001e-05 [renormalize]: 3.40049e-07 [add_forward_monad_depend]: 5.07e-06 [auto_monad_grad]: 4.81994e-06 [auto_monad_eliminator]: 4.38699e-05 [cse]: 0.00014433 [a_3]: 0.00057701 [py_interpret_to_execute_after_opt_a]: 7.72501e-05 [slice_cell_reuse_recomputed_activation]: 3.13601e-05 [rewriter_after_opt_a]: 0.00040009 [convert_after_rewriter]: 8.10799e-05 [order_py_execute_after_rewriter]: 5.448e-05 [opt_b]: 0.00252681, [1] [Cycle 1]: 0.00247969, [7] [b_1]: 0.00185977 [b_2]: 6.32301e-05 [updatestate_depend_eliminate]: 3.19401e-05 [updatestate_assign_eliminate]: 2.37101e-05 [updatestate_loads_eliminate]: 2.279e-05 [renormalize]: 1.44995e-06 [cse]: 0.00014149 [optimize_parallel_all_gather_comm]: 7.67199e-05 [overlap_param_gather]: 3.38999e-05 [cconv]: 8.76e-05 [loop_unroll]: 0.00099138 [opt_after_cconv]: 0.00080661, [1] [Cycle 1]: 0.00076649, [7] [c_1]: 0.00022567 [parameter_eliminate]: 5.89003e-06 [updatestate_depend_eliminate]: 3.369e-05 [updatestate_assign_eliminate]: 2.331e-05 [updatestate_loads_eliminate]: 2.186e-05 [cse]: 0.00014047 [renormalize]: 1.33005e-06 [remove_dup_value]: 0.00034547 [tuple_transform]: 0.00044056, [1] [Cycle 1]: 0.00039653, [2] [d_1]: 0.00029708 [renormalize]: 4.7998e-07 [partial_unused_args_eliminate]: 3.428e-05 [add_cache_embedding]: 0.00012444 [add_recomputation]: 0.00023341 [cse_after_recomputation]: 0.00018454, [1] [Cycle 1]: 0.00014148, [1] [cse]: 8.796e-05 [environ_conv]: 9.04901e-05 [swap_dp_allreduce_reducescatter]: 5.95299e-05 [bias_add_comm_swap]: 3.40301e-05 [label_micro_interleaved_index]: 2.98099e-05 [label_fine_grained_interleaved_index]: 3.05701e-05 [merge_cast_opt]: 3.01501e-05 [slice_recompute_activation]: 6.219e-05 [micro_interleaved_order_control]: 4.38e-05 [assign_add_opt]: 0.00021535 [ForceFp32Comm]: 3.10199e-05 [remove_cast_before_assign_add]: 5.306e-05 [full_micro_interleaved_order_control]: 3.117e-05 [reorder_send_recv_between_fp_bp]: 2.99801e-05 [comm_op_add_attrs]: 9.807e-05 [add_comm_op_reuse_tag]: 0.00010828 [interleave_split_concat_branches]: 3.14401e-05 [interleave_parallel_branches]: 3.228e-05 [overlap_opt_shard_in_pipeline]: 7.622e-05 [overlap_opt_shard_grad_in_pipeline]: 4.344e-05 [control_data_broadcast_order]: 3.032e-05 [grouped_pairwise_exchange_alltoall]: 4.439e-05 [offloading_packed_experts]: 7.561e-05 [overlap_recompute_and_grad_model_parallel]: 3.167e-05 [overlap_grad_matmul_and_grad_allreduce]: 2.739e-05 [overlap_recompute_allgather_and_fa_grad]: 6.15601e-05 [overlap_grad_ring_attention]: 7.34701e-05 [overlap_grad_flash_sp]: 5.833e-05 [begin_end_overlap_inline]: 4.134e-05 [split_matmul_comm_elemetwise]: 3.032e-05 [split_layernorm_comm]: 3.131e-05 [handle_group_info]: 3.501e-05 [symbol_engine_optimizer]: 0.00060032, [1] [Cycle 1]: 0.00056292, [6] [build]: 3.936e-05 [elim_shapecalc]: 6.26701e-05 [elim_not_effective]: 7.319e-05 [opt_reshape]: 4.728e-05 [fold_const_symbol]: 8.09199e-05 [renormalize]: 6.10016e-07 [pipeline_parallel_scheduler]: 5.13999e-05 [auto_monad_reorder]: 0.00013933 [get_jit_bprop_graph]: 4.97301e-05 [rewriter_after_jit_bprop_graph]: 4.341e-05 [eliminate_special_op_node]: 0.0011106 [distribtued_split]: 0.00029255 [validate]: 0.00018071 [task_emit]: 1.95868 [execute]: 5.563e-05 Sums bootstrap : 0.001291s : 0.06% type_inference : 0.017296s : 0.86% auto_monad : 0.000350s : 0.02% graph_reusing : 0.000092s : 0.00% inline.rewriter_before_opt_a : 0.000115s : 0.01% inline.a1a2.expand_dump_flag : 0.000005s : 0.00% inline.a1a2.switch_simplify : 0.000064s : 0.00% inline.a1a2.loop_unroll : 0.000050s : 0.00% inline.a1a2.a_1 : 0.000429s : 0.02% inline.a1a2.recompute_prepare : 0.000044s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000013s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000007s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000006s : 0.00% inline.a1a2.parameter_eliminate : 0.000007s : 0.00% inline.a1a2.a_2 : 0.001271s : 0.06% inline.a1a2.parallel_inline_pass : 0.000043s : 0.00% parallel-infer-symbol : 0.000121s : 0.01% pre_auto_parallel : 0.000095s : 0.00% insert-virtual-dataset : 0.001049s : 0.05% parallel-infer-symbol-second : 0.000050s : 0.00% dataset_repeat_opt : 0.000340s : 0.02% pipeline_split : 0.000106s : 0.01% optimize.py_interpret_to_execute : 0.000049s : 0.00% optimize.rewriter_before_opt_a : 0.000070s : 0.00% optimize.opt_a.expand_dump_flag : 0.000006s : 0.00% optimize.opt_a.switch_simplify : 0.000151s : 0.01% optimize.opt_a.loop_unroll : 0.000146s : 0.01% optimize.opt_a.a_1 : 0.002854s : 0.14% optimize.opt_a.recompute_prepare : 0.000144s : 0.01% optimize.opt_a.updatestate_depend_eliminate : 0.000068s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000051s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000053s : 0.00% optimize.opt_a.parameter_eliminate : 0.000011s : 0.00% optimize.opt_a.a_2 : 0.003052s : 0.15% optimize.opt_a.accelerated_algorithm : 0.000292s : 0.01% optimize.opt_a.shard : 0.000095s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000027s : 0.00% optimize.opt_a.shard_inline : 0.000138s : 0.01% optimize.opt_a.auto_parallel : 0.000154s : 0.01% optimize.opt_a.parallel : 0.009060s : 0.45% optimize.opt_a.flash_sp : 0.000078s : 0.00% optimize.opt_a.merge_comm : 0.000092s : 0.00% optimize.opt_a.allreduce_fusion : 0.000079s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000092s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000194s : 0.01% optimize.opt_a.virtual_dataset : 0.000217s : 0.01% optimize.opt_a.get_grad_eliminate_ : 0.000172s : 0.01% optimize.opt_a.virtual_output : 0.000194s : 0.01% optimize.opt_a.merge_forward : 0.000078s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000012s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000387s : 0.02% optimize.opt_a.before_grad : 0.000236s : 0.01% optimize.opt_a.inplace_validation : 0.000074s : 0.00% optimize.opt_a.parallel_renormalize : 0.002604s : 0.13% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000182s : 0.01% optimize.opt_a.meta_fg_expand : 0.000074s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000098s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000047s : 0.00% optimize.opt_a.receive_attached : 0.000008s : 0.00% optimize.opt_a.after_resolve : 0.000213s : 0.01% optimize.opt_a.a_after_grad : 0.000249s : 0.01% optimize.opt_a.special_op_eliminate : 0.000169s : 0.01% optimize.opt_a.renormalize : 0.001980s : 0.10% optimize.opt_a.add_forward_monad_depend : 0.000016s : 0.00% optimize.opt_a.auto_monad_grad : 0.000011s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000129s : 0.01% optimize.opt_a.cse : 0.000546s : 0.03% optimize.opt_a.a_3 : 0.001703s : 0.08% optimize.py_interpret_to_execute_after_opt_a : 0.000077s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000031s : 0.00% optimize.rewriter_after_opt_a : 0.000400s : 0.02% optimize.convert_after_rewriter : 0.000081s : 0.00% optimize.order_py_execute_after_rewriter : 0.000054s : 0.00% optimize.opt_b.b_1 : 0.001860s : 0.09% optimize.opt_b.b_2 : 0.000063s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000032s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000024s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000023s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000141s : 0.01% optimize.optimize_parallel_all_gather_comm : 0.000077s : 0.00% optimize.overlap_param_gather : 0.000034s : 0.00% optimize.cconv : 0.000088s : 0.00% optimize.loop_unroll : 0.000991s : 0.05% optimize.opt_after_cconv.c_1 : 0.000226s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000006s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000034s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000023s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000022s : 0.00% optimize.opt_after_cconv.cse : 0.000140s : 0.01% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000345s : 0.02% optimize.tuple_transform.d_1 : 0.000297s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000034s : 0.00% optimize.add_cache_embedding : 0.000124s : 0.01% optimize.add_recomputation : 0.000233s : 0.01% optimize.cse_after_recomputation.cse : 0.000088s : 0.00% optimize.environ_conv : 0.000090s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000060s : 0.00% optimize.bias_add_comm_swap : 0.000034s : 0.00% optimize.label_micro_interleaved_index : 0.000030s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000031s : 0.00% optimize.merge_cast_opt : 0.000030s : 0.00% optimize.slice_recompute_activation : 0.000062s : 0.00% optimize.micro_interleaved_order_control : 0.000044s : 0.00% optimize.assign_add_opt : 0.000215s : 0.01% optimize.ForceFp32Comm : 0.000031s : 0.00% optimize.remove_cast_before_assign_add : 0.000053s : 0.00% optimize.full_micro_interleaved_order_control : 0.000031s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000030s : 0.00% optimize.comm_op_add_attrs : 0.000098s : 0.00% optimize.add_comm_op_reuse_tag : 0.000108s : 0.01% optimize.interleave_split_concat_branches : 0.000031s : 0.00% optimize.interleave_parallel_branches : 0.000032s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000076s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000043s : 0.00% optimize.control_data_broadcast_order : 0.000030s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000044s : 0.00% optimize.offloading_packed_experts : 0.000076s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000032s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000027s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000062s : 0.00% optimize.overlap_grad_ring_attention : 0.000073s : 0.00% optimize.overlap_grad_flash_sp : 0.000058s : 0.00% optimize.begin_end_overlap_inline : 0.000041s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000030s : 0.00% optimize.split_layernorm_comm : 0.000031s : 0.00% optimize.handle_group_info : 0.000035s : 0.00% optimize.symbol_engine_optimizer.build : 0.000039s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000063s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000073s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000047s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000081s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000051s : 0.00% auto_monad_reorder : 0.000139s : 0.01% get_jit_bprop_graph : 0.000050s : 0.00% rewriter_after_jit_bprop_graph : 0.000043s : 0.00% eliminate_special_op_node : 0.001111s : 0.06% distribtued_split : 0.000293s : 0.01% validate : 0.000181s : 0.01% task_emit : 1.958682s : 97.09% execute : 0.000056s : 0.00% [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.279.319 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 9_Default/StridedSlice-op3, front node: @4_3_1___main___Net_construct_20:param_y, with index: 0, addr index: 1, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.279.348 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 9_Default/StridedSlice-op3, outer index: 1, inner index:0, front node: @4_3_1___main___Net_construct_20:param_y [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.279.374 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 1, inner index: 0, dynamic is 0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.387 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.415 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 2, actor name : 1_Default/StreamRecv-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.440 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.459 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.279.520 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 9_Default/StridedSlice-op3, input index: 0, device tensor: 0x390404a0, ptr: 0x12c7fda01600, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.279.541 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor9_Default/StridedSlice-op3, actor input: 0, graph input: 0, device tensor: 0x390404a0, ptr: 0x12c7fda01600, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.560 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.583 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 2_Default/StreamSend-op1, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.598 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op1 [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.279.626 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph0_SuperKernelActor and check running condition:1, sequential num:2001075757 Time group info: ------[substitution.] 0.000668 351 10.55% : 0.000070s : 10: substitution.arithmetic_simplify 1.09% : 0.000007s : 21: substitution.elim_not_effective 3.13% : 0.000021s : 14: substitution.float_tuple_getitem_switch 1.02% : 0.000007s : 21: substitution.fold_const_symbol 3.46% : 0.000023s : 28: substitution.graph_param_transform 9.98% : 0.000067s : 1: substitution.inline 4.51% : 0.000030s : 66: substitution.j_node_and_user_rematch 13.76% : 0.000092s : 4: substitution.less_batch_normalization 2.06% : 0.000014s : 10: substitution.minmaximum_grad 9.56% : 0.000064s : 66: substitution.remove_not_recompute_node 1.41% : 0.000009s : 6: substitution.replace_old_param 8.52% : 0.000057s : 18: substitution.tuple_list_convert_item_index_to_positive 5.50% : 0.000037s : 18: substitution.tuple_list_get_item_const_eliminator 5.04% : 0.000034s : 18: substitution.tuple_list_get_item_depend_reorder 13.13% : 0.000088s : 30: substitution.tuple_list_get_item_eliminator 5.04% : 0.000034s : 18: substitution.tuple_list_get_set_item_eliminator 1.74% : 0.000012s : 1: substitution.virtual_dataset_eliminate 0.49% : 0.000003s : 1: substitution.virtual_output_eliminate ------[type_inference.] 0.017156 2 96.64% : 0.016580s : 1: type_inference.infer 3.36% : 0.000576s : 1: type_inference.specialize ------[replace.] 0.000073 5 17.85% : 0.000013s : 1: replace.inline 38.63% : 0.000028s : 2: replace.tuple_list_get_item_eliminator 29.34% : 0.000021s : 1: replace.virtual_dataset_eliminate 14.18% : 0.000010s : 1: replace.virtual_output_eliminate ------[match.] 0.000083 5 79.57% : 0.000066s : 1: match.inline 5.04% : 0.000004s : 2: match.tuple_list_get_item_eliminator 12.71% : 0.000010s : 1: match.virtual_dataset_eliminate 2.68% : 0.000002s : 1: match.virtual_output_eliminate ------[predicate.] 0.001389 11209 0.77% : 0.000011s : 100: predicate.accumulaten_eliminater 0.52% : 0.000007s : 28: predicate.ad_related_special_op_eliminate 0.88% : 0.000012s : 97: predicate.addn_check_dump 0.80% : 0.000011s : 100: predicate.addn_zero_filter 0.75% : 0.000010s : 100: predicate.adjust_all_reduce_mul_add 2.25% : 0.000031s : 197: predicate.arithmetic_simplify 2.01% : 0.000028s : 226: predicate.cast_eliminate 1.10% : 0.000015s : 126: predicate.check_bprop_eliminate 0.86% : 0.000012s : 97: predicate.compare_switch_simplify 0.28% : 0.000004s : 41: predicate.const_output_eliminate 0.30% : 0.000004s : 28: predicate.convert_tensor_all_eliminate 1.06% : 0.000015s : 102: predicate.convert_tensor_eliminate 0.88% : 0.000012s : 97: predicate.depend_value_elim 0.83% : 0.000012s : 100: predicate.dict_get_item_const_eliminator 0.89% : 0.000012s : 100: predicate.dict_get_item_eliminator 0.88% : 0.000012s : 100: predicate.dict_set_item_eliminator 0.21% : 0.000003s : 28: predicate.elim_not_effective 0.37% : 0.000005s : 28: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000016s : 141: predicate.environ_add_const_eliminate 1.12% : 0.000016s : 141: predicate.environ_get_add_eliminate 1.10% : 0.000015s : 141: predicate.environ_get_depend_swap 2.05% : 0.000028s : 238: predicate.environ_get_eliminate 1.13% : 0.000016s : 141: predicate.environ_get_set_eliminate 0.80% : 0.000011s : 103: predicate.exchange_switch_depend_value 1.13% : 0.000016s : 103: predicate.float_depend_g_call 0.89% : 0.000012s : 97: predicate.float_environ_get_switch 1.27% : 0.000018s : 138: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 28: predicate.fold_const_symbol 1.18% : 0.000016s : 127: predicate.get_grad_eliminate 0.19% : 0.000003s : 28: predicate.graph_param_transform 0.91%[DEBUG] RUNTIME_FRAMEWORK(187775,fffe8ffff0f0,python):2025-02-07-15:58:06.279.646 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:317] RunOpData] Actor(kernel_graph_0_OutputActor) receive the input op data and output position:0 device tensor:0x39049050 ptr:0 ref count:18446744073709551615 origin ref count:18446744073709551615 dynamic ref count:2147483647 from memory pool:0 output node:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} index:0 : 0.000013s : 97: predicate.incorporate_call 0.85% : 0.000012s : 97: predicate.incorporate_call_switch 5.32% : 0.000074s : 479: predicate.inline 1.39% : 0.000019s : 126: predicate.inline_without_move 0.68% : 0.000010s : 126: predicate.j_node_and_user_rematch 0.98% : 0.000014s : 89: predicate.less_batch_normalization 1.57% : 0.000022s : 171: predicate.list_to_tuple_eliminator_ 2.28% : 0.000032s : 284: predicate.load_eliminater 0.63% : 0.000009s : 41: predicate.loop_unroll_after_grad 1.04% : 0.000014s : 105: predicate.loop_unroll_before_grad 1.60% : 0.000022s : 182: predicate.make_slice_get_slice_eliminator 0.88% : 0.000012s : 97: predicate.merge_addn 1.10% : 0.000015s : 126: predicate.micro_step_allgather_replace 1.12% : 0.000016s : 126: predicate.mini_step_allgather_replace 0.80% : 0.000011s : 100: predicate.minmaximum_grad 0.43% : 0.000006s : 28: predicate.mutable_eliminate 0.31% : 0.000004s : 28: predicate.opt_reshape 0.40% : 0.000006s : 41: predicate.parallel_virtual_node 1.05% : 0.000015s : 103: predicate.partial_defer_inline 1.28% : 0.000018s : 143: predicate.partial_eliminate 0.79% : 0.000011s : 100: predicate.print_const_string_wrapper 0.87% : 0.000012s : 97: predicate.reduce_all_const_elim 0.96% : 0.000013s : 100: predicate.reduce_eliminate 0.69% : 0.000010s : 126: predicate.remove_not_recompute_node 1.59% : 0.000022s : 228: predicate.replace_applicator 0.75% : 0.000010s : 126: predicate.replace_old_param 0.26% : 0.000004s : 41: predicate.reset_defer_inline 0.79% : 0.000011s : 100: predicate.reshape_eliminate 1.13% : 0.000016s : 126: predicate.row_tensor_add_zeros_like 0.41% : 0.000006s : 41: predicate.row_tensor_eliminate 1.22% : 0.000017s : 126: predicate.same_eliminate 0.64% : 0.000009s : 97: predicate.set_cell_output_no_recompute 1.25% : 0.000017s : 127: predicate.shard_identity_eliminate 1.55% : 0.000022s : 167: predicate.special_op_eliminate 1.03% : 0.000014s : 97: predicate.specialize_transform 1.18% : 0.000016s : 126: predicate.split_environ_get_set_with_tuple_value 1.23% : 0.000017s : 126: predicate.stack_unstack_eliminate 2.27% : 0.000032s : 284: predicate.stopgrad_eliminater 0.37% : 0.000005s : 41: predicate.switch_call_monad_eliminater 0.86% : 0.000012s : 103: predicate.switch_defer_inline 1.97% : 0.000027s : 229: predicate.switch_layer_defer_inline 3.10% : 0.000043s : 305: predicate.switch_simplify 0.78% : 0.000011s : 100: predicate.tile_eliminate 0.77% : 0.000011s : 100: predicate.transpose_eliminate 1.56% : 0.000022s : 169: predicate.tuple_list_convert_item_index_to_positive 1.55% : 0.000022s : 169: predicate.tuple_list_get_item_const_eliminator 1.38% : 0.000019s : 169: predicate.tuple_list_get_item_depend_reorder 2.59% : 0.000036s : 268: predicate.tuple_list_get_item_eliminator 1.47% : 0.000020s : 169: predicate.tuple_list_get_set_item_eliminator 2.42% : 0.000034s : 266: predicate.tuple_list_set_item_eliminator 1.48% : 0.000020s : 171: predicate.tuple_to_list_eliminator_ 2.22% : 0.000031s : 284: predicate.updatestate_pure_node_eliminater 3.22% : 0.000045s : 381: predicate.updatestate_useless_node_eliminater 0.42% : 0.000006s : 41: predicate.value_based_eliminate 1.21% : 0.000017s : 130: predicate.virtual_dataset_eliminate 1.16% : 0.000016s : 128: predicate.virtual_output_eliminate 0.40% : 0.000006s : 41: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000452 5 7.05% : 0.000032s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.95% : 0.000421s : 4: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 -[INFO] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.279.664 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:57] IncreaseLoopCount] Loop count actor(kernel_graph_0_LoopCountActor) running, loop count: 1, current count: 1, total running count: 1 -----[others.] 2.104818 283 0.00% : 0.000037s : 1: ForceFp32Comm 0.62% : 0.013111s : 1: a1a2 0.01% : 0.000131s : 1: add_cache_embedding 0.01% : 0.000115s : 1: add_comm_op_reuse_tag 0.01% : 0.000241s : 1: add_recomputation 0.01% : 0.000222s : 1: assign_add_opt 0.02% : 0.000363s : 1: auto_monad 0.01% : 0.000149s : 1: auto_monad_reorder 0.00% : 0.000046s : 1: begin_end_overlap_inline 0.00% : 0.000038s : 1: bias_add_comm_swap 0.06% : 0.001323s : 1: bootstrap 0.00% : 0.000092s : 1: cconv 0.00% : 0.000104s : 1: comm_op_add_attrs 0.00% : 0.000034s : 1: control_data_broadcast_order 0.00% : 0.000089s : 1: convert_after_rewriter 0.01% : 0.000190s : 1: cse_after_recomputation 0.02% : 0.000350s : 1: dataset_repeat_opt 0.01% : 0.000307s : 1: distribtued_split 0.05% : 0.001125s : 1: eliminate_special_op_node 0.00% : 0.000099s : 1: environ_conv 0.00% : 0.000065s : 1: execute 0.00% : 0.000036s : 1: full_micro_interleaved_order_control 0.00% : 0.000058s : 1: get_jit_bprop_graph 0.00% : 0.000101s : 1: graph_reusing 0.00% : 0.000049s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000039s : 1: handle_group_info 0.63% : 0.013357s : 1: inline 0.05% : 0.001066s : 1: insert-virtual-dataset 0.00% : 0.000037s : 1: interleave_parallel_branches 0.00% : 0.000037s : 1: interleave_split_concat_branches 0.00% : 0.000034s : 1: label_fine_grained_interleaved_index 0.00% : 0.000034s : 1: label_micro_interleaved_index 0.05% : 0.001003s : 1: loop_unroll 0.00% : 0.000034s : 1: merge_cast_opt 0.00% : 0.000049s : 1: micro_interleaved_order_control 0.00% : 0.000080s : 1: offloading_packed_experts 0.03% : 0.000618s : 44: opt.transform.a1a2 0.00% : 0.000061s : 1: opt.transform.loop_unroll_optimizer 0.35% : 0.007329s : 123: opt.transform.opt_a 0.01% : 0.000210s : 1: opt.transform.opt_after_cconv 0.05% : 0.001049s : 27: opt.transform.opt_b 0.01% : 0.000278s : 1: opt.transform.opt_trans_graph 0.01% : 0.000119s : 3: opt.transform.special_op_eliminate 0.01% : 0.000188s : 4: opt.transform.symbol_engine_opt 1.51% : 0.031681s : 1: opt_a 0.04% : 0.000813s : 1: opt_after_cconv 0.12% : 0.002532s : 1: opt_b 1.95% : 0.041097s : 1: optimize 0.00% : 0.000083s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000060s : 1: order_py_execute_after_rewriter 0.00% : 0.000063s : 1: overlap_grad_flash_sp 0.00% : 0.000031s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000078s : 1: overlap_grad_ring_attention 0.00% : 0.000049s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000081s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000039s : 1: overlap_param_gather 0.00% : 0.000065s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000035s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000130s : 1: parallel-infer-symbol 0.00% : 0.000059s : 1: parallel-infer-symbol-second 0.00% : 0.000039s : 1: partial_unused_args_eliminate 0.00% : 0.000060s : 1: pipeline_parallel_scheduler 0.01% : 0.000114s : 1: pipeline_split 0.00% : 0.000103s : 1: pre_auto_parallel 0.00% : 0.000070s : 1: py_interpret_to_execute 0.00% : 0.000083s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000058s : 1: remove_cast_before_assign_add 0.02% : 0.000356s : 1: remove_dup_value 0.14% : 0.002947s : 2: renormalize.infer 0.08% : 0.001620s : 2: renormalize.specialize 0.00% : 0.000035s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000051s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000411s [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:06.279.681 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish : 1: rewriter_after_opt_a 0.01% : 0.000198s : 2: rewriter_before_opt_a 0.00% : 0.000035s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000067s : 1: slice_recompute_activation 0.00% : 0.000035s : 1: split_layernorm_comm 0.00% : 0.000035s : 1: split_matmul_comm_elemetwise 0.00% : 0.000065s : 1: swap_dp_allreduce_reducescatter 0.03% : 0.000606s : 1: symbol_engine_optimizer 93.06% : 1.958721s : 1: task_emit 0.02% : 0.000446s : 1: tuple_transform 0.82% : 0.017319s : 1: type_inference 0.02% : 0.000352s : 1: validate [DEBUG] RUNTIME_FRAMEWORK(187775,fffe8ffff0f0,python):2025-02-07-15:58:06.279.709 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:388] CreateOutputTensor] Create output tensor, output node: Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}, output index: 0, output position: 0, output kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.727 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op1 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.279.734 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1785] Run] End [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.747 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 3, actor name : 3_Default/StreamRecv-op1, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187775,fffe8ffff0f0,python):2025-02-07-15:58:06.279.766 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:470] CreateOutputTensor] Create device tensor:0xfffe800095a0, size: 512 type:48 output node:Default/AllGather-op2 output index:0 output position:0, origin output device tensor: 0x39049050 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.780 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.795 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op1 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.279.836 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:239] SavePassesConfig] Running_passes: ['a1a2.r1.a_1', 'a1a2.r1.a_1.inline', 'opt_a.r1.auto_parallel', 'opt_a.r1.flash_sp', 'opt_a.r1.flash_sp_send_recv_attached', 'opt_a.r1.parallel', 'opt_a.r1.parallel_renormalize', 'opt_a.r1.receive_attached', 'opt_a.r1.virtual_dataset', 'opt_a.r1.virtual_dataset.virtual_dataset_eliminate', 'opt_a.r1.virtual_output', 'opt_a.r1.virtual_output.virtual_output_eliminate', 'opt_a.r2.a_1', 'opt_a.r2.a_1.tuple_list_get_item_eliminator', 'opt_a.r2.accelerated_algorithm', 'opt_a.r2.accelerated_algorithm.less_batch_normalization', 'opt_a.r2.auto_parallel', 'opt_a.r2.flash_sp', 'opt_a.r2.renormalize', 'opt_a.r3.auto_parallel'] [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.279.882 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1047] SaveCompiledGraph] Save compiled func graph(4_3_1___main___Net_construct_20) phase(train.1738915084078296064.281470489933200.0..)! [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.883 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op1 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.905 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 4_Default/StreamSend-op2, task_id_on_stream : 3. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.279.918 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1065] SaveCompiledGraph] End save compiled func graph! [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.279.920 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op2 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.279.954 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1326] CompileInner] [PROF]ParallelPostProcess costs 0.011 msec. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.279.972 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1154] CleanCompileRes] Clean compile resource start [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.280.049 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op2 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.280.071 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 4, actor name : 5_Default/StreamRecv-op2, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.280.086 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.280.101 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.280.187 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.280.223 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 6_Default/StridedSlice-op0, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:06.280.239 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op0 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.281.316 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1175] CleanCompileRes] Clean compile resource end [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.281.360 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] End compiling 'Net.construct'. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.281.390 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1334] CompileInner] [PROF]CleanCompileRes costs 2.433 msec. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.281.406 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1335] CompileInner] Finish compiling. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:06.281.424 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1336] CompileInner] [PROF]compile_graph costs 1915.19 msec. [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.281.887 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_x, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] PARALLEL(187753,ffff8292dc10,python):2025-02-07-15:58:06.281.942 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_y, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] GE_ADPT(187775,fffeacff90f0,python):2025-02-07-15:58:06.282.469 [mindspore/ccsrc/transform/acl_ir/acl_allocator.cc:104] RegisterAllocator] Register AclAllocator [INFO] UTILS(187753,ffff8292dc10,python):2025-02-07-15:58:06.282.553 [mindspore/ccsrc/utils/dynamic_obfuscation/registry_opaque_predicate.cc:112] init_calling_count] calling_count_ has been initialized to 0 [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.282.679 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1893] RunGraph] Status record: start run actor: kernel_graph_0 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.282.686 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1175] CleanCompileRes] Clean compile resource end [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.282.731 [mindspore/ccsrc/runtime/device/pre_launch_comm.cc:200] PreLaunchCommKernel] No hccl kernel to pre launch [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.282.732 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] End compiling 'Net.construct'. [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:06.282.761 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1918] RunGraph] [PROF]PreLaunchCommKernel costs 0.038 msec. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.282.764 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1334] CompileInner] [PROF]CleanCompileRes costs 2.782 msec. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.282.781 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1335] CompileInner] Finish compiling. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:06.282.799 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1336] CompileInner] [PROF]compile_graph costs 2199.99 msec. [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.282.806 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:966] SpawnMultiPipelineActor] Enable runtime asynchronously launch kernel, default actor thread num 5, current actor thread num: 5 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.282.844 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.282.908 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.282.939 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:394] operator()] Init defrag memory step freq. [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.282.956 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:396] operator()] Config defrag memory step freq : . [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.282.970 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:405] operator()] Defrag memory step freq : 100. [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.283.023 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:638] PrepareDataForDeviceTensorStore] Prepare store data, input tensor size: 0, arg size: 2 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.283.067 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:835] AllocGEFixMemory] Start AllocGEFixMemory [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.283.111 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:652] PrepareDataForDeviceTensorStore] prepare data for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.283.147 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-0, debug name:ValueNode (0, 0, 0), front node:ValueNode (0, 0, 0) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.283.174 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 0, 0) front node:ValueNode (0, 0, 0) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.283.194 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x32539dd0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.283.211 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 0, 0) [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.283.237 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_x, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:06.283.258 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [INFO] PARALLEL(187742,ffffa187dc10,python):2025-02-07-15:58:06.283.291 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_y, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] UTILS(187742,ffffa187dc10,python):2025-02-07-15:58:06.283.849 [mindspore/ccsrc/utils/dynamic_obfuscation/registry_opaque_predicate.cc:112] init_calling_count] calling_count_ has been initialized to 0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.283.877 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-1, debug name:ValueNode 2, front node:ValueNode 2 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.283.915 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 2 front node:ValueNode 2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.283.934 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253a0e0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.283.950 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 2 [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.024 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1893] RunGraph] Status record: start run actor: kernel_graph_0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.086 [mindspore/ccsrc/runtime/device/pre_launch_comm.cc:200] PreLaunchCommKernel] No hccl kernel to pre launch [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.115 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1918] RunGraph] [PROF]PreLaunchCommKernel costs 0.041 msec. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.155 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:966] SpawnMultiPipelineActor] Enable runtime asynchronously launch kernel, default actor thread num 5, current actor thread num: 5 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.191 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.284.187 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-2, debug name:ValueNode 0, front node:ValueNode 0 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.284.218 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 0 front node:ValueNode 0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.284.235 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253a490 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.247 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.284.252 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 0 [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.279 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:394] operator()] Init defrag memory step freq. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.299 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:396] operator()] Config defrag memory step freq : . [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.312 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:405] operator()] Defrag memory step freq : 100. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.356 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:638] PrepareDataForDeviceTensorStore] Prepare store data, input tensor size: 0, arg size: 2 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.400 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:835] AllocGEFixMemory] Start AllocGEFixMemory [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.440 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:652] PrepareDataForDeviceTensorStore] prepare data for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.475 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-0, debug name:ValueNode (2, 2, 2), front node:ValueNode (2, 2, 2) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.501 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 2, 2) front node:ValueNode (2, 2, 2) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.284.484 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-3, debug name:ValueNode (1, 1, 1), front node:ValueNode (1, 1, 1) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.521 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x4024e580 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.284.528 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (1, 1, 1) front node:ValueNode (1, 1, 1) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.539 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 2, 2) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.284.547 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253abc0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.284.566 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (1, 1, 1) [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:06.284.583 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.284.824 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-4, debug name:ValueNode (2, 2, 4), front node:ValueNode (2, 2, 4) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.284.858 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 2, 4) front node:ValueNode (2, 2, 4) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.284.875 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253b3b0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.284.891 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 2, 4) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.285.115 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-5, debug name:ValueNode (0, 0, 2), front node:ValueNode (0, 0, 2) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.285.146 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 0, 2) front node:ValueNode (0, 0, 2) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.285.163 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253bba0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.285.180 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 0, 2) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.285.212 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-1, debug name:ValueNode (2, 4, 4), front node:ValueNode (2, 4, 4) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.285.253 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 4, 4) front node:ValueNode (2, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.285.271 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x4024e9d0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.285.289 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.285.401 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-6, debug name:ValueNode (2, 4, 4), front node:ValueNode (2, 4, 4) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.285.432 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 4, 4) front node:ValueNode (2, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.285.449 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253c390 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.285.466 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.285.522 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-2, debug name:ValueNode 0, front node:ValueNode 0 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.285.551 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 0 front node:ValueNode 0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.285.567 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x4024ee20 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.285.582 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.285.698 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-7, debug name:ValueNode 1, front node:ValueNode 1 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.285.728 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 1 front node:ValueNode 1 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.285.745 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3253c870 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.285.761 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 1 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.285.803 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-3, debug name:ValueNode 2, front node:ValueNode 2 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.285.845 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 2 front node:ValueNode 2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.285.862 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x4024f2e0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.285.878 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 2 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.286.001 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_y front node:@4_3_1___main___Net_construct_20:param_y backend is weight:0 front is weight:0 [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:06.286.045 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_x front node:@4_3_1___main___Net_construct_20:param_x backend is weight:0 front is weight:0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.093 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-4, debug name:ValueNode 1, front node:ValueNode 1 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.121 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 1 front node:ValueNode 1 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.137 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x4024f7c0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.152 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.286.133 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph0_SuperKernelActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:1, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe57fff0f0,python):2025-02-07-15:58:06.286.137 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:0, sequential num:2001075757 [INFO] GE_ADPT(187753,fffe577fe0f0,python):2025-02-07-15:58:06.286.349 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.366 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-5, debug name:ValueNode (0, 0, 0), front node:ValueNode (0, 0, 0) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.398 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 0, 0) front node:ValueNode (0, 0, 0) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.415 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x4024fff0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.432 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 0, 0) [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.286.626 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 6_Default/StridedSlice-op0, front node: @4_3_1___main___Net_construct_20:param_x, with index: 0, addr index: 0, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.651 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-6, debug name:ValueNode (2, 2, 4), front node:ValueNode (2, 2, 4) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.286.672 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 6_Default/StridedSlice-op0, outer index: 0, inner index:0, front node: @4_3_1___main___Net_construct_20:param_x [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.682 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 2, 4) front node:ValueNode (2, 2, 4) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.700 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x402507e0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.717 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 2, 4) [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.286.744 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 0, inner index: 0, dynamic is 0 [INFO] GE_ADPT(187753,fffe74ff90f0,python):2025-02-07-15:58:06.286.769 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.951 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-7, debug name:ValueNode (1, 1, 1), front node:ValueNode (1, 1, 1) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.982 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (1, 1, 1) front node:ValueNode (1, 1, 1) [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.286.998 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x40250fd0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.287.015 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (1, 1, 1) [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.004 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 0_Default/StreamSend-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.287.020 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 6_Default/StridedSlice-op0, input index: 0, device tensor: 0x32539350, ptr: 0x12c7fd601000, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.054 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.287.070 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor6_Default/StridedSlice-op0, actor input: 0, graph input: 1, device tensor: 0x32539350, ptr: 0x12c7fd601000, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.287.128 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 9_Default/StridedSlice-op3, front node: @4_3_1___main___Net_construct_20:param_y, with index: 0, addr index: 1, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.287.161 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 9_Default/StridedSlice-op3, outer index: 1, inner index:0, front node: @4_3_1___main___Net_construct_20:param_y [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.287.189 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 1, inner index: 0, dynamic is 0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.240 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.287.252 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_y front node:@4_3_1___main___Net_construct_20:param_y backend is weight:0 front is weight:0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.269 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 2, actor name : 1_Default/StreamRecv-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.288 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:06.287.294 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_x front node:@4_3_1___main___Net_construct_20:param_x backend is weight:0 front is weight:0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.307 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.287.330 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 9_Default/StridedSlice-op3, input index: 0, device tensor: 0x32538f80, ptr: 0x12c7fd601400, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.287.353 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor9_Default/StridedSlice-op3, actor input: 0, graph input: 0, device tensor: 0x32538f80, ptr: 0x12c7fd601400, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.402 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.287.392 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph0_SuperKernelActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:1, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.424 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 2_Default/StreamSend-op1, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7a7fc0f0,python):2025-02-07-15:58:06.287.403 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:0, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.443 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.287.443 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph0_SuperKernelActor and check running condition:1, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe57fff0f0,python):2025-02-07-15:58:06.287.461 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:317] RunOpData] Actor(kernel_graph_0_OutputActor) receive the input op data and output position:0 device tensor:0x325413e0 ptr:0 ref count:18446744073709551615 origin ref count:18446744073709551615 dynamic ref count:2147483647 from memory pool:0 output node:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} index:0 [INFO] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.287.501 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:57] IncreaseLoopCount] Loop count actor(kernel_graph_0_LoopCountActor) running, loop count: 1, current count: 1, total running count: 1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:06.287.519 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187753,fffe57fff0f0,python):2025-02-07-15:58:06.287.536 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:388] CreateOutputTensor] Create output tensor, output node: Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}, output index: 0, output position: 0, output kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.586 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe57fff0f0,python):2025-02-07-15:58:06.287.598 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:470] CreateOutputTensor] Create device tensor:0xfffe480095a0, size: 512 type:48 output node:Default/AllGather-op2 output index:0 output position:0, origin output device tensor: 0x325413e0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.607 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 3, actor name : 3_Default/StreamRecv-op1, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.621 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 2. [INFO] GE_ADPT(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.287.614 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.638 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.723 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.742 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 4_Default/StreamSend-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.757 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op2 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.881 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op2 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.287.885 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 6_Default/StridedSlice-op0, front node: @4_3_1___main___Net_construct_20:param_x, with index: 0, addr index: 0, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.909 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 4, actor name : 5_Default/StreamRecv-op2, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.924 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.287.930 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 6_Default/StridedSlice-op0, outer index: 0, inner index:0, front node: @4_3_1___main___Net_construct_20:param_x [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.287.939 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.288.002 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 0, inner index: 0, dynamic is 0 [INFO] GE_ADPT(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.014 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.288.025 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.288.063 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 6_Default/StridedSlice-op0, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:06.288.080 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.234 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 0_Default/StreamSend-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.277 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.288.277 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 6_Default/StridedSlice-op0, input index: 0, device tensor: 0x4024db00, ptr: 0x12c7fd401000, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.288.316 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor6_Default/StridedSlice-op0, actor input: 0, graph input: 1, device tensor: 0x4024db00, ptr: 0x12c7fd401000, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.288.369 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 9_Default/StridedSlice-op3, front node: @4_3_1___main___Net_construct_20:param_y, with index: 0, addr index: 1, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.288.398 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 9_Default/StridedSlice-op3, outer index: 1, inner index:0, front node: @4_3_1___main___Net_construct_20:param_y [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.288.425 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 1, inner index: 0, dynamic is 0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.464 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.493 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 2, actor name : 1_Default/StreamRecv-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.510 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.528 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.288.582 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 9_Default/StridedSlice-op3, input index: 0, device tensor: 0x4024d6f0, ptr: 0x12c7fd401400, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.288.605 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor9_Default/StridedSlice-op3, actor input: 0, graph input: 0, device tensor: 0x4024d6f0, ptr: 0x12c7fd401400, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.619 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.658 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 2_Default/StreamSend-op1, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.675 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.288.715 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph0_SuperKernelActor and check running condition:1, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7a7fc0f0,python):2025-02-07-15:58:06.288.734 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:317] RunOpData] Actor(kernel_graph_0_OutputActor) receive the input op data and output position:0 device tensor:0x40255b50 ptr:0 ref count:18446744073709551615 origin ref count:18446744073709551615 dynamic ref count:2147483647 from memory pool:0 output node:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} index:0 [INFO] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.288.754 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:57] IncreaseLoopCount] Loop count actor(kernel_graph_0_LoopCountActor) running, loop count: 1, current count: 1, total running count: 1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:06.288.771 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7a7fc0f0,python):2025-02-07-15:58:06.288.801 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:388] CreateOutputTensor] Create output tensor, output node: Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}, output index: 0, output position: 0, output kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.810 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.840 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 3, actor name : 3_Default/StreamRecv-op1, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.855 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7a7fc0f0,python):2025-02-07-15:58:06.288.869 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:470] CreateOutputTensor] Create device tensor:0xfffe640095a0, size: 512 type:48 output node:Default/AllGather-op2 output index:0 output position:0, origin output device tensor: 0x40255b50 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.871 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.966 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.288.988 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 4_Default/StreamSend-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.289.005 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op2 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.289.125 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op2 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.289.161 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 4, actor name : 5_Default/StreamRecv-op2, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.289.176 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.289.190 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.289.272 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.289.311 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 6_Default/StridedSlice-op0, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:06.289.328 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op0 [INFO] GE_ADPT(187753,fffe74ff90f0,python):2025-02-07-15:58:06.289.751 [mindspore/ccsrc/transform/acl_ir/acl_allocator.cc:104] RegisterAllocator] Register AclAllocator [INFO] GE_ADPT(187742,fffe7affd0f0,python):2025-02-07-15:58:06.290.976 [mindspore/ccsrc/transform/acl_ir/acl_allocator.cc:104] RegisterAllocator] Register AclAllocator [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.325.687 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Mul, kernel type:opapi_kernel [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.325.818 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.325.953 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.326.047 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.326.100 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.326.122 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.326.147 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.326.524 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.326.556 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.333.636 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Mul, kernel type:opapi_kernel [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.333.798 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.334.006 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.334.114 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.334.171 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.334.197 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.334.225 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.334.707 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.334.742 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [WARNING] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:06.374.070 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-16057586909177180503 [INFO] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:06.374.164 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-16057586909177180503 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.374.216 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 242.904 msec. [WARNING] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.374.243 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-16057586909177180503 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.374.355 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:232] InitCommGroup] In group: 2-6853331267304275293, the max communication size is 1 MB. [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.374.382 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:238] InitCommGroup] For group: 2-6853331267304275293, the hccl_buffsize is inited by HCCL_BUFFSIZE, and the value is 200 MB. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:06.374.403 [mindspore/ccsrc/distributed/collective/collective_manager.cc:875] SubmitCreateDeviceCommTask] Submit init communicator task for 2-6853331267304275293. Call 'WaitCommInitDone' later to wait initialization to be done. [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.374.424 [mindspore/ccsrc/distributed/collective/collective_manager.cc:923] RunInitCommTasks] Create device communicator in thread for group: 2-6853331267304275293 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.374.441 [mindspore/ccsrc/distributed/collective/collective_manager.cc:724] CreateDeviceCommunicator] Create device communicator for 2-6853331267304275293 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.374.581 [mindspore/ccsrc/distributed/collective/collective_manager.cc:747] CreateDeviceCommunicator] [PROF]GenerateRootInfo costs 0.106 msec. [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.374.760 [mindspore/ccsrc/distributed/collective/collective_manager.cc:766] CreateDeviceCommunicator] Successfully send/fetch unqiueid for communication group 2-6853331267304275293 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.374.787 [mindspore/ccsrc/distributed/collective/collective_manager.cc:768] CreateDeviceCommunicator] [PROF]BroadcastUniqueID costs 0.177 msec. [WARNING] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.374.804 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: 2-6853331267304275293 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.374.822 [mindspore/ccsrc/distributed/collective/collective_manager.cc:704] GetCommunicatorInitTimeout] HCCL_CONNECT_TIMEOUT is 600 seconds. [WARNING] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:06.375.050 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for 2-6853331267304275293, hcclBufferSize is 200 MB. hcclDeterministic is 0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.418.817 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.418.885 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.418.960 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.419.018 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.419.057 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.419.080 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.419.105 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.419.270 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.419.295 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.419.942 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.419.987 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.420.044 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.420.092 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.420.125 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.420.147 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.420.531 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_0_erase_visit_attr in 306.79 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.420.951 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_1_deal_ref_output in 382.8 us [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.019 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:166] AclAfterCreateKernel] [PROF]AclAfterCreateKernel costs 0.826 msec. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.079 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:195] OptimizeACLGraphAfterCreateKernel] [PROF]OptimizeACLGraphAfterCreateKernel costs 0.904 msec. [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.186 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.379 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1102] OptimizeExecutionOrder] [PROF]OptimizeExecutionOrder costs 0.269 msec. [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.415 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1064] CompileGraphImpl] [PROF]CreateKernel costs 153.808 msec. [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.596 [mindspore/ccsrc/backend/common/session/session_basic.cc:1152] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] DEBUG(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.621 [mindspore/ccsrc/debug/summary/summary.cc:52] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] DEBUG(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.641 [mindspore/ccsrc/debug/summary/summary.cc:57] RecurseSetSummaryNodesForAllGraphs] This function should be skipped on GE backend. [INFO] DEBUG(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.660 [mindspore/ccsrc/debug/data_dump/dump_json_parser.cc:1207] UpdateNeedDumpKernels] Get kernel dump flag [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.707 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1183] PreprocessBeforeRun] Current Exec Order Algo in MS Context is bfs [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.749 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1046] DoStreamAssign] Status record: start stream assign, kernel_graph0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.781 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.830 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op1 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.870 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op2 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.897 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op3 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.933 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op4 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.421.969 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op5 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.422.012 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Mul-op0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.422.040 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.423.317 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 2 for node Default/AllGather-op0, group: 2-6541264347459079684 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.423.372 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.423.416 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op2 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.423.446 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op1 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.424.445 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 3 for node Default/AllGather-op1, group: 2-5435772415009061329 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.424.496 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op1 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.424.536 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op3 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.424.565 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op2 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.425.941 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 4 for node Default/AllGather-op2, group: 2-16453000547691086251 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.426.808 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1062] DoStreamAssign] Status record: end stream assign, kernel_graph0 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.426.888 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.426.930 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:2 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.426.968 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.426.994 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 2, record_stream_id_ : 0. [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.025 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.047 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:3 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.077 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.100 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 3, record_stream_id_ : 0. [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.128 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.155 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:4 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.184 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.207 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 4, record_stream_id_ : 0. [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.237 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.260 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:5 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.288 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.310 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 5, record_stream_id_ : 0. [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.339 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.359 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:0 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.388 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.410 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 0, record_stream_id_ : 2. [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.438 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.460 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:6 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.487 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.519 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 6, record_stream_id_ : 0. [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.546 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.568 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:1 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.601 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.624 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 1, record_stream_id_ : 3. [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.653 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.675 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:7 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.703 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.725 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 7, record_stream_id_ : 0. [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.754 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.775 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:8 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.801 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.824 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 8, record_stream_id_ : 2. [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.854 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.876 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:9 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.905 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.926 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 9, record_stream_id_ : 3. [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.953 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.427.975 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:10 [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.010 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.033 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 10, record_stream_id_ : 4. [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.062 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:931] PrintGraphExecuteOrder] Graph 0 execution order: [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.156 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[0], node name[Default/StreamSend-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_24{[0]: ValueNode StreamSend}], event id[2] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.203 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[1], node name[Default/StreamRecv-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_25{[0]: ValueNode StreamRecv}], event id[2] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.236 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[2], node name[Default/StreamSend-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_26{[0]: ValueNode StreamSend}], event id[3] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.265 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[3], node name[Default/StreamRecv-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_27{[0]: ValueNode StreamRecv}], event id[3] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.293 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[4], node name[Default/StreamSend-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_28{[0]: ValueNode StreamSend}], event id[4] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.321 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[5], node name[Default/StreamRecv-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_29{[0]: ValueNode StreamRecv}], event id[4] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.384 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[6], node name[Default/StridedSlice-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_30{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_x, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.434 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[7], node name[Default/StridedSlice-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.481 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[8], node name[Default/StridedSlice-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.528 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[9], node name[Default/StridedSlice-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_33{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.583 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[10], node name[Default/StridedSlice-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.642 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[11], node name[Default/StridedSlice-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.686 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[12], node name[Default/Mul-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35}] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.718 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[13], node name[Default/StreamSend-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_37{[0]: ValueNode StreamSend}], event id[5] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.747 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[14], node name[Default/StreamRecv-op3], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_38{[0]: ValueNode StreamRecv}], event id[5] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.785 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[15], node name[Default/AllGather-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36}], group[2-6541264347459079684] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.814 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[16], node name[Default/StreamSend-op4], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_40{[0]: ValueNode StreamSend}], event id[0] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.842 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[17], node name[Default/StreamRecv-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_41{[0]: ValueNode StreamRecv}], event id[0] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.876 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[18], node name[Default/Split-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.916 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[19], node name[Default/Concat-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2}] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.944 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[20], node name[Default/StreamSend-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_46{[0]: ValueNode StreamSend}], event id[6] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.428.986 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[21], node name[Default/StreamRecv-op5], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_47{[0]: ValueNode StreamRecv}], event id[6] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.020 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[22], node name[Default/AllGather-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43}], group[2-5435772415009061329] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.049 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[23], node name[Default/StreamSend-op6], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_49{[0]: ValueNode StreamSend}], event id[1] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.077 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[24], node name[Default/StreamRecv-op6], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_50{[0]: ValueNode StreamRecv}], event id[1] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.111 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[25], node name[Default/Split-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.151 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[26], node name[Default/Concat-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1}] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.185 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[27], node name[Default/StreamSend-op7], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_55{[0]: ValueNode StreamSend}], event id[7] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.213 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[28], node name[Default/StreamRecv-op7], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_56{[0]: ValueNode StreamRecv}], event id[7] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.247 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[29], node name[Default/AllGather-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}], group[2-16453000547691086251] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.275 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[30], node name[Default/StreamSend-op8], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_58{[0]: ValueNode StreamSend}], event id[8] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.302 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[31], node name[Default/StreamRecv-op8], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_59{[0]: ValueNode StreamRecv}], event id[8] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.331 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[32], node name[Default/StreamSend-op9], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_60{[0]: ValueNode StreamSend}], event id[9] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.365 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[33], node name[Default/StreamRecv-op9], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_61{[0]: ValueNode StreamRecv}], event id[9] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.394 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[34], node name[Default/StreamSend-op10], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_62{[0]: ValueNode StreamSend}], event id[10] [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.422 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[35], node name[Default/StreamRecv-op10], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_63{[0]: ValueNode StreamRecv}], event id[10] [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.459 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1115] CompileGraphImpl] [PROF]PreprocessBeforeRun costs 7.771 msec. [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.429.501 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1161] CreateDeviceAddress] Status record: start create device address. graph id: 0 [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.430.647 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1168] CreateDeviceAddress] Status record: end create device address. graph id: 0 [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.430.694 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1123] CompileGraphImpl] [PROF]CreateDeviceAddress costs 1.181 msec. [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.430.827 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1177] CacheGraphOutputToFrontNodeWithIndex] Get graph backend output nodes. [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.430.865 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1185] CacheGraphOutputToFrontNodeWithIndex] Get graph front output nodes. [INFO] SESSION(187789,ffffaa419c10,python):2025-02-07-15:58:06.430.938 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1203] CacheGraphOutputToFrontNodeWithIndex] Backend output: Default/AllGather-op2 debug string: @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} node ptr:0x32bed760 with index: 0 map to front node: Default/AllGather-op2 debug string: @4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} node ptr: 0x32ba01b0 with index: 0 [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.430.984 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:766] CompileGraph] Status record: end compile graph. graph id: 0 [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.184 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1605] CompileGraphFromSegment] Compile cut segment, the cut node: @4_3_1___main___Net_construct_20:ValueNode_64{[0]: ValueNode Return, [1]: CNode_22} [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.224 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1129] CompileGraphs] [PROF]CompileSubGraph costs 184.849 msec. [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.248 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:976] ExportCompileCacheKBK] Compile cache: disable by front compile cache config. [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.311 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1143] CompileGraphs] Status record: construct the graph compiler info. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.356 [mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc:1001] Parse] Control node parser is not inited. [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.408 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:859] Transform] Graph(kernel_graph_0) transforms actor begin, strategy:pipeline [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.565 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:480] InitGraphParameterStore] Init graph parameter store: kernel_graph_0, outer size: 2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.607 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 0, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_x [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.637 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 1, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_y [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.681 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3445c660 for node:ValueNode (2, 2, 2) node addr:0x32ba84d0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.706 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3445c660 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.734 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3446fe10 for node:ValueNode (2, 2, 4) node addr:0x32ba97a0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.752 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3446fe10 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.774 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x34470260 for node:ValueNode 2 node addr:0x32ba70f0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.791 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34470260 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.814 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x34470a50 for node:ValueNode (1, 1, 1) node addr:0x32ba8820 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.832 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34470a50 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.855 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x34471240 for node:ValueNode (2, 0, 0) node addr:0x32baa8c0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.872 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34471240 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.893 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x34471a30 for node:ValueNode (4, 4, 4) node addr:0x32baabb0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.910 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34471a30 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.941 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x34471f10 for node:ValueNode 1 node addr:0x32ba6930 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.961 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34471f10 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.431.984 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x34472720 for node:ValueNode (0, 0, 0) node addr:0x32ba83a0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.001 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34472720 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.023 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x34472c00 for node:ValueNode 0 node addr:0x32ba6fc0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.040 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34472c00 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.183 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:442] ChangeGraphMode] Enable kbk subgraph execute and set run mode for graph: 0 to GraphMode. [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.203 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:462] TryEnableKbkSubGraphExecMode] Enable kbk subgraph execute mode for actor set: kernel_graph_0 [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.261 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:391] TryEnableInputOptimize] Enable input optimize for actor set: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.301 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_y for host data source actor. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.351 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_y for front node:@4_3_1___main___Net_construct_20:param_y index:0 position:1 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.377 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_x for host data source actor. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.409 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_x for front node:@4_3_1___main___Net_construct_20:param_x index:0 position:0 [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.438 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2268] BuildDataPrepareActorForGraphParameterStore] Create data prepare actor: kernel_graph_0_DataPrepareActor [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.541 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2224] BuildLoopCountActor] Create loop count actor: kernel_graph_0_LoopCountActor [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.569 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2257] BuildOutputActor] Create output actor: kernel_graph_0_OutputActor [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.697 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1509] CacheGraphOutputToActor] Cache graph 0 output node:Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} with index:0 to actor:kernel_graph0_SuperKernelActor, from front node:Default/AllGather-op2 debug string:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} with index:0 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.882 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.909 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3446ebd0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.941 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.432.958 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3446efa0 origin ref count:2 [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.054 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1711] Link] [PROF]GraphSchedulerLinkSinkMode costs 0.2 msec. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.084 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.107 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph0_SuperKernelActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.125 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_LoopCountActor@ to actor:kernel_graph_0_OutputActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.145 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_OutputActor@ to actor:kernel_graph_0_DataPrepareActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.163 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:3713] LinkControlArrowForCopyActor] Link control arrow for copy actor start, copy actor size:0 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.195 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34477770 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.229 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:556] AddResultArrow] Add result arrow from actor:kernel_graph0_SuperKernelActor to actor:kernel_graph_0_OutputActor@ from kernel@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} device address:0x34477770 original ref count:18446744073709551615 ref count:18446744073709551615 dynamic ref count:2147483647 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.317 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.362 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.400 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.447 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.482 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.515 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.738 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.780 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.837 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.874 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.946 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.433.982 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.034 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.072 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.142 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.176 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.226 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.266 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.301 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.335 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.370 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.405 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.434 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 4, send_actor : 0x32bf3780, recv_actor : 0x32bf41e0. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.451 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 8, send_actor : 0x34485ec0, recv_actor : 0x344866d0. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.468 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 3, send_actor : 0x32bd2560, recv_actor : 0x32bf2e10. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.485 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 11, send_actor : 0x34489e70, recv_actor : 0x3448a8b0. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.501 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 10, send_actor : 0x344889f0, recv_actor : 0x34489430. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.517 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 1, send_actor : 0x3447e620, recv_actor : 0x3447ef60. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.533 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 5, send_actor : 0x32bf4c20, recv_actor : 0x32bf5660. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.550 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 6, send_actor : 0x3447c7c0, recv_actor : 0x3447d190. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.567 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 7, send_actor : 0x34480c70, recv_actor : 0x344815b0. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.583 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 9, send_actor : 0x344876f0, recv_actor : 0x34487fb0. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.600 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 2, send_actor : 0x344828a0, recv_actor : 0x34483700. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.844 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op0 addr:0x3446f8c0 type:48, kernel tensor addr:0x3446f650, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.434.951 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op1 addr:0x34473110 type:48, kernel tensor addr:0x34472ea0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.016 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.108 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op2 addr:0x34473620 type:48, kernel tensor addr:0x344733b0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.174 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.247 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op3 addr:0x34473b30 type:48, kernel tensor addr:0x344738c0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.435.259 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.435.328 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.337 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op4 addr:0x34474040 type:48, kernel tensor addr:0x34473dd0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.393 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.435.412 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.435.476 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.479 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op5 addr:0x34474550 type:48, kernel tensor addr:0x344742e0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.435.517 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.533 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.435.541 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.435.566 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.648 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Mul-op0 addr:0x34474a60 type:48, kernel tensor addr:0x344747f0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.696 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35} is thread safe. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.435.747 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.758 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op0 addr:0x34474f70 type:48, kernel tensor addr:0x34474d00, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 2)) [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.435.775 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.796 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.872 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op0 addr:0x344754f0 type:48, kernel tensor addr:0x34475310, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.900 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op0 addr:0x344759e0 type:48, kernel tensor addr:0x34475800, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.435.937 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.007 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op0 addr:0x34475e60 type:48, kernel tensor addr:0x34475bf0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.052 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.111 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op1 addr:0x34476370 type:48, kernel tensor addr:0x34476100, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.149 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.214 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op1 addr:0x344768f0 type:48, kernel tensor addr:0x34476710, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.240 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op1 addr:0x34476de0 type:48, kernel tensor addr:0x34476c00, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.276 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.336 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op1 addr:0x34477260 type:48, kernel tensor addr:0x34476ff0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.378 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.457 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op2 addr:0x34477770 type:48, kernel tensor addr:0x34477500, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.496 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} is thread safe. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.436.502 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.436.550 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.436.612 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.659 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op1 input kernel:Default/StridedSlice-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.687 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3446f8c0 origin ref count:2 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.436.682 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.436.723 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.731 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op2 input kernel:Default/StridedSlice-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.436.748 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.751 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x34473110 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.798 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op4 input kernel:Default/StridedSlice-op3 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.817 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x34473b30 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.851 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op5 input kernel:Default/StridedSlice-op4 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.870 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x34474040 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.904 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op2 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.922 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x34473620 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.950 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op5 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.436.969 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x34474550 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.005 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op0 input kernel:Default/Mul-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.022 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x34474a60 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.047 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op0 input kernel:Default/AllGather-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.063 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x34474f70 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.088 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.105 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x344754f0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.125 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.142 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x344759e0 origin ref count:2 [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.437.147 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_0_erase_visit_attr in 316.75 us [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.167 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op1 input kernel:Default/Concat-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.184 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x34475e60 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.209 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op1 input kernel:Default/AllGather-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.225 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x34476370 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.248 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.265 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x344768f0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.285 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.308 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x34476de0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.333 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op2 input kernel:Default/Concat-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.350 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x34477260 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.393 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[y] debug_name: @kernel_graph0:param_y use count is: 1 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.417 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[x] debug_name: @kernel_graph0:param_x use count is: 1 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.439 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1603] AddControlArrowForNoInputActor] Add control arrow for no input arrow actor: kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.460 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph0_SuperKernelActor@ [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.564 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:887] Transform] [PROF]GraphSchedulerLink costs 4.813 msec. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.437.574 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_1_deal_ref_output in 385.49 us [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.643 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 1_actor_set_kernel_graph_0_invalid_data_arrow_elimination in 1.26008 us [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:06.437.646 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:166] AclAfterCreateKernel] [PROF]AclAfterCreateKernel costs 0.847 msec. [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.684 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 2_actor_set_kernel_graph_0_multi_actor_fusion in 16.78 us [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.705 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 3_actor_set_kernel_graph_0_batch_data_arrow_fusion in 0.92003 us [WARNING] DEVICE(187818,fffe15ffb0f0,python):2025-02-07-15:58:06.437.668 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-6853331267304275293 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.437.714 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:195] OptimizeACLGraphAfterCreateKernel] [PROF]OptimizeACLGraphAfterCreateKernel costs 0.933 msec. [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.730 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:904] Transform] Graph(kernel_graph_0) transforms actor end. [INFO] DEVICE(187818,fffe15ffb0f0,python):2025-02-07-15:58:06.437.790 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-6853331267304275293 [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.812 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:354] Init] kernel_graph_0 has the parameter input num: 2 [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.437.828 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.866 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1153] CompileGraphs] [PROF]GraphScheduler costs 6.52 msec. [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:06.437.872 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 645.015 msec. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.892 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:29] operator()] Create MultiStreamController. [WARNING] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:06.437.910 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-6853331267304275293 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.914 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:40] Refresh] Stream manager initialize, device_context : 0x1fe1be40, stream_size : 5. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.936 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:214] Resize] Task id on stream manager initialize : 0, stream_size : 5. [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.437.962 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1166] CompileGraphs] [PROF]compile_backend_graph costs 1874.83 msec. [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.438.003 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1169] CompileGraphs] Status record: end compile function graph: 4_3_1___main___Net_construct_20, produce actor: kernel_graph_0 [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.437.996 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:247] InitCommGroup] The MOC occupied by HCCL of graph: 4_3_1___main___Net_construct_20 is 1600 MB. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.438.033 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end task_emit action. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.035 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1102] OptimizeExecutionOrder] [PROF]OptimizeExecutionOrder costs 0.287 msec. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.438.057 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.438.058 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1072] CompileGraphs] [PROF]InitCommGroup costs 2297.51 msec. [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.077 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1064] CompileGraphImpl] [PROF]CreateKernel costs 168.322 msec. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.438.093 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:281] SetLoopCount] Change vm_loop_flag to 0, set loop_size to 1 [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.438.091 [mindspore/ccsrc/distributed/collective/collective_manager.cc:833] WaitAllCommInitDone] All device communictor is initialized. You can launch communication operators after this step. [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.438.116 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1076] CompileGraphs] [PROF]WaitAllCommInit costs 0.032 msec. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.438.125 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start execute action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.438.154 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end execute action. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.438.172 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.438.312 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1347] IsEnableControlFlowInline] Disable switch inline, executor mode:1 [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.310 [mindspore/ccsrc/backend/common/session/session_basic.cc:1152] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] DEBUG(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.341 [mindspore/ccsrc/debug/summary/summary.cc:52] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.438.348 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1180] DoUnifyMindIRPass] Do unify mindir pass for graph 4_3_1___main___Net_construct_20 [INFO] DEBUG(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.363 [mindspore/ccsrc/debug/summary/summary.cc:57] RecurseSetSummaryNodesForAllGraphs] This function should be skipped on GE backend. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.438.384 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_pm_0_erase_invalid_micro_depend in 3.55 us [INFO] DEBUG(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.386 [mindspore/ccsrc/debug/data_dump/dump_json_parser.cc:1207] UpdateNeedDumpKernels] Get kernel dump flag [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.456 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1183] PreprocessBeforeRun] Current Exec Order Algo in MS Context is bfs [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.490 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1046] DoStreamAssign] Status record: start stream assign, kernel_graph0 [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.438.492 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:937] EnableKBKCompileCache] Disable backend compile cache by front config. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.525 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.582 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op1 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.627 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op2 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.658 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op3 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.700 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op4 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.741 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op5 [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.438.743 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:620] BuildSymbolEngine] Status record: skip build symbol engine for function graph: 4_3_1___main___Net_construct_20 [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.438.789 [mindspore/ccsrc/backend/graph_compiler/graph_partition.cc:866] Partition] GraphPartion Info: 4_3_1___main___Net_construct_20 inline mode:0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.793 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Mul-op0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.438.827 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op0 TotalTime = 1.96117, [21] [bootstrap]: 0.00130983 [type_inference]: 0.015726 [auto_monad]: 0.00024385 [graph_reusing]: 7.25699e-05 [inline]: 0.0134732, [2] [rewriter_before_opt_a]: 7.77501e-05 [a1a2]: 0.0132997, [2] [Cycle 1]: 0.00192293, [11] [expand_dump_flag]: 5.79003e-06 [switch_simplify]: 5.873e-05 [loop_unroll]: 2.832e-05 [a_1]: 0.00031792 [recompute_prepare]: 2.623e-05 [updatestate_depend_eliminate]: 1.103e-05 [updatestate_assign_eliminate]: 5.09003e-06 [updatestate_loads_eliminate]: 3.50992e-06 [parameter_eliminate]: 8.3599e-06 [a_2]: 0.00071532 [parallel_inline_pass]: 2.66499e-05 [Cycle 2]: 0.00139341, [11] [expand_dump_flag]: 2.16998e-06 [switch_simplify]: 2.495e-05 [loop_unroll]: 2.22101e-05 [a_1]: 0.000137 [recompute_prepare]: 2.28101e-05 [updatestate_depend_eliminate]: 4.58001e-06 [updatestate_assign_eliminate]: 3.95998e-06 [updatestate_loads_eliminate]: 3.35998e-06 [parameter_eliminate]: 2.42994e-06 [a_2]: 0.00068829 [parallel_inline_pass]: 2.515e-05 [parallel-infer-symbol]: 0.00013831 [pre_auto_parallel]: 0.00010657 [insert-virtual-dataset]: 0.0012468 [parallel-infer-symbol-second]: 5.86e-05 [dataset_repeat_opt]: 0.00035189 [pipeline_split]: 0.00012661 [optimize]: 0.0501146, [52] [py_interpret_to_execute]: 5.56699e-05 [rewriter_before_opt_a]: 7.654e-05 [opt_a]: 0.0391197, [3] [Cycle 1]: 0.0197613, [46] [expand_dump_flag]: 2.91003e-06 [switch_simplify]: 3.975e-05 [loop_unroll]: 2.699e-05 [a_1]: 0.00032018 [recompute_prepare]: 2.839e-05 [updatestate_depend_eliminate]: 9.72999e-06 [updatestate_assign_eliminate]: 6.4401e-06 [updatestate_loads_eliminate]: 5.62007e-06 [parameter_eliminate]: 5.5501e-06 [a_2]: 0.00075524 [accelerated_algorithm]: 2.728e-05 [shard]: 3.82899e-05 [meta_shard_fg_expand]: 4.34997e-06 [shard_inline]: 2.77701e-05 [auto_parallel]: 4.004e-05 [parallel]: 0.0104484 [flash_sp]: 4.82399e-05 [merge_comm]: 3.525e-05 [allreduce_fusion]: 2.78901e-05 [matmul_add_comm_reduction]: 3.529e-05 [allreduce_slice_to_reducescatter]: 7.40052e-07 [virtual_shard_identity]: 8.955e-05 [virtual_dataset]: 0.00012622 [get_grad_eliminate_]: 6.645e-05 [virtual_output]: 9.12501e-05 [merge_forward]: 3.047e-05 [cell_reuse_recompute_pass]: 7.49994e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00013682 [before_grad]: 9.353e-05 [inplace_validation]: 2.735e-05 [parallel_renormalize]: 0.00330372 [update_top_fg]: 1.44006e-06 [cast_eliminate]: 8.96e-05 [meta_fg_expand]: 2.845e-05 [inplace_validation_after_expand]: 4.45e-05 [flash_sp_send_recv_attached]: 5.813e-05 [receive_attached]: 2.005e-05 [after_resolve]: 8.148e-05 [a_after_grad]: 9.88001e-05 [special_op_eliminate]: 0.00013196 [renormalize]: 1.10012e-07 [add_forward_monad_depend]: 1.044e-05 [auto_monad_grad]: 3.71994e-06 [auto_monad_eliminator]: 6.70301e-05 [cse]: 0.00019085 [a_3]: 0.00069419 [Cycle 2]: 0.0113525, [46] [expand_dump_flag]: 3.99002e-06 [switch_simplify]: 6.957e-05 [loop_unroll]: 6.37099e-05 [a_1]: 0.00144389 [recompute_prepare]: 7.614e-05 [updatestate_depend_eliminate]: 3.804e-05 [updatestate_assign_eliminate]: 2.42899e-05 [updatestate_loads_eliminate]: 2.41001e-05 [parameter_eliminate]: 6.88003e-06 [a_2]: 0.00159423 [accelerated_algorithm]: 0.00015307 [shard]: 4.141e-05 [meta_shard_fg_expand]: 1.828e-05 [shard_inline]: 6.55099e-05 [auto_parallel]: 7.72e-05 [parallel]: 1.541e-05 [flash_sp]: 4.29399e-05 [merge_comm]: 3.47199e-05 [allreduce_fusion]: 2.66699e-05 [matmul_add_comm_reduction]: 2.87e-05 [allreduce_slice_to_reducescatter]: 7.20029e-07 [virtual_shard_identity]: 7.064e-05 [virtual_dataset]: 6.332e-05 [get_grad_eliminate_]: 6.022e-05 [virtual_output]: 5.989e-05 [merge_forward]: 3.00599e-05 [cell_reuse_recompute_pass]: 8.21007e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00014292 [before_grad]: 8.671e-05 [inplace_validation]: 2.723e-05 [parallel_renormalize]: 1.10012e-07 [update_top_fg]: 1.20001e-06 [cast_eliminate]: 6.54199e-05 [meta_fg_expand]: 2.867e-05 [inplace_validation_after_expand]: 3.92101e-05 [flash_sp_send_recv_attached]: 3.74008e-06 [receive_attached]: 2.88e-06 [after_resolve]: 0.00017915 [a_after_grad]: 8.76801e-05 [special_op_eliminate]: 6.377e-05 [renormalize]: 0.00292724 [add_forward_monad_depend]: 9.91009e-06 [auto_monad_grad]: 3.97e-06 [auto_monad_eliminator]: 5.977e-05 [cse]: 0.00017263 [a_3]: 0.00068345 [Cycle 3]: 0.0079363, [46] [expand_dump_flag]: 3.98001e-06 [switch_simplify]: 6.648e-05 [loop_unroll]: 6.209e-05 [a_1]: 0.00125123 [recompute_prepare]: 7.225e-05 [updatestate_depend_eliminate]: 4.18801e-05 [updatestate_assign_eliminate]: 2.475e-05 [updatestate_loads_eliminate]: 2.42699e-05 [parameter_eliminate]: 8.10006e-06 [a_2]: 0.00148467 [accelerated_algorithm]: 8.65e-05 [shard]: 4.16e-05 [meta_shard_fg_expand]: 2.296e-05 [shard_inline]: 6.627e-05 [auto_parallel]: 9.239e-05 [parallel]: 1.67199e-05 [flash_sp]: 2.91993e-06 [merge_comm]: 3.56999e-05 [allreduce_fusion]: 2.713e-05 [matmul_add_comm_reduction]: 3.655e-05 [allreduce_slice_to_reducescatter]: 7.10017e-07 [virtual_shard_identity]: 7.256e-05 [virtual_dataset]: 6.097e-05 [get_grad_eliminate_]: 6.263e-05 [virtual_output]: 6.11299e-05 [merge_forward]: 3.123e-05 [cell_reuse_recompute_pass]: 8.10996e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00015216 [before_grad]: 8.958e-05 [inplace_validation]: 2.84599e-05 [parallel_renormalize]: 1.20024e-07 [update_top_fg]: 1.05996e-06 [cast_eliminate]: 6.536e-05 [meta_fg_expand]: 3.16499e-05 [inplace_validation_after_expand]: 4.139e-05 [flash_sp_send_recv_attached]: 3.84008e-06 [receive_attached]: 2.35008e-06 [after_resolve]: 7.617e-05 [a_after_grad]: 9.07801e-05 [special_op_eliminate]: 6.166e-05 [renormalize]: 1.10012e-07 [add_forward_monad_depend]: 8.43999e-06 [auto_monad_grad]: 4.93007e-06 [auto_monad_eliminator]: 5.497e-05 [cse]: 0.00017731 [a_3]: 0.00072505 [py_interpret_to_execute_after_opt_a]: 0.00011015 [slice_cell_reuse_recomputed_activation]: 4.537e-05 [rewriter_after_opt_a]: 0.00039861 [convert_after_rewriter]: 8.185e-05 [order_py_execute_after_rewriter]: 8.468e-05 [opt_b]: 0.00308221, [1] [Cycle 1]: 0.00302137, [7] [b_1]: 0.00222383 [b_2]: 7.193e-05 [updatestate_depend_eliminate]: 5.158e-05 [updatestate_assign_eliminate]: 2.562e-05 [updatestate_loads_eliminate]: 2.42599e-05 [renormalize]: 1.14995e-06 [cse]: 0.00018923 [optimize_parallel_all_gather_comm]: 8.745e-05 [overlap_param_gather]: 4.301e-05 [cconv]: 0.00010778 [loop_unroll]: 0.00111053 [opt_after_cconv]: 0.00093474, [1] [Cycle 1]: 0.00088594, [7] [c_1]: 0.00023423 [parameter_eliminate]: 8.82009e-06 [updatestate_depend_eliminate]: 4.004e-05 [updatestate_assign_eliminate]: 2.54699e-05 [updatestate_loads_eliminate]: 2.353e-05 [cse]: 0.00016891 [renormalize]: 1.37999e-06 [remove_dup_value]: 0.00038325 [tuple_transform]: 0.0005054, [1] [Cycle 1]: 0.00045456, [2] [d_1]: 0.00032916 [renormalize]: 6.39935e-07 [partial_unused_args_eliminate]: 4.215e-05 [add_cache_embedding]: 0.00014952 [add_recomputation]: 0.00028985 [cse_after_recomputation]: 0.00023274, [1] [Cycle 1]: 0.00017972, [1] [cse]: 0.00011049 [environ_conv]: 0.00010227 [swap_dp_allreduce_reducescatter]: 7.14201e-05 [bias_add_comm_swap]: 4.32e-05 [label_micro_interleaved_index]: 4.011e-05 [label_fine_grained_interleaved_index]: 3.982e-05 [merge_cast_opt]: 3.625e-05 [slice_recompute_activation]: 7.125e-05 [micro_interleaved_order_control]: 3.75101e-05 [assign_add_opt]: 0.00025233 [ForceFp32Comm]: 3.706e-05 [remove_cast_before_assign_add]: 6.397e-05 [full_micro_interleaved_order_control]: 3.94899e-05 [reorder_send_recv_between_fp_bp]: 4.08001e-05 [comm_op_add_attrs]: 0.00011714 [add_comm_op_reuse_tag]: 0.00011355 [interleave_split_concat_branches]: 4.105e-05 [interleave_parallel_branches]: 3.806e-05 [overlap_opt_shard_in_pipeline]: 5.466e-05 [overlap_opt_shard_grad_in_pipeline]: 6.17499e-05 [control_data_broadcast_order]: 3.92101e-05 [grouped_pairwise_exchange_alltoall]: 5.59101e-05 [offloading_packed_experts]: 9.69601e-05 [overlap_recompute_and_grad_model_parallel]: 4.223e-05 [overlap_grad_matmul_and_grad_allreduce]: 3.71e-05 [overlap_recompute_allgather_and_fa_grad]: 5.62801e-05 [overlap_grad_ring_attention]: 8.677e-05 [overlap_grad_flash_sp]: 6.73101e-05 [begin_end_overlap_inline]: 4.955e-05 [split_matmul_comm_elemetwise]: 4.368e-05 [split_layernorm_comm]: 4.07001e-05 [handle_group_info]: 4.309e-05 [symbol_engine_optimizer]: 0.00072663, [1] [Cycle 1]: 0.00067557, [6] [build]: 5.61901e-05 [elim_shapecalc]: 7.409e-05 [elim_not_effective]: 8.395e-05 [opt_reshape]: 5.611e-05 [fold_const_symbol]: 7.333e-05 [renormalize]: 1.14995e-06 [pipeline_parallel_scheduler]: 6.29401e-05 [auto_monad_reorder]: 0.00016452 [get_jit_bprop_graph]: 5.605e-05 [rewriter_after_jit_bprop_graph]: 5.444e-05 [eliminate_special_op_node]: 0.00137826 [distribtued_split]: 0.00028175 [validate]: 0.00020954 [task_emit]: 1.87545 [execute]: 6.356e-05 Sums bootstrap : 0.001310s : 0.07% type_inference : 0.015726s : 0.81% auto_monad : 0.000244s : 0.01% graph_reusing : 0.000073s : 0.00% inline.rewriter_before_opt_a : 0.000078s : 0.00% inline.a1a2.expand_dump_flag : 0.000008s : 0.00% inline.a1a2.switch_simplify : 0.000084s : 0.00% inline.a1a2.loop_unroll : 0.000051s : 0.00% inline.a1a2.a_1 : 0.000455s : 0.02% inline.a1a2.recompute_prepare : 0.000049s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000016s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000009s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000007s : 0.00% inline.a1a2.parameter_eliminate : 0.000011s : 0.00% inline.a1a2.a_2 : 0.001404s : 0.07% inline.a1a2.parallel_inline_pass : 0.000052s : 0.00% parallel-infer-symbol : 0.000138s : 0.01% pre_auto_parallel : 0.000107s : 0.01% insert-virtual-dataset : 0.001247s : 0.06% parallel-infer-symbol-second : 0.000059s : 0.00% dataset_repeat_opt : 0.000352s : 0.02% pipeline_split : 0.000127s : 0.01% optimize.py_interpret_to_execute : 0.000056s : 0.00% optimize.rewriter_before_opt_a : 0.000077s : 0.00% optimize.opt_a.expand_dump_flag : 0.000011s : 0.00% optimize.opt_a.switch_simplify : 0.000176s : 0.01% optimize.opt_a.loop_unroll : 0.000153s : 0.01% optimize.opt_a.a_1 : 0.003015s : 0.16% optimize.opt_a.recompute_prepare : 0.000177s : 0.01% optimize.opt_a.updatestate_depend_eliminate : 0.000090s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000055s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000054s : 0.00% optimize.opt_a.parameter_eliminate : 0.000021s : 0.00% optimize.opt_a.a_2 : 0.003834s : 0.20% optimize.opt_a.accelerated_algorithm : 0.000267s : 0.01% optimize.opt_a.shard : 0.000121s : 0.01% optimize.opt_a.meta_shard_fg_expand : 0.000046s : 0.00% optimize.opt_a.shard_inline : 0.000160s : 0.01% optimize.opt_a.auto_parallel : 0.000210s : 0.01% optimize.opt_a.parallel : 0.010481s : 0.54% optimize.opt_a.flash_sp : 0.000094s : 0.00% optimize.opt_a.merge_comm : 0.000106s : 0.01% optimize.opt_a.allreduce_fusion : 0.000082s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000101s : 0.01% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000233s : 0.01% optimize.opt_a.virtual_dataset : 0.000251s : 0.01% optimize.opt_a.get_grad_eliminate_ : 0.000189s : 0.01% optimize.opt_a.virtual_output : 0.000212s : 0.01% optimize.opt_a.merge_forward : 0.000092s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000024s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000432s : 0.02% optimize.opt_a.before_grad : 0.000270s : 0.01% optimize.opt_a.inplace_validation : 0.000083s : 0.00% optimize.opt_a.parallel_renormalize : 0.003304s : 0.17% optimize.opt_a.update_top_fg : 0.000004s : 0.00% optimize.opt_a.cast_eliminate : 0.000220s : 0.01% optimize.opt_a.meta_fg_expand : 0.000089s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000125s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000066s : 0.00% optimize.opt_a.receive_attached : 0.000025s : 0.00% optimize.opt_a.after_resolve : 0.000337s : 0.02% optimize.opt_a.a_after_grad : 0.000277s : 0.01% optimize.opt_a.special_op_eliminate : 0.000257s : 0.01% optimize.opt_a.renormalize : 0.002927s : 0.15% optimize.opt_a.add_forward_monad_depend : 0.000029s : 0.00% optimize.opt_a.auto_monad_grad : 0.000013s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000182s : 0.01% optimize.opt_a.cse : 0.000541s : 0.03% optimize.opt_a.a_3 : 0.002103s : 0.11% optimize.py_interpret_to_execute_after_opt_a : 0.000110s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000045s : 0.00% optimize.rewriter_after_opt_a : 0.000399s : 0.02% optimize.convert_after_rewriter : 0.000082s : 0.00% optimize.order_py_execute_after_rewriter : 0.000085s : 0.00% optimize.opt_b.b_1 : 0.002224s : 0.11% optimize.opt_b.b_2 : 0.000072s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000052s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000026s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000024s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000189s : 0.01% optimize.optimize_parallel_all_gather_comm : 0.000087s : 0.00% optimize.overlap_param_gather : 0.000043s : 0.00% optimize.cconv : 0.000108s : 0.01% optimize.loop_unroll : 0.001111s : 0.06% optimize.opt_after_cconv.c_1 : 0.000234s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000009s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000040s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000025s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000024s : 0.00% optimize.opt_after_cconv.cse : 0.000169s : 0.01% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000383s : 0.02% optimize.tuple_transform.d_1 : 0.000329s : 0.02% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000042s : 0.00% optimize.add_cache_embedding : 0.000150s : 0.01% optimize.add_recomputation : 0.000290s : 0.01% optimize.cse_after_recomputation.cse : 0.000110s : 0.01% optimize.environ_conv : 0.000102s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000071s : 0.00% optimize.bias_add_comm_swap : 0.000043s : 0.00% optimize.label_micro_interleaved_index : 0.000040s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000040s : 0.00% optimize.merge_cast_opt : 0.000036s : 0.00% optimize.slice_recompute_activation : 0.000071s : 0.00% optimize.micro_interleaved_order_control : 0.000038s : 0.00% optimize.assign_add_opt : 0.000252s : 0.01% optimize.ForceFp32Comm : 0.000037s : 0.00% optimize.remove_cast_before_assign_add : 0.000064s : 0.00% optimize.full_micro_interleaved_order_control : 0.000039s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000041s : 0.00% optimize.comm_op_add_attrs : 0.000117s : 0.01% optimize.add_comm_op_reuse_tag : 0.000114s : 0.01% optimize.interleave_split_concat_branches : 0.000041s : 0.00% optimize.interleave_parallel_branches : 0.000038s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000055s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000062s : 0.00% optimize.control_data_broadcast_order : 0.000039s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000056s : 0.00% optimize.offloading_packed_experts : 0.000097s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000042s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000037s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000056s : 0.00% optimize.overlap_grad_ring_attention : 0.000087s : 0.00% optimize.overlap_grad_flash_sp : 0.000067s : 0.00% optimize.begin_end_overlap_inline : 0.000050s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000044s : 0.00% optimize.split_layernorm_comm : 0.000041s : 0.00% optimize.handle_group_info : 0.000043s : 0.00% optimize.symbol_engine_optimizer.build : 0.000056s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000074s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000084s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000056s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000073s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000063s : 0.00% auto_monad_reorder : 0.000165s : 0.01% get_jit_bprop_graph : 0.000056s : 0.00% rewriter_after_jit_bprop_graph : 0.000054s : 0.00% eliminate_special_op_node : 0.001378s : 0.07% distribtued_split : 0.000282s : 0.01% validate : 0.000210s : 0.01% task_emit : 1.875450s : 96.68% execute : 0.000064s : 0.00% [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.439.110 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1519] CompileGraph] Compile graph: 4_3_1___main___Net_construct_20, Split segments size: 2 [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.439.191 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1555] CompileGraphFromSegment] Compile normal segment, the first node: @4_3_1___main___Net_construct_20:CNode_21{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} Time group info: ------[substitution.] 0.000730 352 14.61% : 0.000107s : 10: substitution.arithmetic_simplify 1.16% : 0.000009s : 21: substitution.elim_not_effective 3.66% : 0.000027s : 14: substitution.float_tuple_getitem_switch 0.95% : 0.000007s : 21: substitution.fold_const_symbol 3.45% : 0.000025s : 29: substitution.graph_param_transform 11.43% : 0.000083s : 1: substitution.inline 4.73% : 0.000035s : 66: substitution.j_node_and_user_rematch 6.73% : 0.000049s : 4: substitution.less_batch_normalization 2.05% : 0.000015s : 10: substitution.minmaximum_grad 5.62% : 0.000041s : 66: substitution.remove_not_recompute_node 1.65% : 0.000012s : 6: substitution.replace_old_param 9.80% : 0.000072s : 18: substitution.tuple_list_convert_item_index_to_positive 5.46% : 0.000040s : 18: substitution.tuple_list_get_item_const_eliminator 4.83% : 0.000035s : 18: substitution.tuple_list_get_item_depend_reorder 15.81% : 0.000115s : 30: substitution.tuple_list_get_item_eliminator 5.30% : 0.000039s : 18: substitution.tuple_list_get_set_item_eliminator 2.32% : 0.000017s : 1: substitution.virtual_dataset_eliminate 0.44% : 0.000003s : 1: substitution.virtual_output_eliminate ------[type_inference.] 0.015596 2 97.30% : 0.015175s : 1: type_inference.infer 2.70% : 0.000421s : 1: type_inference.specialize ------[replace.] 0.000115 5 13.06% : 0.000015s : 1: replace.inline 50.49% : 0.000058s : 2: replace.tuple_list_get_item_eliminator 25.37% : 0.000029s : 1: replace.virtual_dataset_eliminate 11.08% : 0.000013s : 1: replace.virtual_output_eliminate ------[match.] 0.000109 5 75.87% : 0.000082s : 1: match.inline 8.07% : 0.000009s : 2: match.tuple_list_get_item_eliminator 14.42% : 0.000016s : 1: match.virtual_dataset_eliminate 1.63% : 0.000002s : 1: match.virtual_output_eliminate ------[predicate.] 0.001510 11225 0.80% : 0.000012s : 100: predicate.accumulaten_eliminater 0.61% : 0.000009s : 29: predicate.ad_related_special_op_eliminate 0.88% : 0.000013s : 97: predicate.addn_check_dump 0.78% : 0.000012s : 100: predicate.addn_zero_filter 0.73% : 0.000011s : 100: predicate.adjust_all_reduce_mul_add 2.32% : 0.000035s : 197: predicate.arithmetic_simplify 2.19% : 0.000033s : 226: predicate.cast_eliminate 1.12% : 0.000017s : 126: predicate.check_bprop_eliminate 0.87% : 0.000013s : 97: predicate.compare_switch_simplify 0.24% : 0.000004s : 41: predicate.const_output_eliminate 0.31% : 0.000005s : 29: predicate.convert_tensor_all_eliminate 1.17% : 0.000018s : 102: predicate.convert_tensor_eliminate 0.95% : 0.000014s : 97: predicate.depend_value_elim 0.81% : 0.000012s : 100: predicate.dict_get_item_const_eliminator 0.84% : 0.000013s : 100: predicate.dict_get_item_eliminator 0.82% : 0.000012s : 100: predicate.dict_set_item_eliminator 0.20% : 0.000003s : 29: predicate.elim_not_effective 0.38% : 0.000006s : 29: predicate.elim_shapecalc_of_broadcastargs 1.10% : 0.000017s : 141: predicate.environ_add_const_eliminate 1.09% : 0.000016s : 141: predicate.environ_get_add_eliminate 1.11% : 0.000017s : 141: predicate.environ_get_depend_swap 2.08% : 0.000031s : 238: predicate.environ_get_eliminate 1.07% : 0.000016s : 141: predicate.environ_get_set_eliminate 0.76% : 0.000012s : 103: predicate.exchange_switch_depend_value 1.21% : 0.000018s : 103: predicate.float_depend_g_call 0.93% : 0.000014s : 97: predicate.float_environ_get_switch 1.25% : 0.000019s : 138: predicate.float_tuple_getitem_switch 0.15% : 0.000002s : 29: predicate.fold_const_symbol 1.14% : 0.000017s : 127: predicate.get_grad_eliminate 0.20% : 0.000003s : 29: predicate.graph_param_transform 0.85% : 0.000013s : 97: predicate.incorporate_call 0.86% : 0.000013s : 97: predicate.incorporate_call_switch 5.21% : 0.000079s : 479: predicate.inline 1.53% : 0.000023s : 126: predicate.inline_without_move 0.63% : 0.000010s : 126: predicate.j_node_and_user_rematch 1.07% : 0.000016s : 89: predicate.less_batch_normalization 1.50% : 0.000023s : 172: predicate.list_to_tuple_eliminator_ 2.16% : 0.000033s : 284: predicate.load_eliminater 0.80% : 0.000012s : 41: predicate.loop_unroll_after_grad 1.02% : 0.000015s : 105: predicate.loop_unroll_before_grad 1.59% : 0.000024s : 182: predicate.make_slice_get_slice_eliminator 0.90% : 0.000014s : 97: predicate.merge_addn 1.07% : 0.000016s : 126: predicate.micro_step_allgather_replace 1.09% : 0.000016s : 126: predicate.mini_step_allgather_replace 0.79% : 0.000012s : 100: predicate.minmaximum_grad 0.45% : 0.000007s : 29: predicate.mutable_eliminate 0.32% : 0.000005s : 29: predicate.opt_reshape 0.36% : 0.000005s : 41: predicate.parallel_virtual_node 1.10% : 0.000017s : 103: predicate.partial_defer_inline 1.21% : 0.000018s : 143: predicate.partial_eliminate 0.76% : 0.000012s : 100: predicate.print_const_string_wrapper 0.84% : 0.000013s : 97: predicate.reduce_all_const_elim 0.96% : 0.000015s : 100: predicate.reduce_eliminate 0.67% : 0.000010s : 126: predicate.remove_not_recompute_node 1.58% : 0.000024s : 228: predicate.replace_applicator 0.76% : 0.000011s : 126: predicate.replace_old_param 0.28% : 0.000004s : 41: predicate.reset_defer_inline 0.79% : 0.000012s : 100: predicate.reshape_eliminate 1.10% : 0.000017s : 126: predicate.row_tensor_add_zeros_like 0.41% : 0.000006s : 41: predicate.row_tensor_eliminate 1.34% : 0.000020s : 126: predicate.same_eliminate 0.62% : 0.000009s : 97: predicate.set_cell_output_no_recompute 1.29% : 0.000019s : 127: predicate.shard_identity_eliminate 1.69% : 0.000025s : 167: predicate.special_op_eliminate 1.05% : 0.000016s : 97: predicate.specialize_transform 1.22% : 0.000018s : 126: predicate.split_environ_get_set_with_tuple_value 1.18% : 0.000018s : 126: predicate.stack_unstack_eliminate 2.17% : 0.000033s : 284: predicate.stopgrad_eliminater 0.35% : 0.000005s : 41: predicate.switch_call_monad_eliminater 0.85% : 0.000013s : 103: predicate.switch_defer_inline 1.90% : 0.000029s : 229: predicate.switch_layer_defer_inline 3.09% : 0.000047s : 305: predicate.switch_simplify 0.77% : 0.000012s : 100: predicate.tile_eliminate 0.75% : 0.000011s : 100: predicate.transpose_eliminate 1.50% : 0.000023s : 170: predicate.tuple_list_convert_item_index_to_positive 1.52% : 0.000023s : 170: predicate.tuple_list_get_item_const_eliminator 1.42% : 0.000022s : 170: predicate.tuple_list_get_item_depend_reorder 2.71% : 0.000041s : 269: predicate.tuple_list_get_item_eliminator 1.39% : 0.000021s : 170: predicate.tuple_list_get_set_item_eliminator 2.51% : 0.000038s : 267: predicate.tuple_list_set_item_eliminator 1.50% : 0.000023s : 172: predicate.tuple_to_list_eliminator_ 2.11% : 0.000032s : 284: predicate.updatestate_pure_node_eliminater 3.15% : 0.000048s : 381: predicate.updatestate_useless_node_eliminater 0.40% : 0.000006s : 41: predicate.value_based_eliminate 1.18% : 0.000018s : 130: predicate.virtual_dataset_eliminate 1.14% : 0.000017s : 128: predicate.virtual_output_eliminate 0.40% : 0.000006s : 41: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000574 5 6.23% : 0.000036s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.77% : 0.000538s : 4: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 2.041103 283 0.00% : 0.000041s : 1: ForceFp32Comm 0.65% : 0.013307s : 1: a1a2 0.01% : 0.000157s : 1: add_cache_embedding 0.01% : 0.000122s : 1: add_comm_op_reuse_tag 0.01% : 0.000299s : 1: add_recomputation 0.01% : 0.000263s : 1: assign_add_opt 0.01% : 0.000256s : 1: auto_monad 0.01% : 0.000176s : 1: auto_monad_reorder 0.00% : 0.000055s : 1: begin_end_overlap_inline 0.00% : 0.000050s : 1: bias_add_comm_swap 0.07% : 0.001342s : 1: bootstrap 0.01% : 0.000114s : 1: cconv 0.01% : 0.000124s : 1: comm_op_add_attrs 0.00% : 0.000043s : 1: control_data_broadcast_order 0.00% : 0.000088s : 1: convert_after_rewriter 0.01% : 0.000238s : 1: cse_after_recomputation 0.02% : 0.000364s : 1: dataset_repeat_opt 0.01% : 0.000296s : 1: distribtued_split 0.07% : 0.001395s : 1: eliminate_special_op_node 0.01% : 0.000109s : 1: environ_conv 0.00% : 0.000073s : 1: execute 0.00% : 0.000046s : 1: full_micro_interleaved_order_control 0.00% : 0.000064s : 1: get_jit_bprop_graph 0.00% : 0.000081s : 1: graph_reusing 0.00% : 0.000060s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000048s : 1: handle_group_info 0.66% : 0.013489s : 1: inline 0.06% : 0.001267s : 1: insert-virtual-dataset 0.00% : 0.000043s : 1: interleave_parallel_branches 0.00% : 0.000047s : 1: interleave_split_concat_branches 0.00% : 0.000046s : 1: label_fine_grained_interleaved_index 0.00% : 0.000045s : 1: label_micro_interleaved_index 0.05% : 0.001122s : 1: loop_unroll 0.00% : 0.000041s : 1: merge_cast_opt 0.00% : 0.000042s : 1: micro_interleaved_order_control 0.01% : 0.000102s : 1: offloading_packed_experts 0.03% : 0.000654s : 44: opt.transform.a1a2 0.00% : 0.000075s : 1: opt.transform.loop_unroll_optimizer 0.38% : 0.007799s : 123: opt.transform.opt_a 0.01% : 0.000213s : 1: opt.transform.opt_after_cconv 0.05% : 0.001064s : 27: opt.transform.opt_b 0.01% : 0.000304s : 1: opt.transform.opt_trans_graph 0.01% : 0.000125s : 3: opt.transform.special_op_eliminate 0.01% : 0.000200s : 4: opt.transform.symbol_engine_opt 1.92% : 0.039128s : 1: opt_a 0.05% : 0.000942s : 1: opt_after_cconv 0.15% : 0.003089s : 1: opt_b 2.46% : 0.050131s : 1: optimize 0.00% : 0.000095s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000091s : 1: order_py_execute_after_rewriter 0.00% : 0.000072s : 1: overlap_grad_flash_sp 0.00% : 0.000042s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000095s : 1: overlap_grad_ring_attention 0.00% : 0.000067s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000059s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000049s : 1: overlap_param_gather 0.00% : 0.000061s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000046s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000150s : 1: parallel-infer-symbol 0.00% : 0.000068s : 1: parallel-infer-symbol-second 0.00% : 0.000047s : 1: partial_unused_args_eliminate 0.00% : 0.000072s : 1: pipeline_parallel_scheduler 0.01% : 0.000135s : 1: pipeline_split 0.01% : 0.000116s : 1: pre_auto_parallel 0.00% : 0.000062s : 1: py_interpret_to_execute 0.01% : 0.000119s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000069s : 1: remove_cast_before_assign_add 0.02% : 0.000394s : 1: remove_dup_value 0.20% : 0.004090s : 2: renormalize.infer 0.10% : 0.002109s : 2: renormalize.specialize 0.00% : 0.000046s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000061s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000409s : 1: rewriter_after_opt_a 0.01% : 0.000167s : 2: rewriter_before_opt_a 0.00% : 0.000050s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000076s : 1: slice_recompute_activation 0.00% : 0.000045s : 1: split_layernorm_comm 0.00% : 0.000050s : 1: split_matmul_comm_elemetwise 0.00% : 0.000078s : 1: swap_dp_allreduce_reducescatter 0.04% : 0.000732s : 1: symbol_engine_optimizer 91.89% : 1.875493s : 1: task_emit 0.03% : 0.000511s : 1: tuple_transform 0.77% : 0.015746s : 1: type_inference 0.02% : 0.000354s : 1: validate [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.439.385 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1785] Run] End [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.439.431 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:629] CompileGraph] Status record: start compile graph. [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.439.479 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:2227] ConstructKernelGraph] Create graph: 0 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.439.480 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:239] SavePassesConfig] Running_passes: ['a1a2.r1.a_1', 'a1a2.r1.a_1.inline', 'opt_a.r1.auto_parallel', 'opt_a.r1.flash_sp', 'opt_a.r1.flash_sp_send_recv_attached', 'opt_a.r1.parallel', 'opt_a.r1.parallel_renormalize', 'opt_a.r1.receive_attached', 'opt_a.r1.virtual_dataset', 'opt_a.r1.virtual_dataset.virtual_dataset_eliminate', 'opt_a.r1.virtual_output', 'opt_a.r1.virtual_output.virtual_output_eliminate', 'opt_a.r2.a_1', 'opt_a.r2.a_1.tuple_list_get_item_eliminator', 'opt_a.r2.accelerated_algorithm', 'opt_a.r2.accelerated_algorithm.less_batch_normalization', 'opt_a.r2.auto_parallel', 'opt_a.r2.flash_sp', 'opt_a.r2.renormalize', 'opt_a.r3.auto_parallel'] [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.439.529 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1047] SaveCompiledGraph] Save compiled func graph(4_3_1___main___Net_construct_20) phase(train.1738915084320481280.281470585332944.0..)! [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.439.566 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1065] SaveCompiledGraph] End save compiled func graph! [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.439.612 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1326] CompileInner] [PROF]ParallelPostProcess costs 0.011 msec. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.439.634 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1154] CleanCompileRes] Clean compile resource start [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.440.046 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 2 for node Default/AllGather-op0, group: 2-6541264347459079684 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.440.104 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.440.156 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op2 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.440.190 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op1 [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.440.485 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:3487] ConstructOutput] Output:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.440.792 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.440.940 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:637] CompileGraph] [PROF]ConstructKernelGraph costs 1.473 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.441.072 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: kernel_graph0 [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.441.220 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:449] RecursiveSetRunMode] Kernel graph: kernel_graph0, set run mode:KernelMode [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.441.362 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 3 for node Default/AllGather-op1, group: 2-16057586909177180503 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.441.381 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:191] EliminateIllegalDataTypePass] Start eliminate illegal data type for kernel graph id:0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.441.415 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op1 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.441.451 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_0_convert_list_to_tuple in 22.57 us [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.441.464 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op3 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.441.497 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op2 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.441.659 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_1_eliminate_func_type in 174.78 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.441.705 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:204] EliminateIllegalDataTypePass] [PROF]EliminateIllegalDataTypePass costs 0.32 msec. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.441.704 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1175] CleanCompileRes] Clean compile resource end [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.441.736 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:151] CommonUnifyMindIR] start common unify mindir opt graph:0 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.441.747 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] End compiling 'Net.construct'. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.441.764 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: conv_transpose_to_conv_backprop_input [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.441.779 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1334] CompileInner] [PROF]CleanCompileRes costs 2.137 msec. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.441.798 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1335] CompileInner] Finish compiling. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:06.441.819 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1336] CompileInner] [PROF]compile_graph costs 2117.85 msec. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.441.901 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_0_conv_transpose_to_conv_backprop_input in 132.1 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.441.926 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: custom_op_reg_info_to_attr [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.441.963 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_1_custom_op_reg_info_to_attr in 35.19 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.441.985 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Custom not exist in name to cnode [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.442.015 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_2_inplace_assign_for_custom_op in 28.86 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.442.037 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_attr_to_unify_mindir [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.442.176 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_3_convert_attr_to_unify_mindir in 134.58 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.442.218 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:164] CommonUnifyMindIR] [PROF]CommonUnifyMindIR costs 0.478 msec. [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.442.220 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_x, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.442.255 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:81] BackendCommonOptimization] Status record: start common optimization. graph id: 0 [INFO] PARALLEL(187789,ffffaa419c10,python):2025-02-07-15:58:06.442.274 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_y, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.442.298 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_dynamic_broadcast_to [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.442.410 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_0_convert_dynamic_broadcast_to in 108.44 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.442.578 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_1_convert_const_input_to_attr in 139.63 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.442.710 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_2_custom_op_const_input_to_attr in 103.77 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.442.829 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_3_convert_const_input_to_tensor_input_for_print in 92.42 us [INFO] UTILS(187789,ffffaa419c10,python):2025-02-07-15:58:06.442.827 [mindspore/ccsrc/utils/dynamic_obfuscation/registry_opaque_predicate.cc:112] init_calling_count] calling_count_ has been initialized to 0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.442.832 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 4 for node Default/AllGather-op2, group: 2-12944936785892925600 [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.442.950 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1893] RunGraph] Status record: start run actor: kernel_graph_0 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.008 [mindspore/ccsrc/runtime/device/pre_launch_comm.cc:200] PreLaunchCommKernel] No hccl kernel to pre launch [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.039 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1918] RunGraph] [PROF]PreLaunchCommKernel costs 0.042 msec. [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.084 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:966] SpawnMultiPipelineActor] Enable runtime asynchronously launch kernel, default actor thread num 5, current actor thread num: 5 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.123 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.181 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.216 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:394] operator()] Init defrag memory step freq. [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.236 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:396] operator()] Config defrag memory step freq : . [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.443.252 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_4_convert_tuple_output_to_maketuple in 393.17 us [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.253 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:405] operator()] Defrag memory step freq : 100. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.443.287 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_5_convert_unused_tuple_para_to_make_tuple in 3.64 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.443.310 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_6_flatten_concat_fission is enabled. [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.312 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:638] PrepareDataForDeviceTensorStore] Prepare store data, input tensor size: 0, arg size: 2 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.334 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:835] AllocGEFixMemory] Start AllocGEFixMemory [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.366 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:652] PrepareDataForDeviceTensorStore] prepare data for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.404 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-0, debug name:ValueNode (2, 2, 2), front node:ValueNode (2, 2, 2) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.432 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 2, 2) front node:ValueNode (2, 2, 2) [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.443.433 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_6_flatten_concat_fission in 98.98 us [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.454 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3445c660 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.475 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 2, 2) [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:06.443.521 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.443.553 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_7_inset_input_structural_for_py_execute in 91.25 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.443.578 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_8_broadcast_to_fusion is enabled. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.443.688 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_8_broadcast_to_fusion in 85.8 us [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.443.808 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1062] DoStreamAssign] Status record: end stream assign, kernel_graph0 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.443.890 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.443.998 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:2 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.444.031 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_9_add_attr_to_node in 312.6 us [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.044 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.444.059 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_10_replace_addn is enabled. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.076 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 2, record_stream_id_ : 0. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.114 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.444.120 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-1, debug name:ValueNode (2, 2, 4), front node:ValueNode (2, 2, 4) for graph:kernel_graph0 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.139 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:3 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.444.160 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 2, 4) front node:ValueNode (2, 2, 4) [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.173 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.444.181 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3446fe10 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.444.179 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_10_replace_addn in 94.56 us [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.444.202 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 2, 4) [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.201 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 3, record_stream_id_ : 0. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.236 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.444.236 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:93] BackendCommonOptimization] [PROF]BackendCommonOptimization costs 1.976 msec. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.444.260 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:100] BackendCommonOptimization] Status record: end common optimization. graph id: 0 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.261 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:4 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.296 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.444.310 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:258] OptimizationWithoutBackend] [PROF]OptimizationWithoutBackend costs 2.928 msec. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.320 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 4, record_stream_id_ : 0. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.355 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.379 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:5 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.412 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.436 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 5, record_stream_id_ : 0. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.444.438 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-2, debug name:ValueNode 2, front node:ValueNode 2 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.444.468 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 2 front node:ValueNode 2 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.470 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.444.488 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34470260 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.497 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:0 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.444.507 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 2 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.530 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.556 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 0, record_stream_id_ : 2. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.588 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.611 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:6 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.683 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.710 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 6, record_stream_id_ : 0. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.444.706 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_0_renorm_split in 129.53 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.444.739 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: reduce_axis_update [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.744 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.770 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:1 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.801 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.444.799 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-3, debug name:ValueNode (1, 1, 1), front node:ValueNode (1, 1, 1) for graph:kernel_graph0 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.827 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 1, record_stream_id_ : 3. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.444.837 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (1, 1, 1) front node:ValueNode (1, 1, 1) [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.444.858 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34470a50 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.860 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.444.879 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (1, 1, 1) [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.884 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:7 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.914 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.938 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 7, record_stream_id_ : 0. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.966 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.444.988 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:8 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.004 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_1_reduce_axis_update in 260.31 us [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.024 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.032 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission is enabled. [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.051 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 8, record_stream_id_ : 2. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.059 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim ClipByNorm not exist in name to cnode [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.082 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.083 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission in 27.95 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.107 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: space_to_batch_nd_attr_update [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.107 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:9 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.445.110 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-4, debug name:ValueNode (2, 0, 0), front node:ValueNode (2, 0, 0) for graph:kernel_graph0 [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.139 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.445.144 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 0, 0) front node:ValueNode (2, 0, 0) [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.150 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_3_space_to_batch_nd_attr_update in 40.25 us [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.162 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 9, record_stream_id_ : 3. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.445.165 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34471240 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.175 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: batch_to_space_nd_attr_update [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.445.188 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 0, 0) [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.194 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.210 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_4_batch_to_space_nd_attr_update in 32.23 us [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.219 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:10 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.245 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdamWeightDecay not exist in name to cnode [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.249 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.268 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_5_adam_weight_decay_unify_mindir in 33.35 us [INFO] KERNEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.275 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 10, record_stream_id_ : 4. [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.306 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:931] PrintGraphExecuteOrder] Graph 0 execution order: [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.315 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_6_add_depend_for_adamw in 23.75 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.340 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_7_cdist_fission is enabled. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.360 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Cdist not exist in name to cnode [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.379 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_7_cdist_fission in 19.17 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.408 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[0], node name[Default/StreamSend-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_24{[0]: ValueNode StreamSend}], event id[2] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.398 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission is enabled. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.445.421 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-5, debug name:ValueNode (4, 4, 4), front node:ValueNode (4, 4, 4) for graph:kernel_graph0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.431 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim CdistGrad not exist in name to cnode [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.450 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[1], node name[Default/StreamRecv-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_25{[0]: ValueNode StreamRecv}], event id[2] [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.445.454 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (4, 4, 4) front node:ValueNode (4, 4, 4) [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.452 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission in 20.01 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.473 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.445.474 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34471a30 [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.486 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[2], node name[Default/StreamSend-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_26{[0]: ValueNode StreamSend}], event id[3] [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.445.496 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (4, 4, 4) [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.498 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion in 5.8 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.519 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion is enabled. [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.521 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[3], node name[Default/StreamRecv-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_27{[0]: ValueNode StreamRecv}], event id[3] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.542 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion in 3.22 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.554 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[4], node name[Default/StreamSend-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_28{[0]: ValueNode StreamSend}], event id[4] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.567 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.585 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[5], node name[Default/StreamRecv-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_29{[0]: ValueNode StreamRecv}], event id[4] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.588 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_11_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir in 23.43 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.609 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.627 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_12_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir_v2 in 17.12 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.652 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[6], node name[Default/StridedSlice-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_30{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_x, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.674 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.697 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_13_sparse_softmax_cross_entropy_with_logits_unify_mindir in 49.14 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.711 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[7], node name[Default/StridedSlice-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.445.715 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-6, debug name:ValueNode 1, front node:ValueNode 1 for graph:kernel_graph0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.736 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutExt not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.445.744 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 1 front node:ValueNode 1 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.757 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_14_dropout_ext_unify_mindir1 in 38.94 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.763 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[8], node name[Default/StridedSlice-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.445.763 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34471f10 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.779 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutGradExt not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.445.794 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 1 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.798 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_15_dropoutgrad_ext_unify_mindir in 18.48 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.818 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Dropout not exist in name to cnode [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.818 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[9], node name[Default/StridedSlice-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_33{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.839 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_16_dropout_unify_mindir1 in 19.47 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.859 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: dropoutgrad_unify_mindir [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.879 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[10], node name[Default/StridedSlice-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.909 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_17_dropoutgrad_unify_mindir in 46.45 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.929 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[11], node name[Default/StridedSlice-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.934 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchange not exist in name to cnode [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.956 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_18_neighbor_exchange_unify_mindir in 22.08 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.445.973 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[12], node name[Default/Mul-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35}] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.976 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2 not exist in name to cnode [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.445.997 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_19_neighbor_exchange_v2_unify_mindir in 19.81 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.007 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[13], node name[Default/StreamSend-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_37{[0]: ValueNode StreamSend}], event id[5] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.015 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2Grad not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.446.022 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-7, debug name:ValueNode (0, 0, 0), front node:ValueNode (0, 0, 0) for graph:kernel_graph0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.035 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_20_neighbor_exchange_v2_grad_unify_mindir in 19.13 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.040 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[14], node name[Default/StreamRecv-op3], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_38{[0]: ValueNode StreamRecv}], event id[5] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.054 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAll not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.446.056 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 0, 0) front node:ValueNode (0, 0, 0) [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.076 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_21_all_to_all_unify_mindir in 19.79 us [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.446.077 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34472720 [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.081 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[15], node name[Default/AllGather-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36}], group[2-6541264347459079684] [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.446.098 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 0, 0) [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.096 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAllV not exist in name to cnode [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.114 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[16], node name[Default/StreamSend-op4], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_40{[0]: ValueNode StreamSend}], event id[0] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.117 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_22_all_to_all_v_unify_mindir in 20.12 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.145 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[17], node name[Default/StreamRecv-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_41{[0]: ValueNode StreamRecv}], event id[0] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.161 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.183 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[18], node name[Default/Split-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.184 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_23_bn_split in 44.49 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.206 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: bn_grad_unify_mindir [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.227 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[19], node name[Default/Concat-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2}] [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.267 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[20], node name[Default/StreamSend-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_46{[0]: ValueNode StreamSend}], event id[6] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.267 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_24_bn_grad_unify_mindir in 58.59 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.299 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[21], node name[Default/StreamRecv-op5], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_47{[0]: ValueNode StreamRecv}], event id[6] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.300 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.446.320 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-8, debug name:ValueNode 0, front node:ValueNode 0 for graph:kernel_graph0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.324 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_25_bn_grad_split in 30.54 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.336 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[22], node name[Default/AllGather-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43}], group[2-16057586909177180503] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.346 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.446.348 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 0 front node:ValueNode 0 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.446.367 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x34472c00 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.366 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_26_batchnormgrad_to_bninfergrad in 20.17 us [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.446.386 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 0 [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.367 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[23], node name[Default/StreamSend-op6], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_49{[0]: ValueNode StreamSend}], event id[1] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.387 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission is enabled. [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.400 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[24], node name[Default/StreamRecv-op6], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_50{[0]: ValueNode StreamRecv}], event id[1] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.416 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.437 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[25], node name[Default/Split-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.438 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission in 19.99 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.460 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.480 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_28_batchnorm_to_bninfer in 18.94 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.480 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[26], node name[Default/Concat-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1}] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.500 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge is enabled. [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.514 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[27], node name[Default/StreamSend-op7], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_55{[0]: ValueNode StreamSend}], event id[7] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.523 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Lamb not exist in name to cnode [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.543 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge in 22.49 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.544 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[28], node name[Default/StreamRecv-op7], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_56{[0]: ValueNode StreamRecv}], event id[7] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.565 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Print not exist in name to cnode [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.581 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[29], node name[Default/AllGather-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}], group[2-12944936785892925600] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.588 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_30_print_insert_placeholder_for_tensor_name in 21.62 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.610 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim GetNext not exist in name to cnode [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.613 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[30], node name[Default/StreamSend-op8], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_58{[0]: ValueNode StreamSend}], event id[8] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.631 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_31_getnext_for_ge in 21.6 us [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.446.627 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_y front node:@4_3_1___main___Net_construct_20:param_y backend is weight:0 front is weight:0 [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.644 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[31], node name[Default/StreamRecv-op8], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_59{[0]: ValueNode StreamRecv}], event id[8] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.653 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNorm not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:06.446.672 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_x front node:@4_3_1___main___Net_construct_20:param_x backend is weight:0 front is weight:0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.675 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_32_sync_bn_split in 22.91 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.683 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[32], node name[Default/StreamSend-op9], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_60{[0]: ValueNode StreamSend}], event id[9] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.696 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNormGrad not exist in name to cnode [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.715 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[33], node name[Default/StreamRecv-op9], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_61{[0]: ValueNode StreamRecv}], event id[9] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.717 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_33_sync_bn_grad_split in 19.99 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.737 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion is enabled. [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.747 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[34], node name[Default/StreamSend-op10], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_62{[0]: ValueNode StreamSend}], event id[10] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.758 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdaptiveMaxPool2D not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187789,fffe977fe0f0,python):2025-02-07-15:58:06.446.763 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:0, sequential num:2001075757 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.777 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion in 19.71 us [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.778 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[35], node name[Default/StreamRecv-op10], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_63{[0]: ValueNode StreamRecv}], event id[10] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.822 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AvgPoolGrad not exist in name to cnode [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.822 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1115] CompileGraphImpl] [PROF]PreprocessBeforeRun costs 8.388 msec. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.845 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_35_avg_pool_grad_for_ge in 46.29 us [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.446.822 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph0_SuperKernelActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:1, sequential num:2001075757 [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.446.871 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1161] CreateDeviceAddress] Status record: start create device address. graph id: 0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.867 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion is enabled. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.897 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.918 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion in 20.9 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.938 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion is enabled. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.957 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.446.977 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion in 19.26 us [INFO] GE_ADPT(187789,fffe97fff0f0,python):2025-02-07-15:58:06.447.087 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.447.237 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_38_add_attr_to_dump in 234.32 us [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.447.462 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 6_Default/StridedSlice-op0, front node: @4_3_1___main___Net_construct_20:param_x, with index: 0, addr index: 0, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.447.524 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 6_Default/StridedSlice-op0, outer index: 0, inner index:0, front node: @4_3_1___main___Net_construct_20:param_x [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.447.593 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 0, inner index: 0, dynamic is 0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.447.616 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_39_ascend_mindir_op_adapter in 344.84 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.447.645 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 is enabled. [INFO] GE_ADPT(187789,fffe9cff90f0,python):2025-02-07-15:58:06.447.632 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.447.670 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:263] DefineFlashAttentionPattern] Do FlashAttentionPattern V1. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.447.947 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 in 275.08 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.447.972 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 is enabled. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.447.996 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:377] DefineFlashAttentionPattern] Do FlashAttentionPattern V2. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.448.053 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 0_Default/StreamSend-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.448.091 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op0 [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.448.110 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1168] CreateDeviceAddress] Status record: end create device address. graph id: 0 [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.448.156 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1123] CompileGraphImpl] [PROF]CreateDeviceAddress costs 1.274 msec. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.448.099 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 6_Default/StridedSlice-op0, input index: 0, device tensor: 0x3446efa0, ptr: 0x12c7fd801200, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.448.218 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor6_Default/StridedSlice-op0, actor input: 0, graph input: 1, device tensor: 0x3446efa0, ptr: 0x12c7fd801200, ref cnt: 1 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.448.268 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 in 269.14 us [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.448.292 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 9_Default/StridedSlice-op3, front node: @4_3_1___main___Net_construct_20:param_y, with index: 0, addr index: 1, device type: 2 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.448.294 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion is enabled. [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.448.299 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1177] CacheGraphOutputToFrontNodeWithIndex] Get graph backend output nodes. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.448.327 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 9_Default/StridedSlice-op3, outer index: 1, inner index:0, front node: @4_3_1___main___Net_construct_20:param_y [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.448.343 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1185] CacheGraphOutputToFrontNodeWithIndex] Get graph front output nodes. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.448.365 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op0 [INFO] SESSION(187803,ffff93d7bc10,python):2025-02-07-15:58:06.448.424 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1203] CacheGraphOutputToFrontNodeWithIndex] Backend output: Default/AllGather-op2 debug string: @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} node ptr:0x47e50f60 with index: 0 map to front node: Default/AllGather-op2 debug string: @4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} node ptr: 0x47e03a50 with index: 0 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.448.367 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 1, inner index: 0, dynamic is 0 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.448.467 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 2, actor name : 1_Default/StreamRecv-op0, task_id_on_stream : 1. [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.448.477 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:766] CompileGraph] Status record: end compile graph. graph id: 0 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.448.488 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.448.510 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.448.521 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion in 200.69 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.448.547 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce is enabled. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.448.702 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 9_Default/StridedSlice-op3, input index: 0, device tensor: 0x3446ebd0, ptr: 0x12c7fd801600, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.448.711 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op0 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.448.728 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1605] CompileGraphFromSegment] Compile cut segment, the cut node: @4_3_1___main___Net_construct_20:ValueNode_64{[0]: ValueNode Return, [1]: CNode_22} [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.448.780 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1129] CompileGraphs] [PROF]CompileSubGraph costs 202.602 msec. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.448.777 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce in 204.25 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.448.806 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm is enabled. [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.448.812 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:976] ExportCompileCacheKBK] Compile cache: disable by front compile cache config. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.448.733 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor9_Default/StridedSlice-op3, actor input: 0, graph input: 0, device tensor: 0x3446ebd0, ptr: 0x12c7fd801600, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.448.830 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 2_Default/StreamSend-op1, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.448.849 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op1 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.448.881 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1143] CompileGraphs] Status record: construct the graph compiler info. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.448.919 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph0_SuperKernelActor and check running condition:1, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.448.936 [mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc:1001] Parse] Control node parser is not inited. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe977fe0f0,python):2025-02-07-15:58:06.448.933 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:317] RunOpData] Actor(kernel_graph_0_OutputActor) receive the input op data and output position:0 device tensor:0x34477770 ptr:0 ref count:18446744073709551615 origin ref count:18446744073709551615 dynamic ref count:2147483647 from memory pool:0 output node:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} index:0 [INFO] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.448.960 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:57] IncreaseLoopCount] Loop count actor(kernel_graph_0_LoopCountActor) running, loop count: 1, current count: 1, total running count: 1 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:06.448.981 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.448.986 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op1 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe977fe0f0,python):2025-02-07-15:58:06.448.999 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:388] CreateOutputTensor] Create output tensor, output node: Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}, output index: 0, output position: 0, output kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.000 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:859] Transform] Graph(kernel_graph_0) transforms actor begin, strategy:pipeline [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.448.991 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm in 157.14 us [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.025 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 3, actor name : 3_Default/StreamRecv-op1, task_id_on_stream : 1. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.449.030 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.045 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.064 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op1 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe977fe0f0,python):2025-02-07-15:58:06.449.075 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:470] CreateOutputTensor] Create device tensor:0xfffe740095a0, size: 512 type:48 output node:Default/AllGather-op2 output index:0 output position:0, origin output device tensor: 0x34477770 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.168 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op1 [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.174 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:480] InitGraphParameterStore] Init graph parameter store: kernel_graph_0, outer size: 2 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.449.177 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion in 122.48 us [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.191 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 4_Default/StreamSend-op2, task_id_on_stream : 3. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.449.202 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.208 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.218 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 0, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_x [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.252 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 1, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_y [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.297 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x496c0f20 for node:ValueNode 2 node addr:0x47e0a8d0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.338 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496c0f20 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.348 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op2 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.449.346 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion in 120.42 us [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.372 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 4, actor name : 5_Default/StreamRecv-op2, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.369 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x496d4550 for node:ValueNode (0, 0, 2) node addr:0x47e0bb80 device type:2 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.449.373 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.390 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.390 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d4550 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.408 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.418 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x496d4bd0 for node:ValueNode (4, 4, 4) node addr:0x47e0e390 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.436 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d4bd0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.460 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x496d5090 for node:ValueNode 0 node addr:0x47e0a7a0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.478 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d5090 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.497 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op2 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.449.496 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion in 98.7 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.503 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x496d58a0 for node:ValueNode (2, 2, 4) node addr:0x47e0bcb0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.523 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d58a0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.449.521 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.539 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 6_Default/StridedSlice-op0, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.547 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x496d5d80 for node:ValueNode 1 node addr:0x47e0a110 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:06.449.558 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.565 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d5d80 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.602 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x496d6550 for node:ValueNode (2, 0, 0) node addr:0x47e0e0a0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.623 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d6550 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.648 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x496d6d40 for node:ValueNode (0, 0, 0) node addr:0x47e0ccf0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.665 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d6d40 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.689 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x496d7530 for node:ValueNode (1, 1, 1) node addr:0x47e0c000 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.707 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d7530 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.449.765 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion in 214.77 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.449.789 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.856 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:442] ChangeGraphMode] Enable kbk subgraph execute and set run mode for graph: 0 to GraphMode. [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.878 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:462] TryEnableKbkSubGraphExecMode] Enable kbk subgraph execute mode for actor set: kernel_graph_0 [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.940 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:391] TryEnableInputOptimize] Enable input optimize for actor set: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.449.988 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_y for host data source actor. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.045 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion in 231.3 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.049 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_y for front node:@4_3_1___main___Net_construct_20:param_y index:0 position:1 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.070 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.079 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_x for host data source actor. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.098 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion in 5.41 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.115 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_x for front node:@4_3_1___main___Net_construct_20:param_x index:0 position:0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.121 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion is enabled. [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.157 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2268] BuildDataPrepareActorForGraphParameterStore] Create data prepare actor: kernel_graph_0_DataPrepareActor [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.270 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion in 123.13 us [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.273 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2224] BuildLoopCountActor] Create loop count actor: kernel_graph_0_LoopCountActor [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.294 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_12_shape_reshape is enabled. [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.305 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2257] BuildOutputActor] Create output actor: kernel_graph_0_OutputActor [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.409 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1509] CacheGraphOutputToActor] Cache graph 0 output node:Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} with index:0 to actor:kernel_graph0_SuperKernelActor, from front node:Default/AllGather-op2 debug string:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} with index:0 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.413 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_12_shape_reshape in 94.67 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.438 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.601 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.616 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion in 154.4 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.628 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496d3490 origin ref count:2 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.642 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.663 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.685 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496d3860 origin ref count:2 [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.782 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1711] Link] [PROF]GraphSchedulerLinkSinkMode costs 0.23 msec. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.807 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion in 130.65 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.816 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph_0_LoopCountActor@ [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.831 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.841 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph0_SuperKernelActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.861 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_LoopCountActor@ to actor:kernel_graph_0_OutputActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.882 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_OutputActor@ to actor:kernel_graph_0_DataPrepareActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.901 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:3713] LinkControlArrowForCopyActor] Link control arrow for copy actor start, copy actor size:0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.935 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496dc080 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.965 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion in 108.57 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.450.972 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:556] AddResultArrow] Add result arrow from actor:kernel_graph0_SuperKernelActor to actor:kernel_graph_0_OutputActor@ from kernel@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} device address:0x496dc080 original ref count:18446744073709551615 ref count:18446744073709551615 dynamic ref count:2147483647 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.450.989 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion is enabled. [INFO] GE_ADPT(187789,fffe9cff90f0,python):2025-02-07-15:58:06.451.030 [mindspore/ccsrc/transform/acl_ir/acl_allocator.cc:104] RegisterAllocator] Register AclAllocator [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.064 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.451.107 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion in 93.86 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.114 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.451.131 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.165 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.206 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.244 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.451.270 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion in 115.72 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.281 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.451.293 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.521 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.569 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.451.585 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion in 266.47 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.451.614 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.635 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.679 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.758 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.805 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.451.861 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion in 222.2 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.862 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.451.888 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.908 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.451.987 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.029 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.087 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.452.130 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion in 216.2 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.137 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.452.156 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops is enabled. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.174 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.216 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.253 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.298 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.327 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 4, send_actor : 0x47e56ea0, recv_actor : 0x47e57900. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.347 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 8, send_actor : 0x496ea670, recv_actor : 0x496eae80. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.364 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 3, send_actor : 0x47e35ad0, recv_actor : 0x47e565d0. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.452.375 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops in 193.25 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.380 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 11, send_actor : 0x496ee620, recv_actor : 0x496ef060. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.398 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 10, send_actor : 0x496ed1a0, recv_actor : 0x496edbe0. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.416 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 1, send_actor : 0x496e2dd0, recv_actor : 0x496e3710. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.452.423 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:318] GEUnifyMindIR] [PROF]GEUnifyMindIR costs 8.053 msec. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.434 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 5, send_actor : 0x47e58340, recv_actor : 0x47e58d80. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.452 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 6, send_actor : 0x496e0f70, recv_actor : 0x496e1940. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.470 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 7, send_actor : 0x496e5420, recv_actor : 0x496e5d60. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.487 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 9, send_actor : 0x496ebea0, recv_actor : 0x496ec760. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.504 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 2, send_actor : 0x496e7050, recv_actor : 0x496e7eb0. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.807 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op0 addr:0x496d4160 type:48, kernel tensor addr:0x496d3ef0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.452.911 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.925 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op1 addr:0x496d7a20 type:48, kernel tensor addr:0x496d77b0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.452.992 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.453.084 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:38] MarkRefGraph] Mark graph is ref graph: 0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.453.104 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op2 addr:0x496d7f30 type:48, kernel tensor addr:0x496d7cc0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.453.167 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.453.251 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op3 addr:0x496d8440 type:48, kernel tensor addr:0x496d81d0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.453.349 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op4 addr:0x496d8950 type:48, kernel tensor addr:0x496d86e0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.453.409 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.453.508 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op5 addr:0x496d8e60 type:48, kernel tensor addr:0x496d8bf0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.453.568 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.453.701 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unfold_inputs_for_special_nodes_pm_0_ascend_convert_tuple_input_to_dynamic_input in 521.43 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.453.711 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Mul-op0 addr:0x496d9370 type:48, kernel tensor addr:0x496d9100, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.453.763 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.453.837 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op0 addr:0x496d9880 type:48, kernel tensor addr:0x496d9610, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.453.892 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36} is thread safe. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.453.890 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_0_process_call_inline in 95.84 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.453.973 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op0 addr:0x496d9e00 type:48, kernel tensor addr:0x496d9c20, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.004 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op0 addr:0x496da2f0 type:48, kernel tensor addr:0x496da110, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.047 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.123 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op0 addr:0x496da770 type:48, kernel tensor addr:0x496da500, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.172 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2} is thread safe. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.454.223 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_1_seed_adapter in 292.51 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.234 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op1 addr:0x496dac80 type:48, kernel tensor addr:0x496daa10, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.276 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43} is thread safe. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.454.284 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_2_insert_tensor_move_for_communication in 27.58 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.358 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op1 addr:0x496db200 type:48, kernel tensor addr:0x496db020, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.387 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op1 addr:0x496db6f0 type:48, kernel tensor addr:0x496db510, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.454.405 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_3_process partial inline in 91.01 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.429 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.502 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op1 addr:0x496dbb70 type:48, kernel tensor addr:0x496db900, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.562 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1} is thread safe. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.454.586 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_4_expander_fallback in 145.8 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.627 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op2 addr:0x496dc080 type:48, kernel tensor addr:0x496dbe10, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.667 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} is thread safe. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.454.709 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_5_convert_pad_v3_paddings in 92.48 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.454.823 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_6_convert_pad_v3_grad_paddings in 84.8 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.826 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op1 input kernel:Default/StridedSlice-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.853 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496d4160 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.898 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op2 input kernel:Default/StridedSlice-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.919 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496d7a20 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.971 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op4 input kernel:Default/StridedSlice-op3 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.454.992 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496d8440 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.031 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op5 input kernel:Default/StridedSlice-op4 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.455.036 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_7_resize_bilinear_add_attr in 183.96 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.052 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496d8950 origin ref count:2 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.455.069 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_8_backend_custom_depend in 5.97 us [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.094 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op2 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.115 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496d7f30 origin ref count:2 [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.455.113 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:237] GEBackendOptimizeACL] [PROF]GEBackendOptimizeACL costs 1.336 msec. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.148 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op5 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.455.169 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:137] OptimizeACLGraph] [PROF]OptimizeACLGraph costs 2.088 msec. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.167 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496d8e60 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.210 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op0 input kernel:Default/Mul-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.230 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496d9370 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.259 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op0 input kernel:Default/AllGather-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.276 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496d9880 origin ref count:2 [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.455.279 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.304 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.323 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496d9e00 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.345 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.363 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496da2f0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.391 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op1 input kernel:Default/Concat-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.409 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496da770 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.437 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op1 input kernel:Default/AllGather-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.454 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496dac80 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.480 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.498 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496db200 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.521 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.547 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496db6f0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.578 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op2 input kernel:Default/Concat-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.597 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x496dbb70 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.655 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[y] debug_name: @kernel_graph0:param_y use count is: 1 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.682 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[x] debug_name: @kernel_graph0:param_x use count is: 1 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.708 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1603] AddControlArrowForNoInputActor] Add control arrow for no input arrow actor: kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.729 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph0_SuperKernelActor@ [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.838 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:887] Transform] [PROF]GraphSchedulerLink costs 5.391 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.455.875 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] StridedSlice select aclop kernel [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.925 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 1_actor_set_kernel_graph_0_invalid_data_arrow_elimination in 1.50001 us [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.972 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 2_actor_set_kernel_graph_0_multi_actor_fusion in 18.29 us [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.455.995 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 3_actor_set_kernel_graph_0_batch_data_arrow_fusion in 1.05007 us [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.021 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:904] Transform] Graph(kernel_graph_0) transforms actor end. [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.093 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:354] Init] kernel_graph_0 has the parameter input num: 2 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.150 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1153] CompileGraphs] [PROF]GraphScheduler costs 7.23 msec. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.181 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:29] operator()] Create MultiStreamController. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.206 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:40] Refresh] Stream manager initialize, device_context : 0x35083c70, stream_size : 5. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.228 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:214] Resize] Task id on stream manager initialize : 0, stream_size : 5. [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.264 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1166] CompileGraphs] [PROF]compile_backend_graph costs 1805.89 msec. [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.297 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1169] CompileGraphs] Status record: end compile function graph: 4_3_1___main___Net_construct_20, produce actor: kernel_graph_0 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.329 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end task_emit action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.353 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.389 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:281] SetLoopCount] Change vm_loop_flag to 0, set loop_size to 1 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.422 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start execute action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.453 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end execute action. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.456.471 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 42 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.456.607 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Mul select aclnn kernel [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.456.665 [mindspore/ops/kernel/ascend/opapi/aclnn_kernel_build.cc:77] IsEnabledAclnnDispatch] AllGather is not defined in opdef. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.456.802 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] AllGather select hccl kernel [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.456.886 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op0 is view op and not support aclnn [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.457.063 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Split select aclop kernel TotalTime = 1.90408, [21] [bootstrap]: 0.00141885 [type_inference]: 0.0177534 [auto_monad]: 0.00030273 [graph_reusing]: 9.163e-05 [inline]: 0.0161894, [2] [rewriter_before_opt_a]: 0.00013947 [a1a2]: 0.0159046, [2] [Cycle 1]: 0.00241007, [11] [expand_dump_flag]: 7.68004e-06 [switch_simplify]: 7.34599e-05 [loop_unroll]: 3.416e-05 [a_1]: 0.00043073 [recompute_prepare]: 3.418e-05 [updatestate_depend_eliminate]: 1.45499e-05 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 3.98001e-06 [parameter_eliminate]: 1.034e-05 [a_2]: 0.00087879 [parallel_inline_pass]: 3.874e-05 [Cycle 2]: 0.00185294, [11] [expand_dump_flag]: 6.12997e-06 [switch_simplify]: 3.28301e-05 [loop_unroll]: 3.086e-05 [a_1]: 0.0001927 [recompute_prepare]: 3.171e-05 [updatestate_depend_eliminate]: 1.193e-05 [updatestate_assign_eliminate]: 4.9799e-06 [updatestate_loads_eliminate]: 4.37e-06 [parameter_eliminate]: 7.58003e-06 [a_2]: 0.00085847 [parallel_inline_pass]: 3.99901e-05 [parallel-infer-symbol]: 0.00022706 [pre_auto_parallel]: 0.00019212 [insert-virtual-dataset]: 0.00139406 [parallel-infer-symbol-second]: 7.207e-05 [dataset_repeat_opt]: 0.00042545 [pipeline_split]: 0.00016551 [optimize]: 0.0568691, [52] [py_interpret_to_execute]: 7.751e-05 [rewriter_before_opt_a]: 0.00010416 [opt_a]: 0.0464026, [3] [Cycle 1]: 0.0253315, [46] [expand_dump_flag]: 5.28002e-06 [switch_simplify]: 5.27201e-05 [loop_unroll]: 3.278e-05 [a_1]: 0.00042544 [recompute_prepare]: 3.835e-05 [updatestate_depend_eliminate]: 1.765e-05 [updatestate_assign_eliminate]: 9.50007e-06 [updatestate_loads_eliminate]: 6.38003e-06 [parameter_eliminate]: 8.61997e-06 [a_2]: 0.00108776 [accelerated_algorithm]: 4.338e-05 [shard]: 4.847e-05 [meta_shard_fg_expand]: 7.28993e-06 [shard_inline]: 4.076e-05 [auto_parallel]: 7.328e-05 [parallel]: 0.0135301 [flash_sp]: 6.22999e-05 [merge_comm]: 4.225e-05 [allreduce_fusion]: 3.669e-05 [matmul_add_comm_reduction]: 4.165e-05 [allreduce_slice_to_reducescatter]: 1.07998e-06 [virtual_shard_identity]: 0.00010508 [virtual_dataset]: 0.00017546 [get_grad_eliminate_]: 9.179e-05 [virtual_output]: 0.00011144 [merge_forward]: 3.97899e-05 [cell_reuse_recompute_pass]: 6.97002e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00018317 [before_grad]: 0.00011636 [inplace_validation]: 3.309e-05 [parallel_renormalize]: 0.00402836 [update_top_fg]: 1.85997e-06 [cast_eliminate]: 0.00011283 [meta_fg_expand]: 3.706e-05 [inplace_validation_after_expand]: 5.455e-05 [flash_sp_send_recv_attached]: 6.619e-05 [receive_attached]: 2.391e-05 [after_resolve]: 0.0001012 [a_after_grad]: 0.00014636 [special_op_eliminate]: 7.811e-05 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 1.248e-05 [auto_monad_grad]: 5.12996e-06 [auto_monad_eliminator]: 6.345e-05 [cse]: 0.00023786 [a_3]: 0.00082629 [Cycle 2]: 0.0126722, [46] [expand_dump_flag]: 6.17001e-06 [switch_simplify]: 8.824e-05 [loop_unroll]: 7.631e-05 [a_1]: 0.00181621 [recompute_prepare]: 9.196e-05 [updatestate_depend_eliminate]: 4.452e-05 [updatestate_assign_eliminate]: 2.758e-05 [updatestate_loads_eliminate]: 2.378e-05 [parameter_eliminate]: 8.07992e-06 [a_2]: 0.00171604 [accelerated_algorithm]: 0.00018298 [shard]: 4.881e-05 [meta_shard_fg_expand]: 1.98201e-05 [shard_inline]: 7.77e-05 [auto_parallel]: 8.42101e-05 [parallel]: 1.885e-05 [flash_sp]: 4.602e-05 [merge_comm]: 3.79001e-05 [allreduce_fusion]: 3.09999e-05 [matmul_add_comm_reduction]: 3.07299e-05 [allreduce_slice_to_reducescatter]: 7.79983e-07 [virtual_shard_identity]: 8.347e-05 [virtual_dataset]: 7.295e-05 [get_grad_eliminate_]: 7.17499e-05 [virtual_output]: 7.12399e-05 [merge_forward]: 3.13801e-05 [cell_reuse_recompute_pass]: 7.52008e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00017446 [before_grad]: 0.00010433 [inplace_validation]: 3.04e-05 [parallel_renormalize]: 3.19909e-07 [update_top_fg]: 1.25007e-06 [cast_eliminate]: 7.739e-05 [meta_fg_expand]: 3.702e-05 [inplace_validation_after_expand]: 4.304e-05 [flash_sp_send_recv_attached]: 5.82996e-06 [receive_attached]: 2.32004e-06 [after_resolve]: 9.13501e-05 [a_after_grad]: 0.0001058 [special_op_eliminate]: 0.00020772 [renormalize]: 0.00295478 [add_forward_monad_depend]: 1.00899e-05 [auto_monad_grad]: 3.90992e-06 [auto_monad_eliminator]: 5.82599e-05 [cse]: 0.00017679 [a_3]: 0.00081643 [Cycle 3]: 0.00831968, [46] [expand_dump_flag]: 3.36999e-06 [switch_simplify]: 7.522e-05 [loop_unroll]: 6.916e-05 [a_1]: 0.00157669 [recompute_prepare]: 7.75701e-05 [updatestate_depend_eliminate]: 3.995e-05 [updatestate_assign_eliminate]: 2.831e-05 [updatestate_loads_eliminate]: 2.76e-05 [parameter_eliminate]: 6.68992e-06 [a_2]: 0.00160987 [accelerated_algorithm]: 0.00010294 [shard]: 4.301e-05 [meta_shard_fg_expand]: 2.095e-05 [shard_inline]: 7.362e-05 [auto_parallel]: 8.914e-05 [parallel]: 1.739e-05 [flash_sp]: 3.16999e-06 [merge_comm]: 3.71899e-05 [allreduce_fusion]: 3.23299e-05 [matmul_add_comm_reduction]: 3.784e-05 [allreduce_slice_to_reducescatter]: 1.66998e-06 [virtual_shard_identity]: 7.668e-05 [virtual_dataset]: 7.011e-05 [get_grad_eliminate_]: 6.92101e-05 [virtual_output]: 6.965e-05 [merge_forward]: 2.892e-05 [cell_reuse_recompute_pass]: 6.48992e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.0001504 [before_grad]: 0.00010267 [inplace_validation]: 2.669e-05 [parallel_renormalize]: 7.99773e-08 [update_top_fg]: 9.69972e-07 [cast_eliminate]: 7.15e-05 [meta_fg_expand]: 3.38601e-05 [inplace_validation_after_expand]: 4.084e-05 [flash_sp_send_recv_attached]: 3.83006e-06 [receive_attached]: 2.30002e-06 [after_resolve]: 8.059e-05 [a_after_grad]: 0.00010556 [special_op_eliminate]: 6.815e-05 [renormalize]: 7.0082e-08 [add_forward_monad_depend]: 6.86001e-06 [auto_monad_grad]: 4.75009e-06 [auto_monad_eliminator]: 5.00099e-05 [cse]: 0.00018568 [a_3]: 0.00071158 [py_interpret_to_execute_after_opt_a]: 0.00010042 [slice_cell_reuse_recomputed_activation]: 4.36901e-05 [rewriter_after_opt_a]: 0.00047413 [convert_after_rewriter]: 7.965e-05 [order_py_execute_after_rewriter]: 8.12301e-05 [opt_b]: 0.00296011, [1] [Cycle 1]: 0.00290764, [7] [b_1]: 0.00225616 [b_2]: 7.394e-05 [updatestate_depend_eliminate]: 3.26301e-05 [updatestate_ass[INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.457.259 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Concat select aclnn kernel ign_eliminate]: 2.482e-05 [updatestate_loads_eliminate]: 2.423e-05 [renormalize]: 1.74996e-06 [cse]: 0.00014498 [optimize_parallel_all_gather_comm]: 8.038e-05 [overlap_param_gather]: 3.896e-05 [cconv]: 8.96701e-05 [loop_unroll]: 0.00096681 [opt_after_cconv]: 0.00088621, [1] [Cycle 1]: 0.00084127, [7] [c_1]: 0.00028598 [parameter_eliminate]: 4.62995e-06 [updatestate_depend_eliminate]: 3.126e-05 [updatestate_assign_eliminate]: 2.63101e-05 [updatestate_loads_eliminate]: 2.406e-05 [cse]: 0.00013186 [renormalize]: 6.10016e-07 [remove_dup_value]: 0.0003508 [tuple_transform]: 0.00050475, [1] [Cycle 1]: 0.00046042, [2] [d_1]: 0.00036183 [renormalize]: 4.29922e-07 [partial_unused_args_eliminate]: 3.91999e-05 [add_cache_embedding]: 0.00010482 [add_recomputation]: 0.00031568 [cse_after_recomputation]: 0.0001767, [1] [Cycle 1]: 0.00012613, [1] [cse]: 7.195e-05 [environ_conv]: 9.308e-05 [swap_dp_allreduce_reducescatter]: 6.705e-05 [bias_add_comm_swap]: 3.985e-05 [label_micro_interleaved_index]: 3.819e-05 [label_fine_grained_interleaved_index]: 3.97e-05 [merge_cast_opt]: 3.618e-05 [slice_recompute_activation]: 7.006e-05 [micro_interleaved_order_control]: 3.69099e-05 [assign_add_opt]: 0.00022191 [ForceFp32Comm]: 3.719e-05 [remove_cast_before_assign_add]: 6.188e-05 [full_micro_interleaved_order_control]: 3.915e-05 [reorder_send_recv_between_fp_bp]: 3.706e-05 [comm_op_add_attrs]: 0.00010291 [add_comm_op_reuse_tag]: 0.00010368 [interleave_split_concat_branches]: 3.734e-05 [interleave_parallel_branches]: 3.524e-05 [overlap_opt_shard_in_pipeline]: 6.914e-05 [overlap_opt_shard_grad_in_pipeline]: 4.01201e-05 [control_data_broadcast_order]: 4.432e-05 [grouped_pairwise_exchange_alltoall]: 5.005e-05 [offloading_packed_experts]: 9.114e-05 [overlap_recompute_and_grad_model_parallel]: 3.668e-05 [overlap_grad_matmul_and_grad_allreduce]: 3.51199e-05 [overlap_recompute_allgather_and_fa_grad]: 5.53001e-05 [overlap_grad_ring_attention]: 8.295e-05 [overlap_grad_flash_sp]: 7.373e-05 [begin_end_overlap_inline]: 3.307e-05 [split_matmul_comm_elemetwise]: 4.65399e-05 [split_layernorm_comm]: 3.882e-05 [handle_group_info]: 4.097e-05 [symbol_engine_optimizer]: 0.00067703, [1] [Cycle 1]: 0.00063103, [6] [build]: 4.442e-05 [elim_shapecalc]: 6.798e-05 [elim_not_effective]: 8.75801e-05 [opt_reshape]: 5.97701e-05 [fold_const_symbol]: 8.312e-05 [renormalize]: 4.20026e-07 [pipeline_parallel_scheduler]: 5.915e-05 [auto_monad_reorder]: 0.00013485 [get_jit_bprop_graph]: 5.482e-05 [rewriter_after_jit_bprop_graph]: 5.356e-05 [eliminate_special_op_node]: 0.00107938 [distribtued_split]: 0.00031106 [validate]: 0.00018061 [task_emit]: 1.80646 [execute]: 6.682e-05 Sums bootstrap : 0.001419s : 0.08% type_inference : 0.017753s : 0.94% auto_monad : 0.000303s : 0.02% graph_reusing : 0.000092s : 0.00% inline.rewriter_before_opt_a : 0.000139s : 0.01% inline.a1a2.expand_dump_flag : 0.000014s : 0.00% inline.a1a2.switch_simplify : 0.000106s : 0.01% inline.a1a2.loop_unroll : 0.000065s : 0.00% inline.a1a2.a_1 : 0.000623s : 0.03% inline.a1a2.recompute_prepare : 0.000066s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000026s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000010s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000008s : 0.00% inline.a1a2.parameter_eliminate : 0.000018s : 0.00% inline.a1a2.a_2 : 0.001737s : 0.09% inline.a1a2.parallel_inline_pass : 0.000079s : 0.00% parallel-infer-symbol : 0.000227s : 0.01% pre_auto_parallel : 0.000192s : 0.01% insert-virtual-dataset : 0.001394s : 0.07% parallel-infer-symbol-second : 0.000072s : 0.00% dataset_repeat_opt : 0.000425s : 0.02% pipeline_split : 0.000166s : 0.01% optimize.py_interpret_to_execute : 0.000078s : 0.00% optimize.rewriter_before_opt_a : 0.000104s : 0.01% optimize.opt_a.expand_dump_flag : 0.000015s : 0.00% optimize.opt_a.switch_simplify : 0.000216s : 0.01% optimize.opt_a.loop_unroll : 0.000178s : 0.01% optimize.opt_a.a_1 : 0.003818s : 0.20% optimize.opt_a.recompute_prepare : 0.000208s : 0.01% optimize.opt_a.updatestate_depend_eliminate : 0.000102s : 0.01% optimize.opt_a.updatestate_assign_eliminate : 0.000065s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000058s : 0.00% optimize.opt_a.parameter_eliminate : 0.000023s : 0.00% optimize.opt_a.a_2 : 0.004414s : 0.23% optimize.opt_a.accelerated_algorithm : 0.000329s : 0.02% optimize.opt_a.shard : 0.000140s : 0.01% optimize.opt_a.meta_shard_fg_expand : 0.000048s : 0.00% optimize.opt_a.shard_inline : 0.000192s : 0.01% optimize.opt_a.auto_parallel : 0.000247s : 0.01% optimize.opt_a.parallel : 0.013566s : 0.72% optimize.opt_a.flash_sp : 0.000111s : 0.01% optimize.opt_a.merge_comm : 0.000117s : 0.01% optimize.opt_a.allreduce_fusion : 0.000100s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000110s : 0.01% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000004s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000265s : 0.01% optimize.opt_a.virtual_dataset : 0.000319s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000233s : 0.01% optimize.opt_a.virtual_output : 0.000252s : 0.01% optimize.opt_a.merge_forward : 0.000100s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000021s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000508s : 0.03% optimize.opt_a.before_grad : 0.000323s : 0.02% optimize.opt_a.inplace_validation : 0.000090s : 0.00% optimize.opt_a.parallel_renormalize : 0.004029s : 0.21% optimize.opt_a.update_top_fg : 0.000004s : 0.00% optimize.opt_a.cast_eliminate : 0.000262s : 0.01% optimize.opt_a.meta_fg_expand : 0.000108s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000138s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000076s : 0.00% optimize.opt_a.receive_attached : 0.000029s : 0.00% optimize.opt_a.after_resolve : 0.000273s : 0.01% optimize.opt_a.a_after_grad : 0.000358s : 0.02% optimize.opt_a.special_op_eliminate : 0.000354s : 0.02% optimize.opt_a.renormalize : 0.002955s : 0.16% optimize.opt_a.add_forward_monad_depend : 0.000029s : 0.00% optimize.opt_a.auto_monad_grad : 0.000014s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000172s : 0.01% optimize.opt_a.cse : 0.000600s : 0.03% optimize.opt_a.a_3 : 0.002354s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000100s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000044s : 0.00% optimize.rewriter_after_opt_a : 0.000474s : 0.03% optimize.convert_after_rewriter : 0.000080s : 0.00% optimize.order_py_execute_after_rewriter : 0.000081s : 0.00% optimize.opt_b.b_1 : 0.002256s : 0.12% optimize.opt_b.b_2 : 0.000074s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000033s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000025s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000024s : 0.00% optimize.opt_b.renormalize : 0.000002s : 0.00% optimize.opt_b.cse : 0.000145s : 0.01% optimize.optimize_parallel_all_gather_comm : 0.000080s : 0.00% optimize.overlap_param_gather : 0.000039s : 0.00% optimize.cconv : 0.000090s : 0.00% optimize.loop_unroll : 0.000967s : 0.05% optimize.opt_after_cconv.c_1 : 0.000286s : 0.02% optimize.opt_after_cconv.parameter_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000031s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000026s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000024s : 0.00% optimize.opt_after_cconv.cse : 0.000132s : 0.01% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000351s : 0.02% optimize.tuple_transform.d_1 : 0.000362s : 0.02% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000039s : 0.00% optimize.add_cache_embedding : 0.000105s : 0.01% optimize.add_recomputation : 0.000316s : 0.02% optimize.cse_after_recomputation.cse : 0.000072s : 0.00% optimize.environ_conv : 0.000093s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000067s : 0.00% optimize.bias_add_comm_swap : 0.000040s : 0.00% optimize.label_micro_interleaved_index : 0.000038s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000040s : 0.00% optimize.merge_cast_opt : 0.000036s : 0.00% optimize.slice_recompute_activation : 0.000070s : 0.00% optimize.micro_interleaved_order_control : 0.000037s : 0.00% optimize.assign_add_opt : 0.000222s : 0.01% optimize.ForceFp32Comm : 0.000037s : 0.00% optimize.remove_cast_before_assign_add : 0.000062s : 0.00% optimize.full_micro_interleaved_order_control : 0.000039s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000037s : 0.00% optimize.comm_op_add_attrs : 0.000103s : 0.01% optimize.add_comm_op_reuse_tag : 0.000104s : 0.01% optimize.interleave_split_concat_branches : 0.000037s : 0.00% optimize.interleave_parallel_branches : 0.000035s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000069s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000040s : 0.00% optimize.control_data_broadcast_order : 0.000044s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000050s : 0.00% optimize.offloading_packed_experts : 0.000091s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000037s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000035s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000055s : 0.00% optimize.overlap_grad_ring_attention : 0.000083s : 0.00% optimize.overlap_grad_flash_sp : 0.000074s : 0.00% optimize.begin_end_overlap_inline : 0.000033s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000047s : 0.00% optimize.split_layernorm_comm : 0.000039s : 0.00% optimize.handle_group_info : 0.000041s : 0.00% optimize.symbol_engine_optimizer.build : 0.000044s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000068s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000088s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000060s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000083s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000059s : 0.00% auto_monad_reorder : 0.000135s : 0.01% get_jit_bprop_graph : 0.000055s : 0.00% rewriter_after_jit_bprop_graph : 0.000054s : 0.00% eliminate_special_op_node : 0.001079s : 0.06% distribtued_split : 0.000311s : 0.02% validate : 0.000181s : 0.01% task_emit : 1.806463s : 96.09% execute : 0.000067s : 0.00% [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.457.375 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op1 is view op and not support aclnn Time group info: ------[substitution.] 0.000929 352 14.40% : 0.000134s : 10: substitution.arithmetic_simplify 1.07% : 0.000010s : 21: substitution.elim_not_effective 3.09% : 0.000029s : 14: substitution.float_tuple_getitem_switch 1.01% : 0.000009s : 21: substitution.fold_const_symbol 2.96% : 0.000027s : 29: substitution.graph_param_transform 13.15% : 0.000122s : 1: substitution.inline 4.84% : 0.000045s : 66: substitution.j_node_and_user_rematch 5.68% : 0.000053s : 4: substitution.less_batch_normalization 1.92% : 0.000018s : 10: substitution.minmaximum_grad 6.34% : 0.000059s : 66: substitution.remove_not_recompute_node 1.68% : 0.000016s : 6: substitution.replace_old_param 7.19% : 0.000067s : 18: substitution.tuple_list_convert_item_index_to_positive 5.24% : 0.000049s : 18: substitution.tuple_list_get_item_const_eliminator 4.19% : 0.000039s : 18: substitution.tuple_list_get_item_depend_reorder 19.53% : 0.000181s : 30: substitution.tuple_list_get_item_eliminator 4.37% : 0.000041s : 18: substitution.tuple_list_get_set_item_eliminator 2.83% : 0.000026s : 1: substitution.virtual_dataset_eliminate 0.51% : 0.000005s : 1: substitution.virtual_output_eliminate ------[type_inference.] 0.017588 2 96.74% : 0.017015s : 1: type_inference.infer 3.26% : 0.000573s : 1: type_inference.specialize ------[replace.] 0.000149 5 14.70% : 0.000022s : 1: replace.inline 43.27% : 0.000064s : 2: replace.tuple_list_get_item_eliminator 31.57% : 0.000047s : 1: replace.virtual_dataset_eliminate 10.46% : 0.000016s : 1: replace.virtual_output_eliminate ------[match.] 0.000159 5 75.88% : 0.000121s : 1: match.inline 6.30% : 0.000010s : 2: match.tuple_list_get_item_eliminator 15.68% : 0.000025s : 1: match.virtual_dataset_eliminate 2.13% : 0.000003s : 1: match.virtual_output_eliminate ------[predicate.] 0.001827 11225 0.79% : 0.000014s : 100: predicate.accumulaten_eliminater 0.43% : 0.000008s : 29: predicate.ad_related_special_op_eliminate 0.87% : 0.000016s : 97: predicate.addn_check_dump 0.76% : 0.000014s : 100: predicate.addn_zero_filter 0.71% : 0.000013s : 100: predicate.adjust_all_reduce_mul_add 2.51% : 0.000046s : 197: predicate.arithmetic_simplify 2.18% : 0.000040s : 226: predicate.cast_eliminate 1.04% : 0.000019s : 126: predicate.check_bprop_eliminate 0.85% : 0.000016s : 97: predicate.compare_switch_simplify 0.26% : 0.000005s : 41: predicate.const_output_eliminate 0.27% : 0.000005s : 29: predicate.convert_tensor_all_eliminate 1.17% : 0.000021s : 102: predicate.convert_tensor_eliminate 0.87% : 0.000016s : 97: predicate.depend_value_elim 0.76% : 0.000014s : 100: predicate.dict_get_item_const_eliminator 0.85% : 0.000016s : 100: predicate.dict_get_item_eliminator 0.84% : 0.000015s : 100: predicate.dict_set_item_eliminator 0.17% : 0.000003s : 29: predicate.elim_not_effective 0.32% : 0.000006s : 29: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000020s : 141: predicate.environ_add_const_eliminate 1.05% : 0.000019s : 141: predicate.environ_get_add_eliminate 1.06% : 0.000019s : 141: predicate.environ_get_depend_swap 2.07% : 0.000038s : 238: predicate.environ_get_eliminate 1.06% : 0.000019s : 141: predicate.environ_get_set_eliminate 0.78% : 0.000014s : 103: predicate.exchange_switch_depend_value 1.26% : 0.000023s : 103: predicate.float_depend_g_call 0.89% : 0.000016s : 97: predicate.float_environ_get_switch 1.28% : 0.000023s : 138: predicate.float_tuple_getitem_switch 0.16% : 0.000003s : 29: predicate.fold_const_symbol 1.14% : 0.000021s : 127: predicate.get_grad_eliminate 0.18% : 0.000003s : 29: predicate.graph_param_transform 0.96%[INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.457.649 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:298] SelectKernel] [PROF]SelectKernel costs 2.437 msec. : 0.000017s : 97: predicate.incorporate_call 0.86% : 0.000016s : 97: predicate.incorporate_call_switch 5.62% : 0.000103s : 479: predicate.inline 1.59% : 0.000029s : 126: predicate.inline_without_move 0.66% : 0.000012s : 126: predicate.j_node_and_user_rematch 1.13% : 0.000021s : 89: predicate.less_batch_normalization 1.49% : 0.000027s : 172: predicate.list_to_tuple_eliminator_ 2.10% : 0.000038s : 284: predicate.load_eliminater 0.53% : 0.000010s : 41: predicate.loop_unroll_after_grad 1.06% : 0.000019s : 105: predicate.loop_unroll_before_grad 1.48% : 0.000027s : 182: predicate.make_slice_get_slice_eliminator 0.90% : 0.000017s : 97: predicate.merge_addn 1.03% : 0.000019s : 126: predicate.micro_step_allgather_replace 1.06% : 0.000019s : 126: predicate.mini_step_allgather_replace 0.76% : 0.000014s : 100: predicate.minmaximum_grad 0.33% : 0.000006s : 29: predicate.mutable_eliminate 0.28% : 0.000005s : 29: predicate.opt_reshape 0.83% : 0.000015s : 41: predicate.parallel_virtual_node 1.22% : 0.000022s : 103: predicate.partial_defer_inline 1.22% : 0.000022s : 143: predicate.partial_eliminate 0.76% : 0.000014s : 100: predicate.print_const_string_wrapper 0.95% : 0.000017s : 97: predicate.reduce_all_const_elim 0.88% : 0.000016s : 100: predicate.reduce_eliminate 0.71% : 0.000013s : 126: predicate.remove_not_recompute_node 1.58% : 0.000029s : 228: predicate.replace_applicator 0.74% : 0.000013s : 126: predicate.replace_old_param 0.26% : 0.000005s : 41: predicate.reset_defer_inline 0.77% : 0.000014s : 100: predicate.reshape_eliminate 1.15% : 0.000021s : 126: predicate.row_tensor_add_zeros_like 0.37% : 0.000007s : 41: predicate.row_tensor_eliminate 1.28% : 0.000023s : 126: predicate.same_eliminate 0.65% : 0.000012s : 97: predicate.set_cell_output_no_recompute 1.31% : 0.000024s : 127: predicate.shard_identity_eliminate 1.62% : 0.000030s : 167: predicate.special_op_eliminate 1.21% : 0.000022s : 97: predicate.specialize_transform 1.22% : 0.000022s : 126: predicate.split_environ_get_set_with_tuple_value 1.17% : 0.000021s : 126: predicate.stack_unstack_eliminate 2.15% : 0.000039s : 284: predicate.stopgrad_eliminater 0.34% : 0.000006s : 41: predicate.switch_call_monad_eliminater 0.82% : 0.000015s : 103: predicate.switch_defer_inline 1.89% : 0.000035s : 229: predicate.switch_layer_defer_inline 3.41% : 0.000062s : 305: predicate.switch_simplify 0.77% : 0.000014s : 100: predicate.tile_eliminate 0.76% : 0.000014s : 100: predicate.transpose_eliminate 1.42% : 0.000026s : 170: predicate.tuple_list_convert_item_index_to_positive 1.49% : 0.000027s : 170: predicate.tuple_list_get_item_const_eliminator 1.32% : 0.000024s : 170: predicate.tuple_list_get_item_depend_reorder 2.51% : 0.000046s : 269: predicate.tuple_list_get_item_eliminator 1.39% : 0.000025s : 170: predicate.tuple_list_get_set_item_eliminator 2.41% : 0.000044s : 267: predicate.tuple_list_set_item_eliminator 1.40% : 0.000026s : 172: predicate.tuple_to_list_eliminator_ 2.13% : 0.000039s : 284: predicate.updatestate_pure_node_eliminater 3.16% : 0.000058s : 381: predicate.updatestate_useless_node_eliminater 0.38% : 0.000007s : 41: predicate.value_based_eliminate 1.25% : 0.000023s : 130: predicate.virtual_dataset_eliminate 1.17% : 0.000021s : 128: predicate.virtual_output_eliminate 0.39% : 0.000007s : 41: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000678 5 5.01% : 0.000034s : 1: func_graph_cloner_run.FuncGraphClonerGraph 94.99% : 0.000644s : 4: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 1.996647 283 0.00% : 0.000042s : 1: ForceFp32Comm 0.80% : 0.015913s : 1: a1a2 0.01% : 0.000111s : 1: add_cache_embedding 0.01% : 0.000110s : 1: add_comm_op_reuse_tag 0.02% : 0.000323s : 1: add_recomputation 0.01% : 0.000228s : 1: assign_add_opt 0.02% : 0.000321s : 1: auto_monad 0.01% : 0.000144s : 1: auto_monad_reorder 0.00% : 0.000037s : 1: begin_end_overlap_inline 0.00% : 0.000045s : 1: bias_add_comm_swap 0.07% : 0.001460s : 1: bootstrap 0.00% : 0.000096s : 1: cconv 0.01% : 0.000109s : 1: comm_op_add_attrs 0.00% : 0.000049s : 1: control_data_broadcast_order 0.00% : 0.000087s : 1: convert_after_rewriter 0.01% : 0.000182s : 1: cse_after_recomputation 0.02% : 0.000441s : 1: dataset_repeat_opt 0.02% : 0.000323s : 1: distribtued_split 0.05% : 0.001093s : 1: eliminate_special_op_node 0.00% : 0.000100s : 1: environ_conv 0.00% : 0.000077s : 1: execute 0.00% : 0.000043s : 1: full_micro_interleaved_order_control 0.00% : 0.000062s : 1: get_jit_bprop_graph 0.01% : 0.000103s : 1: graph_reusing 0.00% : 0.000054s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000045s : 1: handle_group_info 0.81% : 0.016210s : 1: inline 0.07% : 0.001428s : 1: insert-virtual-dataset 0.00% : 0.000039s : 1: interleave_parallel_branches 0.00% : 0.000042s : 1: interleave_split_concat_branches 0.00% : 0.000045s : 1: label_fine_grained_interleaved_index 0.00% : 0.000043s : 1: label_micro_interleaved_index 0.05% : 0.000976s : 1: loop_unroll 0.00% : 0.000041s : 1: merge_cast_opt 0.00% : 0.000041s : 1: micro_interleaved_order_control 0.00% : 0.000095s : 1: offloading_packed_experts 0.05% : 0.000934s : 44: opt.transform.a1a2 0.00% : 0.000063s : 1: opt.transform.loop_unroll_optimizer 0.49% : 0.009709s : 123: opt.transform.opt_a 0.01% : 0.000266s : 1: opt.transform.opt_after_cconv 0.06% : 0.001255s : 27: opt.transform.opt_b 0.02% : 0.000341s : 1: opt.transform.opt_trans_graph 0.01% : 0.000133s : 3: opt.transform.special_op_eliminate 0.01% : 0.000220s : 4: opt.transform.symbol_engine_opt 2.32% : 0.046411s : 1: opt_a 0.04% : 0.000892s : 1: opt_after_cconv 0.15% : 0.002966s : 1: opt_b 2.85% : 0.056884s : 1: optimize 0.00% : 0.000086s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000088s : 1: order_py_execute_after_rewriter 0.00% : 0.000079s : 1: overlap_grad_flash_sp 0.00% : 0.000039s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000088s : 1: overlap_grad_ring_attention 0.00% : 0.000045s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000074s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000044s : 1: overlap_param_gather 0.00% : 0.000060s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000040s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000247s : 1: parallel-infer-symbol 0.00% : 0.000085s : 1: parallel-infer-symbol-second 0.00% : 0.000044s : 1: partial_unused_args_eliminate 0.00% : 0.000067s : 1: pipeline_parallel_scheduler 0.01% : 0.000177s : 1: pipeline_split 0.01% : 0.000211s : 1: pre_auto_parallel 0.00% : 0.000087s : 1: py_interpret_to_execute 0.01% : 0.000108s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000067s : 1: remove_cast_before_assign_add 0.02% : 0.000359s : 1: remove_dup_value 0.23% : 0.004597s : 2: renormalize.infer 0.12% : 0.002350s : 2: renormalize.specialize 0.00% : 0.000041s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000060s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000487s [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.457.676 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:302] PrintOpSelectedNum] Number of GE_KERNEL, INTERNAL_KERNEL, OPAPI_KERNEL, ACL_KERNEL, HCCL_KERNEL, HOST_KERNEL: : 1: rewriter_after_opt_a 0.01% : 0.000263s : 2: rewriter_before_opt_a 0.00% : 0.000049s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000076s : 1: slice_recompute_activation 0.00% : 0.000043s : 1: split_layernorm_comm 0.00% : 0.000051s : 1: split_matmul_comm_elemetwise 0.00% : 0.000072s : 1: swap_dp_allreduce_reducescatter 0.03% : 0.000682s : 1: symbol_engine_optimizer 90.48% : 1.806506s : 1: task_emit 0.03% : 0.000510s : 1: tuple_transform 0.89% : 0.017783s : 1: type_inference 0.02% : 0.000348s : 1: validate [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.457.700 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:308] PrintOpSelectedNum] 0 0 3 8 3 0 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.457.737 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1785] Run] End [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.457.834 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:239] SavePassesConfig] Running_passes: ['a1a2.r1.a_1', 'a1a2.r1.a_1.inline', 'opt_a.r1.auto_parallel', 'opt_a.r1.flash_sp', 'opt_a.r1.flash_sp_send_recv_attached', 'opt_a.r1.parallel', 'opt_a.r1.parallel_renormalize', 'opt_a.r1.receive_attached', 'opt_a.r1.virtual_dataset', 'opt_a.r1.virtual_dataset.virtual_dataset_eliminate', 'opt_a.r1.virtual_output', 'opt_a.r1.virtual_output.virtual_output_eliminate', 'opt_a.r2.a_1', 'opt_a.r2.a_1.tuple_list_get_item_eliminator', 'opt_a.r2.accelerated_algorithm', 'opt_a.r2.accelerated_algorithm.less_batch_normalization', 'opt_a.r2.auto_parallel', 'opt_a.r2.flash_sp', 'opt_a.r2.renormalize', 'opt_a.r3.auto_parallel'] [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.457.846 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_0_set_fracz_group_attr in 59.06 us [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.457.882 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1047] SaveCompiledGraph] Save compiled func graph(4_3_1___main___Net_construct_20) phase(train.1738915084346594048.281469968999824.0..)! [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.457.932 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1065] SaveCompiledGraph] End save compiled func graph! [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.457.971 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1326] CompileInner] [PROF]ParallelPostProcess costs 0.011 msec. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.457.993 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1154] CleanCompileRes] Clean compile resource start [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.458.097 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_1_insert_identity in 216.16 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.458.505 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_2_insert_type_transform_op in 375.47 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.458.579 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_3_graph_view_replace in 45.05 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.458.620 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:288] GEBackendOptimizeACLAfterKernelSelect] [PROF]GEBackendOptimizeACLAfterKernelSelect costs 0.89 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.458.667 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:172] OptimizeACLGraphAfterKernelSelect] [PROF]OptimizeACLGraphAfterKernelSelect costs 0.941 msec. [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.458.746 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.458.857 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_after_inline_pm_0_DropoutGenMask is enabled. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.458.884 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_after_inline_pm_0_DropoutGenMask in 1.28 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.458.985 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_1_cse in 75.92 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.459.027 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_2_eliminate_maketuple_getitem in 16.98 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.459.052 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_3_insert_move_to in 0.54 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.459.089 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:355] GEAfterInlineOptimize] [PROF]GEAfterInlineOptimize costs 0.242 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.459.120 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:391] InlineCallGraph] [PROF]InlineCallGraph costs 0.428 msec. [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.459.193 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.459.307 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:868] InlineSwitchGraph] [PROF]InlineSwitchGraph costs 0.159 msec. [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.459.347 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1055] CompileGraphImpl] [PROF]OptimizeGraph costs 6.268 msec. [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.459.418 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.459.515 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.460.009 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.460.010 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1175] CleanCompileRes] Clean compile resource end [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.460.043 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.460.052 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] End compiling 'Net.construct'. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.460.085 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1334] CompileInner] [PROF]CleanCompileRes costs 2.083 msec. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.460.105 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1335] CompileInner] Finish compiling. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.460.127 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1336] CompileInner] [PROF]compile_graph costs 2109.79 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.460.140 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_pool.cc:423] BestFitAscendMemoryPool] BestFitAscendMemoryPool constructed, older memory allocator is enabled. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.460.181 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:167] Initialize] Skip initialization of memory pool since init size is not configured. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.460.219 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.460.563 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_x, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] PARALLEL(187803,ffff93d7bc10,python):2025-02-07-15:58:06.460.621 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_y, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] UTILS(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.213 [mindspore/ccsrc/utils/dynamic_obfuscation/registry_opaque_predicate.cc:112] init_calling_count] calling_count_ has been initialized to 0 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.259 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.335 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1893] RunGraph] Status record: start run actor: kernel_graph_0 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.395 [mindspore/ccsrc/runtime/device/pre_launch_comm.cc:200] PreLaunchCommKernel] No hccl kernel to pre launch [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.411 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.428 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1918] RunGraph] [PROF]PreLaunchCommKernel costs 0.042 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.441 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.467 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.474 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:966] SpawnMultiPipelineActor] Enable runtime asynchronously launch kernel, default actor thread num 5, current actor thread num: 5 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.515 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.583 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.596 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.619 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:394] operator()] Init defrag memory step freq. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.624 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.641 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:396] operator()] Config defrag memory step freq : . [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.651 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.660 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:405] operator()] Defrag memory step freq : 100. [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.721 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:638] PrepareDataForDeviceTensorStore] Prepare store data, input tensor size: 0, arg size: 2 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.744 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:835] AllocGEFixMemory] Start AllocGEFixMemory [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.767 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.792 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.794 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:652] PrepareDataForDeviceTensorStore] prepare data for graph:kernel_graph0 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.817 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.833 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-0, debug name:ValueNode 2, front node:ValueNode 2 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.864 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 2 front node:ValueNode 2 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.887 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496c0f20 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.907 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 2 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.935 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:06.461.957 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.968 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.461.995 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.462.113 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.462.137 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.462.492 [mindspore/ccsrc/transform/acl_ir/op_api_exec.cc:145] GetAscendDefaultCustomPath] Add path [/usr/local/Ascend/latest/opp/vendors/customize/op_api/lib/libcust_opapi.so to custom opapi paths. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.462.586 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-1, debug name:ValueNode (0, 0, 2), front node:ValueNode (0, 0, 2) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.462.625 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 0, 2) front node:ValueNode (0, 0, 2) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.462.650 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d4550 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.462.673 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 0, 2) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.462.925 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-2, debug name:ValueNode (4, 4, 4), front node:ValueNode (4, 4, 4) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.462.958 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (4, 4, 4) front node:ValueNode (4, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.462.979 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d4bd0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.000 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (4, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.244 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-3, debug name:ValueNode 0, front node:ValueNode 0 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.275 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 0 front node:ValueNode 0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.296 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d5090 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.316 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.550 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-4, debug name:ValueNode (2, 2, 4), front node:ValueNode (2, 2, 4) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.583 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 2, 4) front node:ValueNode (2, 2, 4) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.603 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d58a0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.623 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 2, 4) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.848 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-5, debug name:ValueNode 1, front node:ValueNode 1 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.877 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 1 front node:ValueNode 1 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.900 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d5d80 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.463.919 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 1 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.464.153 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-6, debug name:ValueNode (2, 0, 0), front node:ValueNode (2, 0, 0) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.464.188 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 0, 0) front node:ValueNode (2, 0, 0) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.464.219 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d6550 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.464.242 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 0, 0) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.464.477 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-7, debug name:ValueNode (0, 0, 0), front node:ValueNode (0, 0, 0) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.464.509 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 0, 0) front node:ValueNode (0, 0, 0) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.464.530 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d6d40 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.464.551 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 0, 0) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.464.827 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-8, debug name:ValueNode (1, 1, 1), front node:ValueNode (1, 1, 1) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.464.861 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (1, 1, 1) front node:ValueNode (1, 1, 1) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.464.883 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x496d7530 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.464.903 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (1, 1, 1) [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.465.152 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_y front node:@4_3_1___main___Net_construct_20:param_y backend is weight:0 front is weight:0 [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:06.465.198 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_x front node:@4_3_1___main___Net_construct_20:param_x backend is weight:0 front is weight:0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe63fff0f0,python):2025-02-07-15:58:06.465.308 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:0, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.465.345 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph0_SuperKernelActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:1, sequential num:2001075757 [INFO] GE_ADPT(187803,fffe84ff90f0,python):2025-02-07-15:58:06.465.632 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.466.041 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 6_Default/StridedSlice-op0, front node: @4_3_1___main___Net_construct_20:param_x, with index: 0, addr index: 0, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.466.090 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 6_Default/StridedSlice-op0, outer index: 0, inner index:0, front node: @4_3_1___main___Net_construct_20:param_x [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.466.170 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 0, inner index: 0, dynamic is 0 [INFO] GE_ADPT(187803,fffe857fa0f0,python):2025-02-07-15:58:06.466.199 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.466.515 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 0_Default/StreamSend-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.466.553 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.466.597 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 6_Default/StridedSlice-op0, input index: 0, device tensor: 0x496d3860, ptr: 0x12c7fd801200, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.466.645 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor6_Default/StridedSlice-op0, actor input: 0, graph input: 1, device tensor: 0x496d3860, ptr: 0x12c7fd801200, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.466.724 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 9_Default/StridedSlice-op3, front node: @4_3_1___main___Net_construct_20:param_y, with index: 0, addr index: 1, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.466.755 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.466.764 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 9_Default/StridedSlice-op3, outer index: 1, inner index:0, front node: @4_3_1___main___Net_construct_20:param_y [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.466.793 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 2, actor name : 1_Default/StreamRecv-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.466.820 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.466.824 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 1, inner index: 0, dynamic is 0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.466.847 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.466.984 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.012 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 2_Default/StreamSend-op1, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.467.022 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 9_Default/StridedSlice-op3, input index: 0, device tensor: 0x496d3490, ptr: 0x12c7fd801600, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.034 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.467.056 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor9_Default/StridedSlice-op3, actor input: 0, graph input: 0, device tensor: 0x496d3490, ptr: 0x12c7fd801600, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.467.161 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph0_SuperKernelActor and check running condition:1, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.180 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe63fff0f0,python):2025-02-07-15:58:06.467.184 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:317] RunOpData] Actor(kernel_graph_0_OutputActor) receive the input op data and output position:0 device tensor:0x496dc080 ptr:0 ref count:18446744073709551615 origin ref count:18446744073709551615 dynamic ref count:2147483647 from memory pool:0 output node:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} index:0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.214 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 3, actor name : 3_Default/StreamRecv-op1, task_id_on_stream : 1. [INFO] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.467.229 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:57] IncreaseLoopCount] Loop count actor(kernel_graph_0_LoopCountActor) running, loop count: 1, current count: 1, total running count: 1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.242 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:06.467.250 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.261 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe63fff0f0,python):2025-02-07-15:58:06.467.275 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:388] CreateOutputTensor] Create output tensor, output node: Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}, output index: 0, output position: 0, output kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.353 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe63fff0f0,python):2025-02-07-15:58:06.467.365 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:470] CreateOutputTensor] Create device tensor:0xfffe540095a0, size: 512 type:48 output node:Default/AllGather-op2 output index:0 output position:0, origin output device tensor: 0x496dc080 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.382 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 4_Default/StreamSend-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.402 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op2 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.532 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op2 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.558 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 4, actor name : 5_Default/StreamRecv-op2, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.578 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.597 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.687 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.731 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 6_Default/StridedSlice-op0, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:06.467.752 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op0 [INFO] GE_ADPT(187803,fffe857fa0f0,python):2025-02-07-15:58:06.469.458 [mindspore/ccsrc/transform/acl_ir/acl_allocator.cc:104] RegisterAllocator] Register AclAllocator [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.517.900 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Mul, kernel type:opapi_kernel [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.518.012 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.518.172 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.518.250 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.518.294 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.518.317 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.518.342 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.518.711 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.518.742 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.609.685 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.609.762 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.609.847 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.609.911 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.609.954 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.609.990 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.610.016 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.610.162 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.610.187 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.610.900 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.610.939 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.610.995 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.611.041 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.611.075 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.611.099 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.611.416 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_0_erase_visit_attr in 255.98 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.611.783 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_1_deal_ref_output in 330.97 us [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.611.840 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:166] AclAfterCreateKernel] [PROF]AclAfterCreateKernel costs 0.701 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.611.888 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:195] OptimizeACLGraphAfterCreateKernel] [PROF]OptimizeACLGraphAfterCreateKernel costs 0.762 msec. [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.611.985 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.163 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1102] OptimizeExecutionOrder] [PROF]OptimizeExecutionOrder costs 0.247 msec. [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.200 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1064] CompileGraphImpl] [PROF]CreateKernel costs 152.824 msec. [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.389 [mindspore/ccsrc/backend/common/session/session_basic.cc:1152] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] DEBUG(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.415 [mindspore/ccsrc/debug/summary/summary.cc:52] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] DEBUG(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.443 [mindspore/ccsrc/debug/summary/summary.cc:57] RecurseSetSummaryNodesForAllGraphs] This function should be skipped on GE backend. [INFO] DEBUG(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.462 [mindspore/ccsrc/debug/data_dump/dump_json_parser.cc:1207] UpdateNeedDumpKernels] Get kernel dump flag [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.509 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1183] PreprocessBeforeRun] Current Exec Order Algo in MS Context is bfs [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.540 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1046] DoStreamAssign] Status record: start stream assign, kernel_graph0 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.570 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op0 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.617 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op1 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.670 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op2 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.701 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op3 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.738 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op4 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.776 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op5 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.819 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Mul-op0 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.612.849 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op0 [WARNING] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:06.613.787 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for 2-6853331267304275293 [INFO] DEVICE(187834,fffea67fc0f0,python):2025-02-07-15:58:06.613.882 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/multi_ascend_communication_group.cc:49] Initialize] Successfully initialize HCCL group 2-6853331267304275293 [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.613.939 [mindspore/ccsrc/distributed/collective/collective_manager.cc:784] CreateDeviceCommunicator] [PROF]InitDeviceCommunicator costs 239.064 msec. [WARNING] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:06.613.969 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: 2-6853331267304275293 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.614.025 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:247] InitCommGroup] The MOC occupied by HCCL of graph: 4_3_1___main___Net_construct_20 is 1600 MB. [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.614.063 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1072] CompileGraphs] [PROF]InitCommGroup costs 2241.79 msec. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:06.614.088 [mindspore/ccsrc/distributed/collective/collective_manager.cc:833] WaitAllCommInitDone] All device communictor is initialized. You can launch communication operators after this step. [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.614.109 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1076] CompileGraphs] [PROF]WaitAllCommInit costs 0.024 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.614.153 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 2 for node Default/AllGather-op0, group: 2-6853331267304275293 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.614.201 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op0 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.614.247 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op2 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.614.278 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op1 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.614.305 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1347] IsEnableControlFlowInline] Disable switch inline, executor mode:1 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.614.340 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1180] DoUnifyMindIRPass] Do unify mindir pass for graph 4_3_1___main___Net_construct_20 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.614.374 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_pm_0_erase_invalid_micro_depend in 3.34 us [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.614.522 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:937] EnableKBKCompileCache] Disable backend compile cache by front config. [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.614.799 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:620] BuildSymbolEngine] Status record: skip build symbol engine for function graph: 4_3_1___main___Net_construct_20 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.614.847 [mindspore/ccsrc/backend/graph_compiler/graph_partition.cc:866] Partition] GraphPartion Info: 4_3_1___main___Net_construct_20 inline mode:0 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.615.196 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1519] CompileGraph] Compile graph: 4_3_1___main___Net_construct_20, Split segments size: 2 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.615.270 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1555] CompileGraphFromSegment] Compile normal segment, the first node: @4_3_1___main___Net_construct_20:CNode_21{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.615.447 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 3 for node Default/AllGather-op1, group: 2-5435772415009061329 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.615.493 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op1 [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.615.530 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:629] CompileGraph] Status record: start compile graph. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.615.534 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op3 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.615.577 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op2 [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.615.600 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:2227] ConstructKernelGraph] Create graph: 0 [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.616.853 [mindspore/ccsrc/backend/common/session/kernel_graph_mgr.cc:3487] ConstructOutput] Output:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.616.964 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 4 for node Default/AllGather-op2, group: 2-511848487187618470 [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.617.148 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.617.311 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:637] CompileGraph] [PROF]ConstructKernelGraph costs 1.74 msec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.617.453 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc:200] GetRunMode] RunMode::kKernelMode, graph: kernel_graph0 [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.617.645 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:449] RecursiveSetRunMode] Kernel graph: kernel_graph0, set run mode:KernelMode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.617.800 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:191] EliminateIllegalDataTypePass] Start eliminate illegal data type for kernel graph id:0 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.617.828 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1062] DoStreamAssign] Status record: end stream assign, kernel_graph0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.617.873 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_0_convert_list_to_tuple in 25.62 us [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.617.894 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.617.991 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:2 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.031 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.058 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 2, record_stream_id_ : 0. [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.090 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.101 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_eliminate_illegal_data_type_pm_1_eliminate_func_type in 194.21 us [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.113 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:3 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.144 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.151 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:204] EliminateIllegalDataTypePass] [PROF]EliminateIllegalDataTypePass costs 0.348 msec. [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.168 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 3, record_stream_id_ : 0. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.186 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:151] CommonUnifyMindIR] start common unify mindir opt graph:0 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.199 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.217 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: conv_transpose_to_conv_backprop_input [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.225 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:4 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.255 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.278 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 4, record_stream_id_ : 0. [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.310 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.333 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:5 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.365 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.372 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_0_conv_transpose_to_conv_backprop_input in 150.08 us [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.390 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 5, record_stream_id_ : 0. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.398 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: custom_op_reg_info_to_attr [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.420 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.438 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_1_custom_op_reg_info_to_attr in 38.11 us [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.444 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.461 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Custom not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.481 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_2_inplace_assign_for_custom_op in 20.36 us [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.473 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.508 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 0, record_stream_id_ : 2. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.501 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_attr_to_unify_mindir [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.538 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.561 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:6 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.589 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.611 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 6, record_stream_id_ : 0. [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.640 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.656 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_unify_mindir_pm_3_convert_attr_to_unify_mindir in 149.97 us [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.663 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:1 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.692 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.700 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:164] CommonUnifyMindIR] [PROF]CommonUnifyMindIR costs 0.51 msec. [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.715 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 1, record_stream_id_ : 3. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.741 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:81] BackendCommonOptimization] Status record: start common optimization. graph id: 0 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.746 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.769 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:7 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.790 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: convert_dynamic_broadcast_to [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.797 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.821 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 7, record_stream_id_ : 0. [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.853 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.876 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:8 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.905 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.618.908 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_0_convert_dynamic_broadcast_to in 113.53 us [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.928 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 8, record_stream_id_ : 2. [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.958 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.618.982 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:9 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.011 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.035 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 9, record_stream_id_ : 3. [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.064 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.619.083 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_1_convert_const_input_to_attr in 145.52 us [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.087 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:10 [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.126 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.150 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 10, record_stream_id_ : 4. [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.179 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:931] PrintGraphExecuteOrder] Graph 0 execution order: [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.619.222 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_2_custom_op_const_input_to_attr in 107.75 us [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.230 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[0], node name[Default/StreamSend-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_24{[0]: ValueNode StreamSend}], event id[2] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.266 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[1], node name[Default/StreamRecv-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_25{[0]: ValueNode StreamRecv}], event id[2] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.297 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[2], node name[Default/StreamSend-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_26{[0]: ValueNode StreamSend}], event id[3] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.329 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[3], node name[Default/StreamRecv-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_27{[0]: ValueNode StreamRecv}], event id[3] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.619.344 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_3_convert_const_input_to_tensor_input_for_print in 95.4 us [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.360 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[4], node name[Default/StreamSend-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_28{[0]: ValueNode StreamSend}], event id[4] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.391 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[5], node name[Default/StreamRecv-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_29{[0]: ValueNode StreamRecv}], event id[4] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.456 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[6], node name[Default/StridedSlice-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_30{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_x, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.506 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[7], node name[Default/StridedSlice-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.555 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[8], node name[Default/StridedSlice-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.612 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[9], node name[Default/StridedSlice-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_33{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.662 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[10], node name[Default/StridedSlice-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.710 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[11], node name[Default/StridedSlice-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.750 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[12], node name[Default/Mul-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35}] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.780 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[13], node name[Default/StreamSend-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_37{[0]: ValueNode StreamSend}], event id[5] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.810 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[14], node name[Default/StreamRecv-op3], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_38{[0]: ValueNode StreamRecv}], event id[5] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.619.840 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_4_convert_tuple_output_to_maketuple in 460.87 us [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.847 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[15], node name[Default/AllGather-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36}], group[2-6853331267304275293] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.878 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[16], node name[Default/StreamSend-op4], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_40{[0]: ValueNode StreamSend}], event id[0] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.619.881 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_5_convert_unused_tuple_para_to_make_tuple in 4.21 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.619.907 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_6_flatten_concat_fission is enabled. [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.907 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[17], node name[Default/StreamRecv-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_41{[0]: ValueNode StreamRecv}], event id[0] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.943 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[18], node name[Default/Split-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.619.994 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[19], node name[Default/Concat-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2}] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.024 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[20], node name[Default/StreamSend-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_46{[0]: ValueNode StreamSend}], event id[6] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.620.042 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_6_flatten_concat_fission in 108.92 us [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.054 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[21], node name[Default/StreamRecv-op5], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_47{[0]: ValueNode StreamRecv}], event id[6] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.089 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[22], node name[Default/AllGather-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43}], group[2-5435772415009061329] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.118 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[23], node name[Default/StreamSend-op6], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_49{[0]: ValueNode StreamSend}], event id[1] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.146 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[24], node name[Default/StreamRecv-op6], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_50{[0]: ValueNode StreamRecv}], event id[1] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.620.170 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_7_inset_input_structural_for_py_execute in 97.29 us [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.182 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[25], node name[Default/Split-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.620.193 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_8_broadcast_to_fusion is enabled. [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.224 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[26], node name[Default/Concat-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1}] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.254 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[27], node name[Default/StreamSend-op7], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_55{[0]: ValueNode StreamSend}], event id[7] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.282 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[28], node name[Default/StreamRecv-op7], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_56{[0]: ValueNode StreamRecv}], event id[7] [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.620.309 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_8_broadcast_to_fusion in 91.3 us [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.316 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[29], node name[Default/AllGather-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}], group[2-511848487187618470] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.345 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[30], node name[Default/StreamSend-op8], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_58{[0]: ValueNode StreamSend}], event id[8] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.381 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[31], node name[Default/StreamRecv-op8], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_59{[0]: ValueNode StreamRecv}], event id[8] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.410 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[32], node name[Default/StreamSend-op9], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_60{[0]: ValueNode StreamSend}], event id[9] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.439 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[33], node name[Default/StreamRecv-op9], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_61{[0]: ValueNode StreamRecv}], event id[9] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.468 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[34], node name[Default/StreamSend-op10], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_62{[0]: ValueNode StreamSend}], event id[10] [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.496 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[35], node name[Default/StreamRecv-op10], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_63{[0]: ValueNode StreamRecv}], event id[10] [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.531 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1115] CompileGraphImpl] [PROF]PreprocessBeforeRun costs 8.042 msec. [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.620.572 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1161] CreateDeviceAddress] Status record: start create device address. graph id: 0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.620.664 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_common_pm_9_add_attr_to_node in 320.01 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.620.699 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_common_pm_10_replace_addn is enabled. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.620.828 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_common_pm_10_replace_addn in 102 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.620.874 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:93] BackendCommonOptimization] [PROF]BackendCommonOptimization costs 2.128 msec. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.620.894 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:100] BackendCommonOptimization] Status record: end common optimization. graph id: 0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.620.966 [mindspore/ccsrc/backend/common/optimizer/common_backend_optimization.cc:258] OptimizationWithoutBackend] [PROF]OptimizationWithoutBackend costs 3.166 msec. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.621.385 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_0_renorm_split in 121.2 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.621.416 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: reduce_axis_update [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.621.647 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1168] CreateDeviceAddress] Status record: end create device address. graph id: 0 [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.621.688 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1123] CompileGraphImpl] [PROF]CreateDeviceAddress costs 1.108 msec. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.621.713 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_1_reduce_axis_update in 292.42 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.621.739 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission is enabled. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.621.765 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim ClipByNorm not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.621.786 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_2_clip_by_norm_fission in 23.16 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.621.805 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: space_to_batch_nd_attr_update [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.621.806 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1177] CacheGraphOutputToFrontNodeWithIndex] Get graph backend output nodes. [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.621.844 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1185] CacheGraphOutputToFrontNodeWithIndex] Get graph front output nodes. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.621.852 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_3_space_to_batch_nd_attr_update in 43.28 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.621.875 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: batch_to_space_nd_attr_update [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.621.909 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_4_batch_to_space_nd_attr_update in 32.41 us [INFO] SESSION(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.621.911 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1203] CacheGraphOutputToFrontNodeWithIndex] Backend output: Default/AllGather-op2 debug string: @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} node ptr:0x3b8da890 with index: 0 map to front node: Default/AllGather-op2 debug string: @4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} node ptr: 0x3b88cc80 with index: 0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.621.942 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdamWeightDecay not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.621.963 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_5_adam_weight_decay_unify_mindir in 30.07 us [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.621.957 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:766] CompileGraph] Status record: end compile graph. graph id: 0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.011 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_6_add_depend_for_adamw in 26.3 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.031 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_7_cdist_fission is enabled. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.050 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Cdist not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.071 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_7_cdist_fission in 20.48 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.088 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission is enabled. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.105 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim CdistGrad not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.123 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_8_cdist_grad_fission in 16.77 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.150 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion is enabled. [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.138 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1605] CompileGraphFromSegment] Compile cut segment, the cut node: @4_3_1___main___Net_construct_20:ValueNode_64{[0]: ValueNode Return, [1]: CNode_22} [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.177 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_9_batchmatmul_reducescatter_alltoall_fusion in 7.32 us [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.187 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1129] CompileGraphs] [PROF]CompileSubGraph costs 183.494 msec. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.197 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion is enabled. [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.213 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:976] ExportCompileCacheKBK] Compile cache: disable by front compile cache config. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.220 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_10_alltoall_allgather_batch_matmul_fusion in 4.31 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.244 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.263 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_11_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir in 21.46 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.281 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.273 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1143] CompileGraphs] Status record: construct the graph compiler info. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.300 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_12_grad_sparse_softmax_cross_entropy_with_logits_unify_mindir_v2 in 17.41 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.308 [mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc:1001] Parse] Control node parser is not inited. [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.338 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:859] Transform] Graph(kernel_graph_0) transforms actor begin, strategy:pipeline [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.346 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SparseSoftmaxCrossEntropyWithLogits not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.367 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_13_sparse_softmax_cross_entropy_with_logits_unify_mindir in 47.23 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.412 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutExt not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.433 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_14_dropout_ext_unify_mindir1 in 45.02 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.452 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim DropoutGradExt not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.470 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_15_dropoutgrad_ext_unify_mindir in 17.73 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.488 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Dropout not exist in name to cnode [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.482 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:480] InitGraphParameterStore] Init graph parameter store: kernel_graph_0, outer size: 2 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.507 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_16_dropout_unify_mindir1 in 17.71 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.523 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: dropoutgrad_unify_mindir [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.522 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 0, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_x [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.554 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 1, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_y [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.571 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_17_dropoutgrad_unify_mindir in 44.31 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.593 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3d14a0f0 for node:ValueNode 1 node addr:0x3b893360 device type:2 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.594 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchange not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.620 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d14a0f0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.626 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_18_neighbor_exchange_unify_mindir in 30.57 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.646 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2 not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.649 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3d15d790 for node:ValueNode (1, 1, 1) node addr:0x3b895250 device type:2 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.664 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_19_neighbor_exchange_v2_unify_mindir in 17.72 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.670 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15d790 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.682 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim NeighborExchangeV2Grad not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.700 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_20_neighbor_exchange_v2_grad_unify_mindir in 16.68 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.694 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3d15db40 for node:ValueNode 2 node addr:0x3b893b20 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.716 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15db40 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.719 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAll not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.737 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_21_all_to_all_unify_mindir in 16.53 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.739 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3d15e230 for node:ValueNode (2, 2, 2) node addr:0x3b894f00 device type:2 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.756 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AlltoAllV not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.759 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15e230 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.773 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_22_all_to_all_v_unify_mindir in 16.71 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.783 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3d15ea20 for node:ValueNode (4, 4, 4) node addr:0x3b8975e0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.802 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15ea20 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.818 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.838 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_23_bn_split in 44.24 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.835 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3d15f210 for node:ValueNode (2, 0, 0) node addr:0x3b8972f0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.854 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15f210 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.858 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:385] Run] Run fast pass: bn_grad_unify_mindir [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.875 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3d15f6d0 for node:ValueNode 0 node addr:0x3b8939f0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.895 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15f6d0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.919 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3d15fee0 for node:ValueNode (0, 0, 0) node addr:0x3b894dd0 device type:2 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.931 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_24_bn_grad_unify_mindir in 68.79 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.937 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15fee0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.963 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.962 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3d1606d0 for node:ValueNode (0, 2, 0) node addr:0x3b895f40 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.622.981 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d1606d0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.622.984 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_25_bn_grad_split in 27.88 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.004 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.003 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x3d160ec0 for node:ValueNode (2, 4, 4) node addr:0x3b8961d0 device type:2 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.022 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_26_batchnormgrad_to_bninfergrad in 18 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.024 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d160ec0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.040 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission is enabled. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.057 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNormGrad not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.076 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_27_batch_norm_grad_infer_fission in 16.88 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.094 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim BatchNorm not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.120 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_28_batchnorm_to_bninfer in 24.49 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.137 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.151 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:442] ChangeGraphMode] Enable kbk subgraph execute and set run mode for graph: 0 to GraphMode. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.159 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Lamb not exist in name to cnode [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.172 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:462] TryEnableKbkSubGraphExecMode] Enable kbk subgraph execute mode for actor set: kernel_graph_0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.179 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_29_lamb_fission_ge in 23.21 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.199 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim Print not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.216 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_30_print_insert_placeholder_for_tensor_name in 17.2 us [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.229 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:391] TryEnableInputOptimize] Enable input optimize for actor set: kernel_graph_0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.237 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim GetNext not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.256 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_31_getnext_for_ge in 20.64 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.267 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_y for host data source actor. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.276 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNorm not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.294 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_32_sync_bn_split in 19.25 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.312 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim SyncBatchNormGrad not exist in name to cnode [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.309 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_y for front node:@4_3_1___main___Net_construct_20:param_y index:0 position:1 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.331 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_33_sync_bn_grad_split in 17.63 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.347 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.336 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_x for host data source actor. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.367 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AdaptiveMaxPool2D not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.386 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_34_adaptive_max_pool2d_ge_fusion in 20.51 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.384 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_x for front node:@4_3_1___main___Net_construct_20:param_x index:0 position:0 [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.421 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2268] BuildDataPrepareActorForGraphParameterStore] Create data prepare actor: kernel_graph_0_DataPrepareActor [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.431 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim AvgPoolGrad not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.452 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_35_avg_pool_grad_for_ge in 44.9 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.469 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion is enabled. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.487 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.495 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2224] BuildLoopCountActor] Create loop count actor: kernel_graph_0_LoopCountActor [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.505 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_36_matmul_reduce_scatter_fusion in 17.99 us [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.522 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2257] BuildOutputActor] Create output actor: kernel_graph_0_OutputActor [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.529 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion is enabled. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.549 [mindspore/ccsrc/backend/common/optimizer/node_pass.cc:394] Run] Prim MatMul not exist in name to cnode [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.566 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_1_37_allgather_matmul_fusion in 17.54 us [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.604 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1509] CacheGraphOutputToActor] Cache graph 0 output node:Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} with index:0 to actor:kernel_graph0_SuperKernelActor, from front node:Default/AllGather-op2 debug string:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} with index:0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.767 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.793 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d15c6d0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.823 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.843 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d15caa0 origin ref count:2 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.623.864 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_38_add_attr_to_dump in 271.05 us [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.934 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1711] Link] [PROF]GraphSchedulerLinkSinkMode costs 0.206 msec. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.964 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.623.986 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph0_SuperKernelActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.005 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_LoopCountActor@ to actor:kernel_graph_0_OutputActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.025 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_OutputActor@ to actor:kernel_graph_0_DataPrepareActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.043 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:3713] LinkControlArrowForCopyActor] Link control arrow for copy actor start, copy actor size:0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.072 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d165a10 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.123 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:556] AddResultArrow] Add result arrow from actor:kernel_graph0_SuperKernelActor to actor:kernel_graph_0_OutputActor@ from kernel@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} device address:0x3d165a10 original ref count:18446744073709551615 ref count:18446744073709551615 dynamic ref count:2147483647 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.183 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.225 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.263 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.297 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.624.311 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unify_mindir_1_39_ascend_mindir_op_adapter in 409.34 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.330 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.624.342 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.364 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.624.370 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:263] DefineFlashAttentionPattern] Do FlashAttentionPattern V1. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.582 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.626 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.698 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.624.699 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_0_FlashAttentionFusionV1 in 324.2 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.624.731 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.738 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.624.755 [mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.cc:377] DefineFlashAttentionPattern] Do FlashAttentionPattern V2. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.805 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.846 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.897 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.624.935 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.005 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.041 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.625.065 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_1_FlashAttentionFusionV2 in 306.47 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.625.091 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.094 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.142 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.174 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.209 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.244 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.276 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.295 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 4, send_actor : 0x3b8e1090, recv_actor : 0x3b8e1ad0. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.313 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 8, send_actor : 0x3d174870, recv_actor : 0x3d175080. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.330 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 3, send_actor : 0x3b8c07a0, recv_actor : 0x3b8e0880. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.625.344 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_2_quantbatchmatmul_reduce_fusion in 227.55 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.347 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 11, send_actor : 0x3d178820, recv_actor : 0x3d179260. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.364 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 10, send_actor : 0x3d1773a0, recv_actor : 0x3d177de0. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.625.369 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.380 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 1, send_actor : 0x3d16cfd0, recv_actor : 0x3d16d910. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.396 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 5, send_actor : 0x3b8e2510, recv_actor : 0x3d1664b0. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.413 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 6, send_actor : 0x3d16b170, recv_actor : 0x3d16bb40. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.430 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 7, send_actor : 0x3d16f620, recv_actor : 0x3d16ff60. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.447 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 9, send_actor : 0x3d1760a0, recv_actor : 0x3d176960. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.464 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 2, send_actor : 0x3d171250, recv_actor : 0x3d1720b0. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.625.606 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_3_MatMulAllReduce in 212.61 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.625.630 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.662 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op0 addr:0x3d15d3a0 type:48, kernel tensor addr:0x3d15d130, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.767 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op1 addr:0x3d1613b0 type:48, kernel tensor addr:0x3d161140, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.625.827 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_4_MatMulAllReduceAddRmsNorm in 171.37 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.827 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.625.852 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.913 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op2 addr:0x3d1618c0 type:48, kernel tensor addr:0x3d161650, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.625.967 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.626.007 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_5_add_layer_norm_fusion in 129.89 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.626.041 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.041 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op3 addr:0x3d161dd0 type:48, kernel tensor addr:0x3d161b60, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.128 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op4 addr:0x3d1622e0 type:48, kernel tensor addr:0x3d162070, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.183 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.626.190 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_6_add_layer_norm_v3_fusion in 124.19 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.626.215 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.266 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op5 addr:0x3d1627f0 type:48, kernel tensor addr:0x3d162580, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.626.334 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_7_add_layer_norm_ext_fusion in 97.14 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.319 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 0), [3]: ValueNode (2, 2, 2), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.626.358 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.443 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Mul-op0 addr:0x3d162d00 type:48, kernel tensor addr:0x3d162a90, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.490 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.557 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op0 addr:0x3d163210 type:48, kernel tensor addr:0x3d162fa0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.596 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36} is thread safe. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.626.631 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_8_inference_qbmm_add_fusion in 248.33 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.626.657 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.666 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op0 addr:0x3d163790 type:48, kernel tensor addr:0x3d1635b0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.695 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op0 addr:0x3d163c80 type:48, kernel tensor addr:0x3d163aa0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.734 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.798 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op0 addr:0x3d164100 type:48, kernel tensor addr:0x3d163e90, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.840 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.894 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op1 addr:0x3d164610 type:48, kernel tensor addr:0x3d1643a0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.626.930 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43} is thread safe. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.626.957 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_9_inference_swiglu_fusion in 273.37 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.626.983 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion is enabled. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.627.012 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_10_inference_matmul_split_fusion in 6.58 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.007 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op1 addr:0x3d164b90 type:48, kernel tensor addr:0x3d1649b0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.627.034 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.045 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op1 addr:0x3d165080 type:48, kernel tensor addr:0x3d164ea0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.084 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.148 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op1 addr:0x3d165500 type:48, kernel tensor addr:0x3d165290, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.627.186 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_11_add_rms_norm_dynamic_quant_fusion in 127.79 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.189 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1} is thread safe. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.627.209 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_12_shape_reshape is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.244 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op2 addr:0x3d165a10 type:48, kernel tensor addr:0x3d1657a0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.280 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} is thread safe. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.627.336 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_12_shape_reshape in 102.58 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.627.359 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.408 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op1 input kernel:Default/StridedSlice-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.433 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d15d3a0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.474 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op2 input kernel:Default/StridedSlice-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.495 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d1613b0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.539 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op4 input kernel:Default/StridedSlice-op3 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.627.540 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_13_add_rms_norm_quant_fusion in 157.16 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.558 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d161dd0 origin ref count:2 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.627.565 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.595 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op5 input kernel:Default/StridedSlice-op4 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.615 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d1622e0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.659 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op2 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.679 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d1618c0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.709 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op5 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.727 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d1627f0 origin ref count:2 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.627.729 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_14_add_cast_rms_norm_cast_quant_fusion in 139.33 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.754 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op0 input kernel:Default/Mul-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.627.762 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.775 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d162d00 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.805 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op0 input kernel:Default/AllGather-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.822 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d163210 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.847 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.865 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d163790 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.887 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.904 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d163c80 origin ref count:2 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.627.904 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_15_rms_norm_quant_fusion in 117.89 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.627.928 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.932 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op1 input kernel:Default/Concat-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.952 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d164100 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.627.978 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op1 input kernel:Default/AllGather-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.003 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d164610 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.030 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.628.045 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_16_add_rms_norm_fusion in 95.41 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.048 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d164b90 origin ref count:2 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.628.067 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.070 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.088 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d165080 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.115 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op2 input kernel:Default/Concat-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.133 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x3d165500 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.177 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[y] debug_name: @kernel_graph0:param_y use count is: 1 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.202 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[x] debug_name: @kernel_graph0:param_x use count is: 1 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.628.215 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_17_add_cast_rms_norm_cast_fusion in 124.6 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.224 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1603] AddControlArrowForNoInputActor] Add control arrow for no input arrow actor: kernel_graph0_SuperKernelActor [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.628.239 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.244 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph0_SuperKernelActor@ [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.352 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:887] Transform] [PROF]GraphSchedulerLink costs 4.717 msec. [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.424 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 1_actor_set_kernel_graph_0_invalid_data_arrow_elimination in 1.24995 us [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.462 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 2_actor_set_kernel_graph_0_multi_actor_fusion in 11.4801 us [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.484 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 3_actor_set_kernel_graph_0_batch_data_arrow_fusion in 0.92003 us [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.507 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:904] Transform] Graph(kernel_graph_0) transforms actor end. [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.557 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:354] Init] kernel_graph_0 has the parameter input num: 2 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.628.563 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_18_split_concat_fusion in 297.31 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.628.592 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion is enabled. [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.603 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1153] CompileGraphs] [PROF]GraphScheduler costs 6.301 msec. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.642 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:29] operator()] Create MultiStreamController. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.670 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:40] Refresh] Stream manager initialize, device_context : 0x28b10d60, stream_size : 5. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.692 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:214] Resize] Task id on stream manager initialize : 0, stream_size : 5. [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.718 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1166] CompileGraphs] [PROF]compile_backend_graph costs 2488.89 msec. [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.748 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1169] CompileGraphs] Status record: end compile function graph: 4_3_1___main___Net_construct_20, produce actor: kernel_graph_0 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.774 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end task_emit action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.797 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.829 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:281] SetLoopCount] Change vm_loop_flag to 0, set loop_size to 1 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.858 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start execute action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.885 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end execute action. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.628.903 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.628.892 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_19_matmul_elemwise_fusion in 271.92 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.628.922 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion is enabled. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.629.194 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_20_inference_qbmm_allreduce_add_fusion in 245.53 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.629.220 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops is enabled. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.629.466 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_unify_mindir_2_21_remove_fa_tensor_to_tuple_ops in 219.8 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.629.516 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:318] GEUnifyMindIR] [PROF]GEUnifyMindIR costs 8.485 msec. TotalTime = 2.56621, [21] [bootstrap]: 0.00136931 [type_inference]: 0.0158688 [auto_monad]: 0.00033505 [graph_reusing]: 9.92001e-05 [inline]: 0.0137439, [2] [rewriter_before_opt_a]: 0.00017081 [a1a2]: 0.0134597, [2] [Cycle 1]: 0.00200611, [11] [expand_dump_flag]: 4.0899e-06 [switch_simplify]: 9.287e-05 [loop_unroll]: 7.832e-05 [a_1]: 0.00028424 [recompute_prepare]: 2.645e-05 [updatestate_depend_eliminate]: 8.11997e-06 [updatestate_assign_eliminate]: 3.52005e-06 [updatestate_loads_eliminate]: 3.09001e-06 [parameter_eliminate]: 5.42006e-06 [a_2]: 0.00070697 [parallel_inline_pass]: 2.264e-05 [Cycle 2]: 0.00140467, [11] [expand_dump_flag]: 1.17009e-06 [switch_simplify]: 2.416e-05 [loop_unroll]: 2.384e-05 [a_1]: 0.00013573 [recompute_prepare]: 2.358e-05 [updatestate_depend_eliminate]: 3.84997e-06 [updatestate_assign_eliminate]: 2.96999e-06 [updatestate_loads_eliminate]: 2.72004e-06 [parameter_eliminate]: 1.87999e-06 [a_2]: 0.00069887 [parallel_inline_pass]: 2.36599e-05 [parallel-infer-symbol]: 0.00012474 [pre_auto_parallel]: 0.00011377 [insert-virtual-dataset]: 0.00099768 [parallel-infer-symbol-second]: 6.142e-05 [dataset_repeat_opt]: 0.00029999 [pipeline_split]: 0.0001266 [optimize]: 0.0417228, [52] [py_interpret_to_execute]: 5.46599e-05 [rewriter_before_opt_a]: 6.99301e-05 [opt_a]: 0.0326885, [3] [Cycle 1]: 0.0166769, [46] [expand_dump_flag]: 1.67999e-06 [switch_simplify]: 3.43201e-05 [loop_unroll]: 2.855e-05 [a_1]: 0.00029831 [recompute_prepare]: 2.791e-05 [updatestate_depend_eliminate]: 7.55e-06 [updatestate_assign_eliminate]: 5.29003e-06 [updatestate_loads_eliminate]: 5.12996e-06 [parameter_eliminate]: 2.64996e-06 [a_2]: 0.00077529 [accelerated_algorithm]: 2.80001e-05 [shard]: 3.698e-05 [meta_shard_fg_expand]: 4.22006e-06 [shard_inline]: 2.754e-05 [auto_parallel]: 3.595e-05 [parallel]: 0.00873632 [flash_sp]: 2.483e-05 [merge_comm]: 3.031e-05 [allreduce_fusion]: 2.663e-05 [matmul_add_comm_reduction]: 3.197e-05 [allreduce_slice_to_reducescatter]: 5.50062e-07 [virtual_shard_identity]: 7.54e-05 [virtual_dataset]: 0.00010306 [get_grad_eliminate_]: 6.367e-05 [virtual_output]: 7.826e-05 [merge_forward]: 2.602e-05 [cell_reuse_recompute_pass]: 3.9899e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00013384 [before_grad]: 8.443e-05 [inplace_validation]: 2.502e-05 [parallel_renormalize]: 0.00248783 [update_top_fg]: 1.00001e-06 [cast_eliminate]: 7.348e-05 [meta_fg_expand]: 2.71001e-05 [inplace_validation_after_expand]: 3.533e-05 [flash_sp_send_recv_attached]: 4.43601e-05 [receive_attached]: 1.3e-05 [after_resolve]: 6.99101e-05 [a_after_grad]: 9.24499e-05 [special_op_eliminate]: 6.72099e-05 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 5.72996e-06 [auto_monad_grad]: 2.81993e-06 [auto_monad_eliminator]: 4.325e-05 [cse]: 0.00016642 [a_3]: 0.00067756 [Cycle 2]: 0.00926792, [46] [expand_dump_flag]: 2.05997e-06 [switch_simplify]: 6.219e-05 [loop_unroll]: 6.1e-05 [a_1]: 0.00132628 [recompute_prepare]: 6.064e-05 [updatestate_depend_eliminate]: 2.791e-05 [updatestate_assign_eliminate]: 2.31101e-05 [updatestate_loads_eliminate]: 2.167e-05 [parameter_eliminate]: 3.13995e-06 [a_2]: 0.00131985 [accelerated_algorithm]: 0.00012944 [shard]: 4.02599e-05 [meta_shard_fg_expand]: 1.429e-05 [shard_inline]: 7.13799e-05 [auto_parallel]: 6.104e-05 [parallel]: 1.22499e-05 [flash_sp]: 3.537e-05 [merge_comm]: 3.00599e-05 [allreduce_fusion]: 2.60801e-05 [matmul_add_comm_reduction]: 2.87701e-05 [allreduce_slice_to_reducescatter]: 7.59959e-07 [virtual_shard_identity]: 6.18501e-05 [virtual_dataset]: 6.017e-05 [get_grad_eliminate_]: 5.905e-05 [virtual_output]: 5.703e-05 [merge_forward]: 2.30201e-05 [cell_reuse_recompute_pass]: 4.47e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00012604 [before_grad]: 7.88401e-05 [inplace_validation]: 2.271e-05 [parallel_renormalize]: 1.30036e-07 [update_top_fg]: 5.00004e-07 [cast_eliminate]: 6.011e-05 [meta_fg_expand]: 2.52699e-05 [inplace_validation_after_expand]: 3.28401e-05 [flash_sp_send_recv_attached]: 1.83005e-06 [receive_attached]: 1.69e-06 [after_resolve]: 6.598e-05 [a_after_grad]: 8.283e-05 [special_op_eliminate]: 5.84801e-05 [renormalize]: 0.00215592 [add_forward_monad_depend]: 6.27991e-06 [auto_monad_grad]: 2.84996e-06 [auto_monad_eliminator]: 4.382e-05 [cse]: 0.0001503 [a_3]: 0.00060546 [Cycle 3]: 0.00668595, [46] [expand_dump_flag]: 2.13995e-06 [switch_simplify]: 6.133e-05 [loop_unroll]: 5.98e-05 [a_1]: 0.00121306 [recompute_prepare]: 5.76801e-05 [updatestate_depend_eliminate]: 2.925e-05 [updatestate_assign_eliminate]: 2.522e-05 [updatestate_loads_eliminate]: 2.366e-05 [parameter_eliminate]: 3.91994e-06 [a_2]: 0.00128735 [accelerated_algorithm]: 6.45601e-05 [shard]: 4.3e-05 [meta_shard_fg_expand]: 1.30601e-05 [shard_inline]: 5.97701e-05 [auto_parallel]: 6.16299e-05 [parallel]: 9.98005e-06 [flash_sp]: 1.36998e-06 [merge_comm]: 2.90599e-05 [allreduce_fusion]: 2.53801e-05 [matmul_add_comm_reduction]: 3.101e-05 [allreduce_slice_to_reducescatter]: 5.10016e-07 [virtual_shard_identity]: 6.059e-05 [virtual_dataset]: 5.883e-05 [get_grad_eliminate_]: 5.79801e-05 [virtual_output]: 5.766e-05 [merge_forward]: 2.304e-05 [cell_reuse_recompute_pass]: 3.82005e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00011746 [before_grad]: 7.699e-05 [inplace_validation]: 2.24001e-05 [parallel_renormalize]: 8.00937e-08 [update_top_fg]: 3.59956e-07 [cast_eliminate]: 5.79001e-05 [meta_fg_expand]: 2.61801e-05 [inplace_validation_after_expand]: 3.129e-05 [flash_sp_send_recv_attached]: 1.76008e-06 [receive_attached]: 1.45996e-06 [after_resolve]: 6.22601e-05 [a_after_grad]: 8.35999e-05 [special_op_eliminate]: 6.504e-05 [renormalize]: 1.20024e-07 [add_forward_monad_depend]: 3.01003e-06 [auto_monad_grad]: 2.91003e-06 [auto_monad_eliminator]: 3.922e-05 [cse]: 0.00014054 [a_3]: 0.00060805 [py_interpret_to_execute_after_opt_a]: 7.57601e-05 [slice_cell_reuse_recomputed_activation]: 3.815e-05 [rewriter_after_opt_a]: 0.00034266 [convert_after_rewriter]: 6.84101e-05 [order_py_execute_after_rewriter]: 5.796e-05 [opt_b]: 0.00255455, [1] [Cycle 1]: 0.00250813, [7] [b_1]: 0.00193324 [b_2]: 6.152e-05 [updatestate_depend_eliminate]: 2.51801e-05 [updatestate_assign_eliminate]: 2.23201e-05 [updatestate_loads_eliminate]: 2.177e-05 [renormalize]: 3.40049e-07 [cse]: 0.00012662 [optimize_parallel_all_gather_comm]: 7.21e-05 [overlap_param_gather]: 3.687e-05 [cconv]: 6.883e-05 [loop_unroll]: 0.00083757 [opt_after_cconv]: 0.00078561, [1] [Cycle 1]: 0.00074279, [7] [c_1]: 0.00022402 [parameter_eliminate]: 2.2701e-06 [updatestate_depend_eliminate]: 2.908e-05 [updatestate_assign_eliminate]: 2.239e-05 [updatestate_loads_eliminate]: 2.329e-05 [cse]: 0.00013177 [renormalize]: 4.30038e-07 [remove_dup_value]: 0.00030778 [tuple_transform]: 0.00042252, [1] [Cycle 1]: 0.00038014, [2] [d_1]: 0.00028904 [renormalize]: 2.59955e-07 [partial_unused_args_eliminate]: 3.802e-05 [add_cache_embedding]: 8.524e-05 [add_recomputation]: 0.00021317 [cse_after_recomputation]: 0.00017981, [1] [Cycle 1]: 0.00013557, [1] [cse]: 7.548e-05 [environ_conv]: 7.944e-05 [swap_dp_allreduce_reducescatter]: 6.12e-05 [bias_add_comm_swap]: 3.713e-05 [label_micro_interleaved_index]: 3.642e-05 [label_fine_grained_interleaved_index]: 3.54099e-05 [merge_cast_opt]: 3.537e-05 [slice_recompute_activation]: 6.296e-05 [micro_interleaved_order_control]: 3.594e-05 [assign_add_opt]: 0.00018422 [ForceFp32Comm]: 4.411e-05 [remove_cast_before_assign_add]: 5.883e-05 [full_micro_interleaved_order_control]: 3.634e-05 [reorder_send_recv_between_fp_bp]: 3.575e-05 [comm_op_add_attrs]: 8.22101e-05 [add_comm_op_reuse_tag]: 9.15e-05 [interleave_split_concat_branches]: 3.47201e-05 [interleave_parallel_branches]: 3.54199e-05 [overlap_opt_shard_in_pipeline]: 6.35501e-05 [overlap_opt_shard_grad_in_pipeline]: 3.76101e-05 [control_data_broadcast_order]: 3.566e-05 [grouped_pairwise_exchange_alltoall]: 4.329e-05 [offloading_packed_experts]: 9.409e-05 [overlap_recompute_and_grad_model_parallel]: 3.506e-05 [overlap_grad_matmul_and_grad_allreduce]: 8.935e-05 [overlap_recompute_allgather_and_fa_grad]: 5.296e-05 [overlap_grad_ring_attention]: 7.96199e-05 [overlap_grad_flash_sp]: 6.307e-05 [begin_end_overlap_inline]: 3.529e-05 [split_matmul_comm_elemetwise]: 3.471e-05 [split_layernorm_comm]: 3.526e-05 [handle_group_info]: 4.556e-05 [symbol_engine_optimizer]: 0.0005738, [1] [Cycle 1]: 0.00053372, [6] [build]: 3.42299e-05 [elim_shapecalc]: 5.812e-05 [elim_not_effective]: 7.175e-05 [opt_reshape]: 5.10201e-05 [fold_const_symbol]: 6.87001e-05 [renormalize]: 3.30037e-07 [pipeline_parallel_scheduler]: 5.66901e-05 [auto_monad_reorder]: 0.00011728 [get_jit_bprop_graph]: 5.373e-05 [rewriter_after_jit_bprop_graph]: 4.995e-05 [eliminate_special_op_node]: 0.00090184 [distribtued_split]: 0.00023568 [validate]: 0.00015839 [task_emit]: 2.48932 [execute]: 6.38601e-05 Sums bootstrap : 0.001369s : 0.05% type_inference : 0.015869s : 0.62% auto_monad : 0.000335s : 0.01% graph_reusing : 0.000099s : 0.00% inline.rewriter_before_opt_a : 0.000171s : 0.01% inline.a1a2.expand_dump_flag : 0.000005s : 0.00% inline.a1a2.switch_simplify : 0.000117s : 0.00% inline.a1a2.loop_unroll : 0.000102s : 0.00% inline.a1a2.a_1 : 0.000420s : 0.02% inline.a1a2.recompute_prepare : 0.000050s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000012s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000006s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000006s : 0.00% inline.a1a2.parameter_eliminate : 0.000007s : 0.00% inline.a1a2.a_2 : 0.001406s : 0.06% inline.a1a2.parallel_inline_pass : 0.000046s : 0.00% parallel-infer-symbol : 0.000125s : 0.00% pre_auto_parallel : 0.000114s : 0.00% insert-virtual-dataset : 0.000998s : 0.04% parallel-infer-symbol-second : 0.000061s : 0.00% dataset_repeat_opt : 0.000300s : 0.01% pipeline_split : 0.000127s : 0.00% optimize.py_interpret_to_execute : 0.000055s : 0.00% optimize.rewriter_before_opt_a : 0.000070s : 0.00% optimize.opt_a.expand_dump_flag : 0.000006s : 0.00% optimize.opt_a.switch_simplify : 0.000158s : 0.01% optimize.opt_a.loop_unroll : 0.000149s : 0.01% optimize.opt_a.a_1 : 0.002838s : 0.11% optimize.opt_a.recompute_prepare : 0.000146s : 0.01% optimize.opt_a.updatestate_depend_eliminate : 0.000065s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000054s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000050s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.003382s : 0.13% optimize.opt_a.accelerated_algorithm : 0.000222s : 0.01% optimize.opt_a.shard : 0.000120s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000032s : 0.00% optimize.opt_a.shard_inline : 0.000159s : 0.01% optimize.opt_a.auto_parallel : 0.000159s : 0.01% optimize.opt_a.parallel : 0.008759s : 0.34% optimize.opt_a.flash_sp : 0.000062s : 0.00% optimize.opt_a.merge_comm : 0.000089s : 0.00% optimize.opt_a.allreduce_fusion : 0.000078s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000092s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000198s : 0.01% optimize.opt_a.virtual_dataset : 0.000222s : 0.01% optimize.opt_a.get_grad_eliminate_ : 0.000181s : 0.01% optimize.opt_a.virtual_output : 0.000193s : 0.01% optimize.opt_a.merge_forward : 0.000072s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000012s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000377s : 0.01% optimize.opt_a.before_grad : 0.000240s : 0.01% optimize.opt_a.inplace_validation : 0.000070s : 0.00% optimize.opt_a.parallel_renormalize : 0.002488s : 0.10% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000191s : 0.01% optimize.opt_a.meta_fg_expand : 0.000079s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000099s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000048s : 0.00% optimize.opt_a.receive_attached : 0.000016s : 0.00% optimize.opt_a.after_resolve : 0.000198s : 0.01% optimize.opt_a.a_after_grad : 0.000259s : 0.01% optimize.opt_a.special_op_eliminate : 0.000191s : 0.01% optimize.opt_a.renormalize : 0.002156s : 0.08% optimize.opt_a.add_forward_monad_depend : 0.000015s : 0.00% optimize.opt_a.auto_monad_grad : 0.000009s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000126s : 0.00% optimize.opt_a.cse : 0.000457s : 0.02% optimize.opt_a.a_3 : 0.001891s : 0.07% optimize.py_interpret_to_execute_after_opt_a : 0.000076s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000038s : 0.00% optimize.rewriter_after_opt_a : 0.000343s : 0.01% optimize.convert_after_rewriter : 0.000068s : 0.00% optimize.order_py_execute_after_rewriter : 0.000058s : 0.00% optimize.opt_b.b_1 : 0.001933s : 0.08% optimize.opt_b.b_2 : 0.000062s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000025s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000022s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000022s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000127s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000072s : 0.00% optimize.overlap_param_gather : 0.000037s : 0.00% optimize.cconv : 0.000069s : 0.00% optimize.loop_unroll : 0.000838s : 0.03% optimize.opt_after_cconv.c_1 : 0.000224s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000029s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000022s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000023s : 0.00% optimize.opt_after_cconv.cse : 0.000132s : 0.01% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000308s : 0.01% optimize.tuple_transform.d_1 : 0.000289s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000038s : 0.00% optimize.add_cache_embedding : 0.000085s : 0.00% optimize.add_recomputation : 0.000213s : 0.01% optimize.cse_after_recomputation.cse : 0.000075s : 0.00% optimize.environ_conv : 0.000079s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000061s : 0.00% optimize.bias_add_comm_swap : 0.000037s : 0.00% optimize.label_micro_interleaved_index : 0.000036s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000035s : 0.00% optimize.merge_cast_opt : 0.000035s : 0.00% optimize.slice_recompute_activation : 0.000063s : 0.00% optimize.micro_interleaved_order_control : 0.000036s : 0.00% optimize.assign_add_opt : 0.000184s : 0.01% optimize.ForceFp32Comm : 0.000044s : 0.00% optimize.remove_cast_before_assign_add : 0.000059s : 0.00% optimize.full_micro_interleaved_order_control : 0.000036s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000036s : 0.00% optimize.comm_op_add_attrs : 0.000082s : 0.00% optimize.add_comm_op_reuse_tag : 0.000091s : 0.00% optimize.interleave_split_concat_branches : 0.000035s : 0.00% optimize.interleave_parallel_branches : 0.000035s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000064s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000038s : 0.00% optimize.control_data_broadcast_order : 0.000036s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000043s : 0.00% optimize.offloading_packed_experts : 0.000094s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000035s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000089s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000053s : 0.00% optimize.overlap_grad_ring_attention : 0.000080s : 0.00% optimize.overlap_grad_flash_sp : 0.000063s : 0.00% optimize.begin_end_overlap_inline : 0.000035s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000035s : 0.00% optimize.split_layernorm_comm : 0.000035s : 0.00% optimize.handle_group_info : 0.000046s : 0.00% optimize.symbol_engine_optimizer.build : 0.000034s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000058s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000072s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000051s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000069s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000057s : 0.00% auto_monad_reorder : 0.000117s : 0.00% get_jit_bprop_graph : 0.000054s : 0.00% rewriter_after_jit_bprop_graph : 0.000050s : 0.00% eliminate_special_op_node : 0.000902s : 0.04% distribtued_split : 0.000236s : 0.01% validate : 0.000158s : 0.01% task_emit : 2.489321s : 97.75% execute : 0.000064s : 0.00% Time group info: ------[substitution.] 0.000536 353 10.72% : 0.000057s : 10: substitution.arithmetic_simplify 1.30% : 0.000007s : 21: substitution.elim_not_effective 3.10% : 0.000017s : 14: substitution.float_tuple_getitem_switch 1.22% : 0.000007s : 21: substitution.fold_const_symbol 3.86% : 0.000021s : 30: substitution.graph_param_transform 10.76% : 0.000058s : 1: substitution.inline 5.53% : 0.000030s : 66: substitution.j_node_and_user_rematch 6.42% : 0.000034s : 4: substitution.less_batch_normalization 2.65% : 0.000014s : 10: substitution.minmaximum_grad 7.33% : 0.000039s : 66: substitution.remove_not_recompute_node 1.54% : 0.000008s : 6: substitution.replace_old_param 11.27% : 0.000060s : 18: substitution.tuple_list_convert_item_index_to_positive 6.29% : 0.000034s : 18: substitution.tuple_list_get_item_const_eliminator 5.44% : 0.000029s : 18: substitution.tuple_list_get_item_depend_reorder 14.41% : 0.000077s : 30: substitution.tuple_list_get_item_eliminator 5.97% : 0.000032s : 18: substitution.tuple_list_get_set_item_eliminator 1.76% : 0.000009s : 1: substitution.virtual_dataset_eliminate 0.43% : 0.000002s : 1: substitution.virtual_output_eliminate ------[type_inference.] 0.015697 2 97.75% : 0.015343s : 1: type_inference.infer 2.25% : 0.000353s : 1: type_inference.specialize ------[replace.] 0.000064 5 18.43% : 0.000012s : 1: replace.inline 41.67% : 0.000027s : 2: replace.tuple_list_get_item_eliminator 27.11% : 0.000017s : 1: replace.virtual_dataset_eliminate 12.79% : 0.000008s : 1: replace.virtual_output_eliminate ------[match.] 0.000070 5 80.95% : 0.000057s : 1: match.inline 5.67% : 0.000004s : 2: match.tuple_list_get_item_eliminator 11.41% : 0.000008s : 1: match.virtual_dataset_eliminate 1.97% : 0.000001s : 1: match.virtual_output_eliminate ------[predicate.] 0.001428 11241 0.79% : 0.000011s : 100: predicate.accumulaten_eliminater 0.43% : 0.000006s : 30: predicate.ad_related_special_op_eliminate 0.88% : 0.000013s : 97: predicate.addn_check_dump 0.77% : 0.000011s : 100: predicate.addn_zero_filter 0.75% : 0.000011s : 100: predicate.adjust_all_reduce_mul_add 2.18% : 0.000031s : 197: predicate.arithmetic_simplify 2.09% : 0.000030s : 226: predicate.cast_eliminate 1.12% : 0.000016s : 126: predicate.check_bprop_eliminate 0.91% : 0.000013s : 97: predicate.compare_switch_simplify 0.26% : 0.000004s : 41: predicate.const_output_eliminate 0.32% : 0.000005s : 30: predicate.convert_tensor_all_eliminate 1.09% : 0.000016s : 102: predicate.convert_tensor_eliminate 0.89% : 0.000013s : 97: predicate.depend_value_elim 0.84% : 0.000012s : 100: predicate.dict_get_item_const_eliminator 0.86% : 0.000012s : 100: predicate.dict_get_item_eliminator 0.85% : 0.000012s : 100: predicate.dict_set_item_eliminator 0.17% : 0.000002s : 30: predicate.elim_not_effective 0.36% : 0.000005s : 30: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000016s : 141: predicate.environ_add_const_eliminate 1.11% : 0.000016s : 141: predicate.environ_get_add_eliminate 1.11% : 0.000016s : 141: predicate.environ_get_depend_swap 2.05% : 0.000029s : 238: predicate.environ_get_eliminate 1.10% : 0.000016s : 141: predicate.environ_get_set_eliminate 0.87% : 0.000012s : 103: predicate.exchange_switch_depend_value 1.08% : 0.000015s : 103: predicate.float_depend_g_call 0.90% : 0.000013s : 97: predicate.float_environ_get_switch 1.26% : 0.000018s : 138: predicate.float_tuple_getitem_switch 0.16% : 0.000002s : 30: predicate.fold_const_symbol 1.20% : 0.000017s : 127: predicate.get_grad_eliminate 0.21% : 0.000003s : 30: predicate.graph_param_transform 0.90% : 0.000013s : 97: predicate.incorporate_call 0.83% : 0.000012s : 97: predicate.incorporate_call_switch 5.09% : 0.000073s : 479: predicate.inline 1.37% : 0.000020s : 126: predicate.inline_without_move 0.66% : 0.000009s : 126: predicate.j_node_and_user_rematch 1.07% : 0.000015s : 89: predicate.less_batch_normalization 1.48% : 0.000021s : 173: predicate.list_to_tuple_eliminator_ 2.27% : 0.000032s : 284: predicate.load_eliminater 0.56% : 0.000008s : 41: predicate.loop_unroll_after_grad 1.02% : 0.000015s : 105: predicate.loop_unroll_before_grad 1.53% : 0.000022s : 182: predicate.make_slice_get_slice_eliminator 0.87% : 0.000012s : 97: predicate.merge_addn 1.16% : 0.000017s : 126: predicate.micro_step_allgather_replace 1.18% : 0.000017s : 126: predicate.mini_step_allgather_replace 0.79% : 0.000011s : 100: predicate.minmaximum_grad 0.34% : 0.000005s : 30: predicate.mutable_eliminate 0.31% : 0.000004s : 30: predicate.opt_reshape 0.37% : 0.000005s : 41: predicate.parallel_virtual_node 1.02% : 0.000015s : 103: predicate.partial_defer_inline 1.23% : 0.000018s : 143: predicate.partial_eliminate 0.78% : 0.000011s : 100: predicate.print_const_string_wrapper 0.91% : 0.000013s : 97: predicate.reduce_all_const_elim 0.96% : 0.000014s : 100: predicate.reduce_eliminate 0.68% : 0.000010s : 126: predicate.remove_not_recompute_node 1.55% : 0.000022s : 228: predicate.replace_applicator 0.70% : 0.000010s : 126: predicate.replace_old_param 0.25% : 0.000004s : 41: predicate.reset_defer_inline 0.79% : 0.000011s : 100: predicate.reshape_eliminate 1.14% : 0.000016s : 126: predicate.row_tensor_add_zeros_like 0.38% : 0.000005s : 41: predicate.row_tensor_eliminate 1.26% : 0.000018s : 126: predicate.same_eliminate 0.63% : 0.000009s : 97: predicate.set_cell_output_no_recompute 1.25% : 0.000018s : 127: predicate.shard_identity_eliminate 1.59% : 0.000023s : 167: predicate.special_op_eliminate 1.02% : 0.000015s : 97: predicate.specialize_transform 1.22% : 0.000017s : 126: predicate.split_environ_get_set_with_tuple_value 1.35% : 0.000019s : 126: predicate.stack_unstack_eliminate 2.25% : 0.000032s : 284: predicate.stopgrad_eliminater 0.35% : 0.000005s : 41: predicate.switch_call_monad_eliminater 0.91% : 0.000013s : 103: predicate.switch_defer_inline 2.05% : 0.000029s : 229: predicate.switch_layer_defer_inline 3.24% : 0.000046s : 305: predicate.switch_simplify 0.80% : 0.000011s : 100: predicate.tile_eliminate 0.78% : 0.000011s : 100: predicate.transpose_eliminate 1.59% : 0.000023s : 171: predicate.tuple_list_convert_item_index_to_positive 1.55% : 0.000022s : 171: predicate.tuple_list_get_item_const_eliminator 1.39% : 0.000020s : 171: predicate.tuple_list_get_item_depend_reorder 2.61% : 0.000037s : 270: predicate.tuple_list_get_item_eliminator 1.42% : 0.000020s : 171: predicate.tuple_list_get_set_item_eliminator 2.44% : 0.000035s : 268: predicate.tuple_list_set_item_eliminator 1.48% : 0.000021s : 173: predicate.tuple_to_list_eliminator_ 2.28% : 0.000033s : 284: predicate.updatestate_pure_node_eliminater 3.45% : 0.000049s : 381: predicate.updatestate_useless_node_eliminater 0.39% : 0.000006s : 41: predicate.value_based_eliminate 1.23% : 0.000018s : 130: predicate.virtual_dataset_eliminate 1.15% : 0.000016s : 128: predicate.virtual_output_eliminate 0.38% : 0.000005s : 41: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000399 5 5.56% : 0.000022s : 1: func_graph_cloner_run.FuncGraphClonerGraph 94.44% : 0.000376s : 4: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 -[INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.630.025 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs -----[others.] 2.635546 283 0.00% : 0.000049s : 1: ForceFp32Comm 0.51% : 0.013465s : 1: a1a2 0.00% : 0.000090s : 1: add_cache_embedding 0.00% : 0.000097s : 1: add_comm_op_reuse_tag 0.01% : 0.000219s : 1: add_recomputation 0.01% : 0.000189s : 1: assign_add_opt 0.01% : 0.000347s : 1: auto_monad 0.00% : 0.000125s : 1: auto_monad_reorder 0.00% : 0.000039s : 1: begin_end_overlap_inline 0.00% : 0.000041s : 1: bias_add_comm_swap 0.05% : 0.001395s : 1: bootstrap 0.00% : 0.000073s : 1: cconv 0.00% : 0.000087s : 1: comm_op_add_attrs 0.00% : 0.000040s : 1: control_data_broadcast_order 0.00% : 0.000074s : 1: convert_after_rewriter 0.01% : 0.000185s : 1: cse_after_recomputation 0.01% : 0.000308s : 1: dataset_repeat_opt 0.01% : 0.000245s : 1: distribtued_split 0.03% : 0.000911s : 1: eliminate_special_op_node 0.00% : 0.000085s : 1: environ_conv 0.00% : 0.000073s : 1: execute 0.00% : 0.000040s : 1: full_micro_interleaved_order_control 0.00% : 0.000060s : 1: get_jit_bprop_graph 0.00% : 0.000106s : 1: graph_reusing 0.00% : 0.000047s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000050s : 1: handle_group_info 0.52% : 0.013755s : 1: inline 0.04% : 0.001011s : 1: insert-virtual-dataset 0.00% : 0.000039s : 1: interleave_parallel_branches 0.00% : 0.000039s : 1: interleave_split_concat_branches 0.00% : 0.000040s : 1: label_fine_grained_interleaved_index 0.00% : 0.000040s : 1: label_micro_interleaved_index 0.03% : 0.000844s : 1: loop_unroll 0.00% : 0.000039s : 1: merge_cast_opt 0.00% : 0.000040s : 1: micro_interleaved_order_control 0.00% : 0.000098s : 1: offloading_packed_experts 0.02% : 0.000589s : 44: opt.transform.a1a2 0.00% : 0.000052s : 1: opt.transform.loop_unroll_optimizer 0.27% : 0.007154s : 123: opt.transform.opt_a 0.01% : 0.000205s : 1: opt.transform.opt_after_cconv 0.04% : 0.000999s : 27: opt.transform.opt_b 0.01% : 0.000268s : 1: opt.transform.opt_trans_graph 0.00% : 0.000106s : 3: opt.transform.special_op_eliminate 0.01% : 0.000173s : 4: opt.transform.symbol_engine_opt 1.24% : 0.032694s : 1: opt_a 0.03% : 0.000791s : 1: opt_after_cconv 0.10% : 0.002560s : 1: opt_b 1.58% : 0.041734s : 1: optimize 0.00% : 0.000077s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000062s : 1: order_py_execute_after_rewriter 0.00% : 0.000068s : 1: overlap_grad_flash_sp 0.00% : 0.000093s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000085s : 1: overlap_grad_ring_attention 0.00% : 0.000042s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000068s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000042s : 1: overlap_param_gather 0.00% : 0.000057s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000039s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000134s : 1: parallel-infer-symbol 0.00% : 0.000069s : 1: parallel-infer-symbol-second 0.00% : 0.000042s : 1: partial_unused_args_eliminate 0.00% : 0.000064s : 1: pipeline_parallel_scheduler 0.01% : 0.000134s : 1: pipeline_split 0.00% : 0.000122s : 1: pre_auto_parallel 0.00% : 0.000060s : 1: py_interpret_to_execute 0.00% : 0.000082s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000063s : 1: remove_cast_before_assign_add 0.01% : 0.000314s : 1: remove_dup_value 0.11% : 0.002953s : 2: renormalize.infer 0.06% : 0.001675s : 2: renormalize.specialize 0.00% : 0.000040s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000056s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000350s : 1: rewriter_after_opt_a 0.01% : 0.000253s : 2: rewriter_before_opt_a 0.00% : 0.000043s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000067s : 1: slice_recompute_activation 0.00% : 0.000039s : 1: split_layernorm_comm 0.00% : 0.000039s : 1: split_matmul_comm_elemetwise 0.00% : 0.000066s : 1: swap_dp_allreduce_reducescatter 0.02% : 0.000579s : 1: symbol_engine_optimizer 94.45% : 2.489359s : 1: task_emit 0.02% : 0.000427s : 1: tuple_transform 0.60% : 0.015885s : 1: type_inference 0.01% : 0.000289s : 1: validate [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.630.113 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1785] Run] End [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.630.213 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:239] SavePassesConfig] Running_passes: ['a1a2.r1.a_1', 'a1a2.r1.a_1.inline', 'opt_a.r1.auto_parallel', 'opt_a.r1.flash_sp', 'opt_a.r1.flash_sp_send_recv_attached', 'opt_a.r1.parallel', 'opt_a.r1.parallel_renormalize', 'opt_a.r1.receive_attached', 'opt_a.r1.virtual_dataset', 'opt_a.r1.virtual_dataset.virtual_dataset_eliminate', 'opt_a.r1.virtual_output', 'opt_a.r1.virtual_output.virtual_output_eliminate', 'opt_a.r2.a_1', 'opt_a.r2.a_1.tuple_list_get_item_eliminator', 'opt_a.r2.accelerated_algorithm', 'opt_a.r2.accelerated_algorithm.less_batch_normalization', 'opt_a.r2.auto_parallel', 'opt_a.r2.flash_sp', 'opt_a.r2.renormalize', 'opt_a.r3.auto_parallel'] [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.630.225 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:38] MarkRefGraph] Mark graph is ref graph: 0 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.630.256 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1047] SaveCompiledGraph] Save compiled func graph(4_3_1___main___Net_construct_20) phase(train.1738915083917166592.281470987261328.0..)! [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.630.293 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1065] SaveCompiledGraph] End save compiled func graph! [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.630.327 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1326] CompileInner] [PROF]ParallelPostProcess costs 0.008 msec. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.630.346 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1154] CleanCompileRes] Clean compile resource start [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.630.968 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_unfold_inputs_for_special_nodes_pm_0_ascend_convert_tuple_input_to_dynamic_input in 643.45 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.631.170 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_0_process_call_inline in 102.03 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.631.562 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_1_seed_adapter in 334.22 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.631.630 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_2_insert_tensor_move_for_communication in 31.41 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.631.759 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_3_process partial inline in 96.23 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.631.962 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_4_expander_fallback in 170 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.632.093 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_5_convert_pad_v3_paddings in 99.43 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.632.213 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_6_convert_pad_v3_grad_paddings in 88.58 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.632.448 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_7_resize_bilinear_add_attr in 204.64 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.632.483 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_pm_8_backend_custom_depend in 7.38 us [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.632.494 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1175] CleanCompileRes] Clean compile resource end [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.632.528 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:237] GEBackendOptimizeACL] [PROF]GEBackendOptimizeACL costs 1.484 msec. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.632.531 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] End compiling 'Net.construct'. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.632.562 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1334] CompileInner] [PROF]CleanCompileRes costs 2.208 msec. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.632.580 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1335] CompileInner] Finish compiling. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.632.590 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:137] OptimizeACLGraph] [PROF]OptimizeACLGraph costs 2.368 msec. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.632.601 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1336] CompileInner] [PROF]compile_graph costs 2712.12 msec. [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.632.741 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.633.022 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_x, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] PARALLEL(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.633.075 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_y, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.633.420 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] StridedSlice select aclop kernel [INFO] UTILS(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.633.613 [mindspore/ccsrc/utils/dynamic_obfuscation/registry_opaque_predicate.cc:112] init_calling_count] calling_count_ has been initialized to 0 [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.633.727 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1893] RunGraph] Status record: start run actor: kernel_graph_0 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.633.782 [mindspore/ccsrc/runtime/device/pre_launch_comm.cc:200] PreLaunchCommKernel] No hccl kernel to pre launch [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.633.810 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1918] RunGraph] [PROF]PreLaunchCommKernel costs 0.041 msec. [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.633.850 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:966] SpawnMultiPipelineActor] Enable runtime asynchronously launch kernel, default actor thread num 5, current actor thread num: 5 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.633.897 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.633.941 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.633.975 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:394] operator()] Init defrag memory step freq. [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.633.995 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:396] operator()] Config defrag memory step freq : . [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.012 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:405] operator()] Defrag memory step freq : 100. [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.052 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:638] PrepareDataForDeviceTensorStore] Prepare store data, input tensor size: 0, arg size: 2 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.074 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:835] AllocGEFixMemory] Start AllocGEFixMemory [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.122 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:652] PrepareDataForDeviceTensorStore] prepare data for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.160 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-0, debug name:ValueNode 1, front node:ValueNode 1 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.186 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 1 front node:ValueNode 1 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.207 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d14a0f0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.226 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 1 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.269 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.634.263 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Mul select aclnn kernel [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.634.307 [mindspore/ops/kernel/ascend/opapi/aclnn_kernel_build.cc:77] IsEnabledAclnnDispatch] AllGather is not defined in opdef. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.634.460 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] AllGather select hccl kernel [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.634.571 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op0 is view op and not support aclnn [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.634.763 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Split select aclop kernel [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.871 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-1, debug name:ValueNode (1, 1, 1), front node:ValueNode (1, 1, 1) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.909 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (1, 1, 1) front node:ValueNode (1, 1, 1) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.931 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15d790 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.634.958 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:612] CollectOpSelectedType] Concat select aclnn kernel [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.634.952 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (1, 1, 1) [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.635.068 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:728] IsEnableAclnn] Kernel Default/Split-op1 is view op and not support aclnn [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.635.214 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-2, debug name:ValueNode 2, front node:ValueNode 2 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.635.244 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 2 front node:ValueNode 2 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.635.267 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15db40 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.635.289 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 2 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.635.381 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:298] SelectKernel] [PROF]SelectKernel costs 2.741 msec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.635.423 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:302] PrintOpSelectedNum] Number of GE_KERNEL, INTERNAL_KERNEL, OPAPI_KERNEL, ACL_KERNEL, HCCL_KERNEL, HOST_KERNEL: [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.635.445 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:308] PrintOpSelectedNum] 0 0 3 8 3 0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.635.531 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-3, debug name:ValueNode (2, 2, 2), front node:ValueNode (2, 2, 2) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.635.564 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 2, 2) front node:ValueNode (2, 2, 2) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.635.584 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15e230 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.635.606 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 2, 2) [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.635.634 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_0_set_fracz_group_attr in 68.15 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.635.841 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-4, debug name:ValueNode (4, 4, 4), front node:ValueNode (4, 4, 4) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.635.873 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (4, 4, 4) front node:ValueNode (4, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.635.893 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15ea20 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.635.914 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (4, 4, 4) [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.635.918 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_1_insert_identity in 249.38 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.636.148 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-5, debug name:ValueNode (2, 0, 0), front node:ValueNode (2, 0, 0) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.636.181 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 0, 0) front node:ValueNode (2, 0, 0) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.636.201 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15f210 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.636.230 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 0, 0) [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.636.398 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_2_insert_type_transform_op in 444.73 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.636.463 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-6, debug name:ValueNode 0, front node:ValueNode 0 for graph:kernel_graph0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.636.478 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_after_kernel_select_pm_3_graph_view_replace in 49.09 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.636.492 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 0 front node:ValueNode 0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.636.516 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15f6d0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.636.522 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:288] GEBackendOptimizeACLAfterKernelSelect] [PROF]GEBackendOptimizeACLAfterKernelSelect costs 1.046 msec. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.636.536 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.636.576 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:172] OptimizeACLGraphAfterKernelSelect] [PROF]OptimizeACLGraphAfterKernelSelect costs 1.105 msec. [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.636.670 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.636.782 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-7, debug name:ValueNode (0, 0, 0), front node:ValueNode (0, 0, 0) for graph:kernel_graph0 [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.636.792 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:117] Run] graph kernel pass hwopt_fusion_after_inline_pm_0_DropoutGenMask is enabled. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.636.817 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 0, 0) front node:ValueNode (0, 0, 0) [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.636.820 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_fusion_after_inline_pm_0_DropoutGenMask in 1.28 us [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.636.838 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d15fee0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.636.859 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 0, 0) [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.636.936 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_1_cse in 87.91 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.636.981 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_2_eliminate_maketuple_getitem in 18.2 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.637.005 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_after_inline_pm_3_insert_move_to in 0.54 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.637.043 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:355] GEAfterInlineOptimize] [PROF]GEAfterInlineOptimize costs 0.265 msec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.637.078 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:391] InlineCallGraph] [PROF]InlineCallGraph costs 0.477 msec. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.637.091 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-8, debug name:ValueNode (0, 2, 0), front node:ValueNode (0, 2, 0) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.637.124 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 2, 0) front node:ValueNode (0, 2, 0) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.637.144 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d1606d0 [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.637.158 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.637.165 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 2, 0) [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.637.283 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:868] InlineSwitchGraph] [PROF]InlineSwitchGraph costs 0.177 msec. [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.637.315 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1055] CompileGraphImpl] [PROF]OptimizeGraph costs 7.098 msec. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.637.396 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-9, debug name:ValueNode (2, 4, 4), front node:ValueNode (2, 4, 4) for graph:kernel_graph0 [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.637.392 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.637.428 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 4, 4) front node:ValueNode (2, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.637.458 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x3d160ec0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.637.480 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 4, 4) [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.637.512 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.637.731 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_y front node:@4_3_1___main___Net_construct_20:param_y backend is weight:0 front is weight:0 [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:06.637.777 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_x front node:@4_3_1___main___Net_construct_20:param_x backend is weight:0 front is weight:0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.637.882 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph0_SuperKernelActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:1, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb97fa0f0,python):2025-02-07-15:58:06.637.879 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:0, sequential num:2001075757 [INFO] GE_ADPT(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.638.087 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.638.084 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.638.122 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.638.218 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_pool.cc:423] BestFitAscendMemoryPool] BestFitAscendMemoryPool constructed, older memory allocator is enabled. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.638.258 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:167] Initialize] Skip initialization of memory pool since init size is not configured. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.638.299 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.638.361 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 6_Default/StridedSlice-op0, front node: @4_3_1___main___Net_construct_20:param_x, with index: 0, addr index: 0, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.638.403 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 6_Default/StridedSlice-op0, outer index: 0, inner index:0, front node: @4_3_1___main___Net_construct_20:param_x [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.638.460 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 0, inner index: 0, dynamic is 0 [INFO] GE_ADPT(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.638.484 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.638.707 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 6_Default/StridedSlice-op0, input index: 0, device tensor: 0x3d15caa0, ptr: 0x12c7fd801400, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.638.710 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 0_Default/StreamSend-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.638.738 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor6_Default/StridedSlice-op0, actor input: 0, graph input: 1, device tensor: 0x3d15caa0, ptr: 0x12c7fd801400, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.638.759 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.638.820 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 9_Default/StridedSlice-op3, front node: @4_3_1___main___Net_construct_20:param_y, with index: 0, addr index: 1, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.638.851 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 9_Default/StridedSlice-op3, outer index: 1, inner index:0, front node: @4_3_1___main___Net_construct_20:param_y [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.638.879 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 1, inner index: 0, dynamic is 0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.638.970 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.002 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 2, actor name : 1_Default/StreamRecv-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.025 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.639.032 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 9_Default/StridedSlice-op3, input index: 0, device tensor: 0x3d15c6d0, ptr: 0x12c7fd801800, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.059 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.639.071 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor9_Default/StridedSlice-op3, actor input: 0, graph input: 0, device tensor: 0x3d15c6d0, ptr: 0x12c7fd801800, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.165 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.639.166 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph0_SuperKernelActor and check running condition:1, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.195 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 2_Default/StreamSend-op1, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb97fa0f0,python):2025-02-07-15:58:06.639.189 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:317] RunOpData] Actor(kernel_graph_0_OutputActor) receive the input op data and output position:0 device tensor:0x3d165a10 ptr:0 ref count:18446744073709551615 origin ref count:18446744073709551615 dynamic ref count:2147483647 from memory pool:0 output node:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} index:0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.215 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op1 [INFO] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.639.223 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:57] IncreaseLoopCount] Loop count actor(kernel_graph_0_LoopCountActor) running, loop count: 1, current count: 1, total running count: 1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:06.639.277 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb97fa0f0,python):2025-02-07-15:58:06.639.303 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:388] CreateOutputTensor] Create output tensor, output node: Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}, output index: 0, output position: 0, output kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb97fa0f0,python):2025-02-07-15:58:06.639.368 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:470] CreateOutputTensor] Create device tensor:0xfffe9c0095a0, size: 512 type:48 output node:Default/AllGather-op2 output index:0 output position:0, origin output device tensor: 0x3d165a10 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.382 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op1 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.639.373 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.408 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 3, actor name : 3_Default/StreamRecv-op1, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.428 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.448 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.539 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op1 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.639.548 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.565 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 4_Default/StreamSend-op2, task_id_on_stream : 3. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.639.577 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.586 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op2 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.639.604 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.714 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op2 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.739 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 4, actor name : 5_Default/StreamRecv-op2, task_id_on_stream : 1. [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.639.743 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.639.768 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.761 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 3. [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.639.792 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.794 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.889 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op2 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.639.918 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.936 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 6_Default/StridedSlice-op0, task_id_on_stream : 4. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.639.942 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.639.958 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op0 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.639.966 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.640.085 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.640.109 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.640.132 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive StridedSlice [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.640.266 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive StridedSlice [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.640.291 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:StridedSlice, kernel type:acl_kernel [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.640.677 [mindspore/ccsrc/transform/acl_ir/op_api_exec.cc:145] GetAscendDefaultCustomPath] Add path [/usr/local/Ascend/latest/opp/vendors/customize/op_api/lib/libcust_opapi.so to custom opapi paths. [INFO] GE_ADPT(187818,fffeb9ffb0f0,python):2025-02-07-15:58:06.641.710 [mindspore/ccsrc/transform/acl_ir/acl_allocator.cc:104] RegisterAllocator] Register AclAllocator [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.695.993 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Mul, kernel type:opapi_kernel [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.696.153 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.696.370 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.696.471 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.696.525 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.696.549 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.696.578 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.697.109 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.697.145 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.798.533 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.798.611 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.798.692 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.798.754 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.798.793 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.798.816 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.798.863 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:32] AclOpBuild] Begin to create acl kernel module for primitive Split [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.799.019 [mindspore/ccsrc/plugin/device/ascend/kernel/acl/acl_kernel_build.cc:74] AclOpBuild] Finished creating acl kernel module for primitive Split [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.799.044 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Split, kernel type:acl_kernel [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.799.746 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:Concat, kernel type:opapi_kernel [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.799.791 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel_build.cc:32] HcclOpBuild] Build hccl op [AllGather] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.799.852 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.799.901 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.cc:210] CalLoopSize] Get Hccl Kernel: AllGather, output size: 1 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.799.933 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.799.954 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:179] GenerateKernelMod] kernel opname:AllGather, kernel type:hccl_kernel [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.800.340 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_0_erase_visit_attr in 306.54 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.800.817 [mindspore/ccsrc/backend/common/optimizer/pass_manager.cc:75] RunPass] Run pass hwopt_opt_acl_ack_1_deal_ref_output in 436.19 us [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:06.800.888 [mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc:166] AclAfterCreateKernel] [PROF]AclAfterCreateKernel costs 0.886 msec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.800.950 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.cc:195] OptimizeACLGraphAfterCreateKernel] [PROF]OptimizeACLGraphAfterCreateKernel costs 0.967 msec. [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.059 [mindspore/ccsrc/backend/common/session/exec_order_builder.cc:75] Build] exec order build by bfs [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.258 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1102] OptimizeExecutionOrder] [PROF]OptimizeExecutionOrder costs 0.278 msec. [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.295 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1064] CompileGraphImpl] [PROF]CreateKernel costs 163.948 msec. [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.532 [mindspore/ccsrc/backend/common/session/session_basic.cc:1152] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] DEBUG(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.558 [mindspore/ccsrc/debug/summary/summary.cc:52] RecurseSetSummaryNodesForAllGraphs] Recurse set summary nodes for all graphs in graph: 0 start [INFO] DEBUG(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.579 [mindspore/ccsrc/debug/summary/summary.cc:57] RecurseSetSummaryNodesForAllGraphs] This function should be skipped on GE backend. [INFO] DEBUG(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.598 [mindspore/ccsrc/debug/data_dump/dump_json_parser.cc:1207] UpdateNeedDumpKernels] Get kernel dump flag [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.657 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1183] PreprocessBeforeRun] Current Exec Order Algo in MS Context is bfs [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.688 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1046] DoStreamAssign] Status record: start stream assign, kernel_graph0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.722 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.777 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op1 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.823 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op2 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.851 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op3 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.892 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op4 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.934 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/StridedSlice-op5 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.801.984 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Mul-op0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.802.013 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.803.339 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 2 for node Default/AllGather-op0, group: 2-6853331267304275293 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.803.394 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.803.443 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op2 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.803.473 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op1 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.804.462 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 3 for node Default/AllGather-op1, group: 2-16057586909177180503 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.804.516 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Split-op1 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.804.561 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/Concat-op3 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.804.591 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:156] AssignStream] Set stream id by group for node Default/AllGather-op2 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.805.943 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc:94] AddStreamIdByGroup] Set stream id by group 4 for node Default/AllGather-op2, group: 2-5488101015797526856 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.806.889 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc:1062] DoStreamAssign] Status record: end stream assign, kernel_graph0 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.806.972 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.086 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:2 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.132 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.161 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 2, record_stream_id_ : 0. [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.195 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.217 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:3 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.247 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.270 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 3, record_stream_id_ : 0. [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.298 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.320 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:4 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.347 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.370 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 4, record_stream_id_ : 0. [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.401 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.422 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:5 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.451 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.473 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 5, record_stream_id_ : 0. [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.501 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.523 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:0 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.549 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.571 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 0, record_stream_id_ : 2. [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.603 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.634 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:6 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.662 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.684 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 6, record_stream_id_ : 0. [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.713 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.733 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:1 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.761 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.782 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 1, record_stream_id_ : 3. [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.809 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.829 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:7 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.858 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.880 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 7, record_stream_id_ : 0. [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.909 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.931 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:8 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.959 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.807.980 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 8, record_stream_id_ : 2. [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.009 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.033 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:9 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.063 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.085 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 9, record_stream_id_ : 3. [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.111 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamsend] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.132 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/send.cc:40] Init] send op event id:10 [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.160 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/rt_kernel_build.cc:31] RtOpBuild] Op Name(tolower)[streamrecv] [INFO] KERNEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.182 [mindspore/ccsrc/plugin/device/ascend/kernel/rts/recv.cc:42] Init] recv op event_id_: 10, record_stream_id_ : 4. [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.208 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:931] PrintGraphExecuteOrder] Graph 0 execution order: [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.302 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[0], node name[Default/StreamSend-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_24{[0]: ValueNode StreamSend}], event id[2] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.342 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[1], node name[Default/StreamRecv-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_25{[0]: ValueNode StreamRecv}], event id[2] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.372 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[2], node name[Default/StreamSend-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_26{[0]: ValueNode StreamSend}], event id[3] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.401 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[3], node name[Default/StreamRecv-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_27{[0]: ValueNode StreamRecv}], event id[3] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.429 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[4], node name[Default/StreamSend-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_28{[0]: ValueNode StreamSend}], event id[4] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.457 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[5], node name[Default/StreamRecv-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_29{[0]: ValueNode StreamRecv}], event id[4] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.520 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[6], node name[Default/StridedSlice-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_30{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_x, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.569 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[7], node name[Default/StridedSlice-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.615 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[8], node name[Default/StridedSlice-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.698 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[9], node name[Default/StridedSlice-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_33{[0]: ValueNode PrimFunc_StridedSlice, [1]: param_y, [2]: ValueNode (2, 0, 0), [3]: ValueNode (4, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.756 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[10], node name[Default/StridedSlice-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.803 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[11], node name[Default/StridedSlice-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0}] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.840 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[12], node name[Default/Mul-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35}] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.870 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[13], node name[Default/StreamSend-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_37{[0]: ValueNode StreamSend}], event id[5] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.899 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[14], node name[Default/StreamRecv-op3], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_38{[0]: ValueNode StreamRecv}], event id[5] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.940 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[15], node name[Default/AllGather-op0], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36}], group[2-6853331267304275293] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.969 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[16], node name[Default/StreamSend-op4], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_40{[0]: ValueNode StreamSend}], event id[0] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.808.997 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[17], node name[Default/StreamRecv-op4], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_41{[0]: ValueNode StreamRecv}], event id[0] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.031 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[18], node name[Default/Split-op0], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.072 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[19], node name[Default/Concat-op2], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2}] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.110 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[20], node name[Default/StreamSend-op5], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_46{[0]: ValueNode StreamSend}], event id[6] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.138 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[21], node name[Default/StreamRecv-op5], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_47{[0]: ValueNode StreamRecv}], event id[6] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.170 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[22], node name[Default/AllGather-op1], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43}], group[2-16057586909177180503] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.198 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[23], node name[Default/StreamSend-op6], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_49{[0]: ValueNode StreamSend}], event id[1] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.226 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[24], node name[Default/StreamRecv-op6], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_50{[0]: ValueNode StreamRecv}], event id[1] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.259 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[25], node name[Default/Split-op1], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2}] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.297 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[26], node name[Default/Concat-op3], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1}] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.324 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[27], node name[Default/StreamSend-op7], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_55{[0]: ValueNode StreamSend}], event id[7] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.351 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[28], node name[Default/StreamRecv-op7], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_56{[0]: ValueNode StreamRecv}], event id[7] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.382 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[29], node name[Default/AllGather-op2], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}], group[2-5488101015797526856] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.410 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[30], node name[Default/StreamSend-op8], logic id[4294967295], stream id[2], node info[@kernel_graph0:CNode_58{[0]: ValueNode StreamSend}], event id[8] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.436 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[31], node name[Default/StreamRecv-op8], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_59{[0]: ValueNode StreamRecv}], event id[8] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.470 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[32], node name[Default/StreamSend-op9], logic id[4294967295], stream id[3], node info[@kernel_graph0:CNode_60{[0]: ValueNode StreamSend}], event id[9] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.497 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[33], node name[Default/StreamRecv-op9], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_61{[0]: ValueNode StreamRecv}], event id[9] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.524 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[34], node name[Default/StreamSend-op10], logic id[4294967295], stream id[4], node info[@kernel_graph0:CNode_62{[0]: ValueNode StreamSend}], event id[10] [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.550 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:971] PrintGraphExecuteOrder] Index[35], node name[Default/StreamRecv-op10], logic id[4294967295], stream id[0], node info[@kernel_graph0:CNode_63{[0]: ValueNode StreamRecv}], event id[10] [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.587 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1115] CompileGraphImpl] [PROF]PreprocessBeforeRun costs 7.952 msec. [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.809.630 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1161] CreateDeviceAddress] Status record: start create device address. graph id: 0 [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.810.862 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1168] CreateDeviceAddress] Status record: end create device address. graph id: 0 [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.810.909 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:1123] CompileGraphImpl] [PROF]CreateDeviceAddress costs 1.266 msec. [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.030 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1177] CacheGraphOutputToFrontNodeWithIndex] Get graph backend output nodes. [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.069 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1185] CacheGraphOutputToFrontNodeWithIndex] Get graph front output nodes. [INFO] SESSION(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.144 [mindspore/ccsrc/backend/common/session/kernel_graph.cc:1203] CacheGraphOutputToFrontNodeWithIndex] Backend output: Default/AllGather-op2 debug string: @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} node ptr:0x5ba89670 with index: 0 map to front node: Default/AllGather-op2 debug string: @4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} node ptr: 0x5ba3ba70 with index: 0 [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.191 [mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc:766] CompileGraph] Status record: end compile graph. graph id: 0 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.399 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1605] CompileGraphFromSegment] Compile cut segment, the cut node: @4_3_1___main___Net_construct_20:ValueNode_64{[0]: ValueNode Return, [1]: CNode_22} [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.439 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1129] CompileGraphs] [PROF]CompileSubGraph costs 196.706 msec. [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.465 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:976] ExportCompileCacheKBK] Compile cache: disable by front compile cache config. [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.530 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1143] CompileGraphs] Status record: construct the graph compiler info. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.577 [mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc:1001] Parse] Control node parser is not inited. [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.629 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:859] Transform] Graph(kernel_graph_0) transforms actor begin, strategy:pipeline [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.794 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:480] InitGraphParameterStore] Init graph parameter store: kernel_graph_0, outer size: 2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.836 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 0, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_x [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.868 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:487] InitGraphParameterStore] Init store inner: outer index: 1, inner size: 1, parameter: @4_3_1___main___Net_construct_20:param_y [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.921 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x5d2f8be0 for node:ValueNode 0 node addr:0x5ba42800 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.948 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d2f8be0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.976 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x5d30c280 for node:ValueNode (0, 0, 2) node addr:0x5ba43be0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.811.995 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30c280 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.017 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x5d30c810 for node:ValueNode (2, 0, 0) node addr:0x5ba46100 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.034 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30c810 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.057 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x5d30d000 for node:ValueNode (4, 4, 4) node addr:0x5ba463f0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.073 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30d000 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.093 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x5d30d4e0 for node:ValueNode 1 node addr:0x5ba42170 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.110 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30d4e0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.133 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x5d30dcf0 for node:ValueNode (1, 1, 1) node addr:0x5ba44060 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.160 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30dcf0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.183 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x5d30e4e0 for node:ValueNode (2, 4, 4) node addr:0x5ba44fe0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.200 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30e4e0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.221 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x5d30ecd0 for node:ValueNode (0, 2, 0) node addr:0x5ba44d50 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.236 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30ecd0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.256 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x5d30f1b0 for node:ValueNode 2 node addr:0x5ba42930 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.271 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30f1b0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.292 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:207] AddDeviceTensorStore] Add device tensor store:0x5d30f9c0 for node:ValueNode (2, 2, 4) node addr:0x5ba43d10 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.307 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30f9c0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.445 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:442] ChangeGraphMode] Enable kbk subgraph execute and set run mode for graph: 0 to GraphMode. [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.464 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:462] TryEnableKbkSubGraphExecMode] Enable kbk subgraph execute mode for actor set: kernel_graph_0 [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.525 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:391] TryEnableInputOptimize] Enable input optimize for actor set: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.563 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_y for host data source actor. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.606 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_y for front node:@4_3_1___main___Net_construct_20:param_y index:0 position:1 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.645 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1914] BuildGraphParameterStore] Init backend input node:@kernel_graph0:param_x for host data source actor. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.682 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1945] BuildGraphParameterStore] Build graph parameter :@kernel_graph0:param_x for front node:@4_3_1___main___Net_construct_20:param_x index:0 position:0 [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.727 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2268] BuildDataPrepareActorForGraphParameterStore] Create data prepare actor: kernel_graph_0_DataPrepareActor [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.852 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2224] BuildLoopCountActor] Create loop count actor: kernel_graph_0_LoopCountActor [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.881 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:2257] BuildOutputActor] Create output actor: kernel_graph_0_OutputActor [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.812.961 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1509] CacheGraphOutputToActor] Cache graph 0 output node:Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} with index:0 to actor:kernel_graph0_SuperKernelActor, from front node:Default/AllGather-op2 debug string:@4_3_1___main___Net_construct_20:CNode_22{[0]: ValueNode AllGather, [1]: CNode_23} with index:0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.144 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.169 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d30b1c0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.199 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:63] UpdateDataArrowRefCount] Process shape depend attribute for actor : kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.216 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d30b590 origin ref count:2 [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.313 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1711] Link] [PROF]GraphSchedulerLinkSinkMode costs 0.211 msec. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.345 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.366 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph0_SuperKernelActor@ to actor:kernel_graph_0_LoopCountActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.383 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_LoopCountActor@ to actor:kernel_graph_0_OutputActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.402 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_OutputActor@ to actor:kernel_graph_0_DataPrepareActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.418 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:3713] LinkControlArrowForCopyActor] Link control arrow for copy actor start, copy actor size:0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.449 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d314510 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.484 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:556] AddResultArrow] Add result arrow from actor:kernel_graph0_SuperKernelActor to actor:kernel_graph_0_OutputActor@ from kernel@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} device address:0x5d314510 original ref count:18446744073709551615 ref count:18446744073709551615 dynamic ref count:2147483647 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.574 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.620 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 3. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.660 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.696 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 4. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.730 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.768 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 5. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.813.993 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.036 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 6. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.098 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.137 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 1. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.211 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.250 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 7. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.300 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.337 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 2. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.410 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.449 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 8. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.501 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.536 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 9. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.572 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.615 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 10. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.647 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.681 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:1358] ProcessStreamSendRecvEventPair] Process event pair id : 11. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.700 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 4, send_actor : 0x5ba8fe90, recv_actor : 0x5ba90890. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.716 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 8, send_actor : 0x5d323310, recv_actor : 0x5d323b20. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.732 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 3, send_actor : 0x5ba6f500, recv_actor : 0x5ba8f680. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.747 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 11, send_actor : 0x5d3272c0, recv_actor : 0x5d327d00. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.762 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 10, send_actor : 0x5d325e40, recv_actor : 0x5d326880. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.776 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 1, send_actor : 0x5d31ba70, recv_actor : 0x5d31c3b0. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.791 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 5, send_actor : 0x5ba912d0, recv_actor : 0x5d314f80. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.806 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 6, send_actor : 0x5d319c10, recv_actor : 0x5d31a5e0. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.821 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 7, send_actor : 0x5d31e0c0, recv_actor : 0x5d31ea00. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.836 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 9, send_actor : 0x5d324b40, recv_actor : 0x5d325400. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.814.850 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1221] BuildKernelActors] Stream send/recv pair : 2, send_actor : 0x5d31fcf0, recv_actor : 0x5d320b50. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.815.105 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op0 addr:0x5d30be90 type:48, kernel tensor addr:0x5d30bc20, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.815.221 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op1 addr:0x5d30feb0 type:48, kernel tensor addr:0x5d30fc40, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.815.297 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_31{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_30, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.815.396 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op2 addr:0x5d3103c0 type:48, kernel tensor addr:0x5d310150, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.815.456 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_32{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_31, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.815.533 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op3 addr:0x5d3108d0 type:48, kernel tensor addr:0x5d310660, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.815.623 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op4 addr:0x5d310de0 type:48, kernel tensor addr:0x5d310b70, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.815.680 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_34{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_33, [2]: ValueNode (0, 2, 0), [3]: ValueNode (2, 4, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.815.766 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/StridedSlice-op5 addr:0x5d3112f0 type:48, kernel tensor addr:0x5d311080, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.815.821 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_35{[0]: ValueNode PrimFunc_StridedSlice, [1]: CNode_34, [2]: ValueNode (0, 0, 2), [3]: ValueNode (2, 2, 4), [4]: ValueNode (1, 1, 1), [5]: ValueNode 0, [6]: ValueNode 0, [7]: ValueNode 0, [8]: ValueNode 0, [9]: ValueNode 0} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.815.949 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Mul-op0 addr:0x5d311800 type:48, kernel tensor addr:0x5d311590, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.009 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_36{[0]: ValueNode PrimFunc_Mul, [1]: CNode_32, [2]: CNode_35} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.076 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op0 addr:0x5d311d10 type:48, kernel tensor addr:0x5d311aa0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.116 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_39{[0]: ValueNode AllGather, [1]: CNode_36} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.195 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op0 addr:0x5d312290 type:48, kernel tensor addr:0x5d3120b0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.221 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op0 addr:0x5d312780 type:48, kernel tensor addr:0x5d3125a0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 2)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.260 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_42{[0]: ValueNode PrimFunc_Split, [1]: CNode_39, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.331 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op0 addr:0x5d312c00 type:48, kernel tensor addr:0x5d312990, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.376 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_43{[0]: ValueNode PrimFunc_Concat, [1]: CNode_44, [2]: CNode_45, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.438 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op1 addr:0x5d313110 type:48, kernel tensor addr:0x5d312ea0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.475 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_48{[0]: ValueNode AllGather, [1]: CNode_43} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.545 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Split-op1 addr:0x5d313690 type:48, kernel tensor addr:0x5d3134b0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.573 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[1] info for node:Default/Split-op1 addr:0x5d313b80 type:48, kernel tensor addr:0x5d3139a0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 2, 4)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.673 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_51{[0]: ValueNode PrimFunc_Split, [1]: CNode_48, [2]: ValueNode 0, [3]: ValueNode 2} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.759 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/Concat-op1 addr:0x5d314000 type:48, kernel tensor addr:0x5d313d90, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (2, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.807 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_52{[0]: ValueNode PrimFunc_Concat, [1]: CNode_53, [2]: CNode_54, [3]: ValueNode 1} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.864 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:222] InitOutputInfo] Init output[0] info for node:Default/AllGather-op2 addr:0x5d314510 type:48, kernel tensor addr:0x5d3142a0, kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.816.901 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:163] InitMultiStreamInfo] cnode : @kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} is thread safe. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.054 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op1 input kernel:Default/StridedSlice-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.079 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d30be90 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.122 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op2 input kernel:Default/StridedSlice-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.141 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d30feb0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.187 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op4 input kernel:Default/StridedSlice-op3 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.206 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d3108d0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.244 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/StridedSlice-op5 input kernel:Default/StridedSlice-op4 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.262 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d310de0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.298 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op2 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.326 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d3103c0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.357 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Mul-op0 input kernel:Default/StridedSlice-op5 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.374 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d3112f0 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.400 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op0 input kernel:Default/Mul-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.416 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d311800 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.441 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op0 input kernel:Default/AllGather-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.459 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d311d10 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.483 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.499 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d312290 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.518 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op0 input kernel:Default/Split-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.534 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d312780 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.558 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op1 input kernel:Default/Concat-op0 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.574 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d312c00 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.598 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Split-op1 input kernel:Default/AllGather-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.614 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d313110 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.637 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.662 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d313690 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.682 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/Concat-op1 input kernel:Default/Split-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.698 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d313b80 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.722 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1462] LinkKernelActorByDeviceType] Kernel:Default/AllGather-op2 input kernel:Default/Concat-op1 need copy:1 for actor:kernel_graph0_SuperKernelActor@ [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.738 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:491] UpdateRefCount] Add origin ref count for device address:0x5d314000 origin ref count:2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.781 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[y] debug_name: @kernel_graph0:param_y use count is: 1 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.806 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:1329] LinkKernelActors] SuperKernelActor: kernel_graph0_SuperKernelActor Parameter[x] debug_name: @kernel_graph0:param_x use count is: 1 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.828 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1603] AddControlArrowForNoInputActor] Add control arrow for no input arrow actor: kernel_graph0_SuperKernelActor [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.847 [mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc:609] AddControlArrow] Add control arrow from actor:kernel_graph_0_DataPrepareActor@ to actor:kernel_graph0_SuperKernelActor@ [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.817.961 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:887] Transform] [PROF]GraphSchedulerLink costs 4.965 msec. [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.047 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 1_actor_set_kernel_graph_0_invalid_data_arrow_elimination in 1.39 us [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.091 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 2_actor_set_kernel_graph_0_multi_actor_fusion in 16.5401 us [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.113 [mindspore/ccsrc/runtime/graph_scheduler/optimizer/optimizer.cc:54] Optimize] Run pass 3_actor_set_kernel_graph_0_batch_data_arrow_fusion in 1.04995 us [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.138 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:904] Transform] Graph(kernel_graph_0) transforms actor end. [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.204 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:354] Init] kernel_graph_0 has the parameter input num: 2 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.256 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1153] CompileGraphs] [PROF]GraphScheduler costs 6.688 msec. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.284 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:29] operator()] Create MultiStreamController. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.317 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:40] Refresh] Stream manager initialize, device_context : 0x48cbe720, stream_size : 5. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.338 [mindspore/ccsrc/runtime/device/multi_stream_controller.cc:214] Resize] Task id on stream manager initialize : 0, stream_size : 5. [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.365 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1166] CompileGraphs] [PROF]compile_backend_graph costs 2446.89 msec. [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.395 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1169] CompileGraphs] Status record: end compile function graph: 4_3_1___main___Net_construct_20, produce actor: kernel_graph_0 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.424 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end task_emit action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.445 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.478 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:281] SetLoopCount] Change vm_loop_flag to 0, set loop_size to 1 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.511 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1701] operator()] Status record: start execute action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.539 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1703] operator()] Status record: end execute action. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.818.556 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1709] operator()] Extra status record: total func graphs: 1, total nodes: 43 TotalTime = 2.53823, [21] [bootstrap]: 0.00154018 [type_inference]: 0.0170252 [auto_monad]: 0.0003499 [graph_reusing]: 9.17601e-05 [inline]: 0.0157016, [2] [rewriter_before_opt_a]: 0.00017308 [a1a2]: 0.0153986, [2] [Cycle 1]: 0.00247662, [11] [expand_dump_flag]: 5.53997e-06 [switch_simplify]: 0.00011803 [loop_unroll]: 9.81599e-05 [a_1]: 0.00039949 [recompute_prepare]: 3.15501e-05 [updatestate_depend_eliminate]: 1.23399e-05 [updatestate_assign_eliminate]: 5.1799e-06 [updatestate_loads_eliminate]: 3.94997e-06 [parameter_eliminate]: 1.003e-05 [a_2]: 0.00081181 [parallel_inline_pass]: 3.96e-05 [Cycle 2]: 0.00172411, [11] [expand_dump_flag]: 4.42995e-06 [switch_simplify]: 3.087e-05 [loop_unroll]: 2.679e-05 [a_1]: 0.00018445 [recompute_prepare]: 2.99e-05 [updatestate_depend_eliminate]: 1.27101e-05 [updatestate_assign_eliminate]: 5.21005e-06 [updatestate_loads_eliminate]: 3.92005e-06 [parameter_eliminate]: 5.54998e-06 [a_2]: 0.0008093 [parallel_inline_pass]: 3.835e-05 [parallel-infer-symbol]: 0.00018515 [pre_auto_parallel]: 0.00014339 [insert-virtual-dataset]: 0.00129469 [parallel-infer-symbol-second]: 6.086e-05 [dataset_repeat_opt]: 0.00039017 [pipeline_split]: 0.00014402 [optimize]: 0.0515304, [52] [py_interpret_to_execute]: 6.908e-05 [rewriter_before_opt_a]: 9.527e-05 [opt_a]: 0.0416304, [3] [Cycle 1]: 0.0226344, [46] [expand_dump_flag]: 3.75998e-06 [switch_simplify]: 4.795e-05 [loop_unroll]: 3.316e-05 [a_1]: 0.00038512 [recompute_prepare]: 3.53301e-05 [updatestate_depend_eliminate]: 1.339e-05 [updatestate_assign_eliminate]: 7.43009e-06 [updatestate_loads_eliminate]: 7.56001e-06 [parameter_eliminate]: 8.27003e-06 [a_2]: 0.00092351 [accelerated_algorithm]: 4.385e-05 [shard]: 4.339e-05 [meta_shard_fg_expand]: 6.23998e-06 [shard_inline]: 3.814e-05 [auto_parallel]: 6.588e-05 [parallel]: 0.0121175 [flash_sp]: 3.206e-05 [merge_comm]: 3.894e-05 [allreduce_fusion]: 3.15399e-05 [matmul_add_comm_reduction]: 4.005e-05 [allreduce_slice_to_reducescatter]: 7.40052e-07 [virtual_shard_identity]: 0.00010333 [virtual_dataset]: 0.00015969 [get_grad_eliminate_]: 8.358e-05 [virtual_output]: 0.00010328 [merge_forward]: 3.44299e-05 [cell_reuse_recompute_pass]: 6.37001e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00017683 [before_grad]: 0.00011503 [inplace_validation]: 3.20399e-05 [parallel_renormalize]: 0.00352364 [update_top_fg]: 1.30001e-06 [cast_eliminate]: 0.00010136 [meta_fg_expand]: 3.539e-05 [inplace_validation_after_expand]: 5.044e-05 [flash_sp_send_recv_attached]: 6.28099e-05 [receive_attached]: 6.31006e-06 [after_resolve]: 9.19801e-05 [a_after_grad]: 0.00011129 [special_op_eliminate]: 7.46399e-05 [renormalize]: 1.10012e-07 [add_forward_monad_depend]: 1.173e-05 [auto_monad_grad]: 5.62996e-06 [auto_monad_eliminator]: 6.07801e-05 [cse]: 0.00020185 [a_3]: 0.00076643 [Cycle 2]: 0.0114002, [46] [expand_dump_flag]: 4.24997e-06 [switch_simplify]: 7.89e-05 [loop_unroll]: 7.931e-05 [a_1]: 0.0017826 [recompute_prepare]: 8.137e-05 [updatestate_depend_eliminate]: 3.88899e-05 [updatestate_assign_eliminate]: 2.575e-05 [updatestate_loads_eliminate]: 2.42901e-05 [parameter_eliminate]: 7.77992e-06 [a_2]: 0.0016423 [accelerated_algorithm]: 0.00016699 [shard]: 4.09901e-05 [meta_shard_fg_expand]: 1.60499e-05 [shard_inline]: 7.74701e-05 [auto_parallel]: 7.851e-05 [parallel]: 1.64401e-05 [flash_sp]: 4.292e-05 [merge_comm]: 3.39099e-05 [allreduce_fusion]: 2.924e-05 [matmul_add_comm_reduction]: 2.898e-05 [allreduce_slice_to_reducescatter]: 6.79982e-07 [virtual_shard_identity]: 7.881e-05 [virtual_dataset]: 7.09799e-05 [get_grad_eliminate_]: 6.85201e-05 [virtual_output]: 6.825e-05 [merge_forward]: 2.90599e-05 [cell_reuse_recompute_pass]: 7.30995e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00015844 [before_grad]: 0.0001021 [inplace_validation]: 2.912e-05 [parallel_renormalize]: 3.00002e-07 [update_top_fg]: 9.00007e-07 [cast_eliminate]: 7.02899e-05 [meta_fg_expand]: 3.162e-05 [inplace_validation_after_expand]: 3.754e-05 [flash_sp_send_recv_attached]: 2.8701e-06 [receive_attached]: 2.46998e-06 [after_resolve]: 8.03999e-05 [a_after_grad]: 0.00010092 [special_op_eliminate]: 6.722e-05 [renormalize]: 0.00264104 [add_forward_monad_depend]: 7.90006e-06 [auto_monad_grad]: 4.15999e-06 [auto_monad_eliminator]: 5.24799e-05 [cse]: 0.00015741 [a_3]: 0.0006694 [Cycle 3]: 0.00752851, [46] [expand_dump_flag]: 3.02005e-06 [switch_simplify]: 7.263e-05 [loop_unroll]: 6.77999e-05 [a_1]: 0.00156158 [recompute_prepare]: 7.00201e-05 [updatestate_depend_eliminate]: 3.31e-05 [updatestate_assign_eliminate]: 2.50899e-05 [updatestate_loads_eliminate]: 2.51899e-05 [parameter_eliminate]: 4.02995e-06 [a_2]: 0.00139476 [accelerated_algorithm]: 7.89601e-05 [shard]: 3.749e-05 [meta_shard_fg_expand]: 1.45e-05 [shard_inline]: 7.03699e-05 [auto_parallel]: 7.05899e-05 [parallel]: 1.621e-05 [flash_sp]: 2.36009e-06 [merge_comm]: 3.39999e-05 [allreduce_fusion]: 2.84399e-05 [matmul_add_comm_reduction]: 3.408e-05 [allreduce_slice_to_reducescatter]: 7.3004e-07 [virtual_shard_identity]: 7.064e-05 [virtual_dataset]: 0.00015634 [get_grad_eliminate_]: 6.693e-05 [virtual_output]: 6.544e-05 [merge_forward]: 2.627e-05 [cell_reuse_recompute_pass]: 4.77e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00013971 [before_grad]: 0.00010707 [inplace_validation]: 2.562e-05 [parallel_renormalize]: 6.00703e-08 [update_top_fg]: 1.25007e-06 [cast_eliminate]: 6.797e-05 [meta_fg_expand]: 3.17e-05 [inplace_validation_after_expand]: 3.599e-05 [flash_sp_send_recv_attached]: 2.48989e-06 [receive_attached]: 2.14996e-06 [after_resolve]: 7.34901e-05 [a_after_grad]: 0.00010857 [special_op_eliminate]: 6.678e-05 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 3.14997e-06 [auto_monad_grad]: 3.53996e-06 [auto_monad_eliminator]: 4.328e-05 [cse]: 0.00014343 [a_3]: 0.00064642 [py_interpret_to_execute_after_opt_a]: 8.31899e-05 [slice_cell_reuse_recomputed_activation]: 3.694e-05 [rewriter_after_opt_a]: 0.00041856 [convert_after_rewriter]: 7.324e-05 [order_py_execute_after_rewriter]: 6.072e-05 [opt_b]: 0.00276124, [1] [Cycle 1]: 0.00271512, [7] [b_1]: 0.00210445 [b_2]: 7.211e-05 [updatestate_depend_eliminate]: 3.011e-05 [updatestate_assign_eliminate]: 2.46201e-05 [updatestate_loads_eliminate]: 2.393e-05 [renormalize]: 4.70085e-07 [cse]: 0.000132 [optimize_parallel_all_gather_comm]: 7.58e-05 [overlap_param_gather]: 3.461e-05 [cconv]: 7.959e-05 [loop_unroll]: 0.00093763 [opt_after_cconv]: 0.00092147, [1] [Cycle 1]: 0.00088088, [7] [c_1]: 0.00028441 [parameter_eliminate]: 3.93006e-06 [updatestate_depend_eliminate]: 3.156e-05 [updatestate_assign_eliminate]: 2.53801e-05 [updatestate_loads_eliminate]: 7.17599e-05 [cse]: 0.00013386 [renormalize]: 6.79982e-07 [remove_dup_value]: 0.00034297 [tuple_transform]: 0.00050335, [1] [Cycle 1]: 0.00045677, [2] [d_1]: 0.00036377 [renormalize]: 4.7998e-07 [partial_unused_args_eliminate]: 3.686e-05 [add_cache_embedding]: 8.66e-05 [add_recomputation]: 0.00025324 [cse_after_recomputation]: 0.00017752, [1] [Cycle 1]: 0.00013087, [1] [cse]: 7.917e-05 [environ_conv]: 8.324e-05 [swap_dp_allreduce_reducescatter]: 6.289e-05 [bias_add_comm_swap]: 3.707e-05 [label_micro_interleaved_index]: 3.476e-05 [label_fine_grained_interleaved_index]: 3.607e-05 [merge_cast_opt]: 3.24601e-05 [slice_recompute_activation]: 6.5e-05 [micro_interleaved_order_control]: 3.60401e-05 [assign_add_opt]: 0.00022519 [ForceFp32Comm]: 3.63401e-05 [remove_cast_before_assign_add]: 5.75101e-05 [full_micro_interleaved_order_control]: 3.536e-05 [reorder_send_recv_between_fp_bp]: 3.584e-05 [comm_op_add_attrs]: 9.669e-05 [add_comm_op_reuse_tag]: 9.796e-05 [interleave_split_concat_branches]: 3.385e-05 [interleave_parallel_branches]: 3.289e-05 [overlap_opt_shard_in_pipeline]: 6.89599e-05 [overlap_opt_shard_grad_in_pipeline]: 3.67099e-05 [control_data_broadcast_order]: 4.046e-05 [grouped_pairwise_exchange_alltoall]: 4.70299e-05 [offloading_packed_experts]: 8.219e-05 [overlap_recompute_and_grad_model_parallel]: 3.589e-05 [overlap_grad_matmul_and_grad_allreduce]: 3.20299e-05 [overlap_recompute_allgather_and_fa_grad]: 5.00201e-05 [overlap_grad_ring_attention]: 8.057e-05 [overlap_grad_flash_sp]: 7.155e-05 [begin_end_overlap_inline]: 3.26301e-05 [split_matmul_comm_elemetwise]: 4.649e-05 [split_layernorm_comm]: 3.45199e-05 [handle_group_info]: 3.893e-05 [symbol_engine_optimizer]: 0.00065326, [1] [Cycle 1]: 0.0006035, [6] [build]: 4.013e-05 [elim_shapecalc]: 6.62101e-05 [elim_not_effective]: 9.84699e-05 [opt_reshape]: 5.99601e-05 [fold_const_symbol]: 8.295e-05 [renormalize]: 4.7998e-07 [pipeline_parallel_scheduler]: 5.427e-05 [auto_monad_reorder]: 0.00012746 [get_jit_bprop_graph]: 4.862e-05 [rewriter_after_jit_bprop_graph]: 4.91401e-05 [eliminate_special_op_node]: 0.00098149 [distribtued_split]: 0.00026028 [validate]: 0.00019165 [task_emit]: 2.44744 [execute]: 6.149e-05 Sums bootstrap : 0.001540s : 0.06% type_inference : 0.017025s : 0.68% auto_monad : 0.000350s : 0.01% graph_reusing : 0.000092s : 0.00% inline.rewriter_before_opt_a : 0.000173s : 0.01% inline.a1a2.expand_dump_flag : 0.000010s : 0.00% inline.a1a2.switch_simplify : 0.000149s : 0.01% inline.a1a2.loop_unroll : 0.000125s : 0.00% inline.a1a2.a_1 : 0.000584s : 0.02% inline.a1a2.recompute_prepare : 0.000061s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000025s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000010s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000008s : 0.00% inline.a1a2.parameter_eliminate : 0.000016s : 0.00% inline.a1a2.a_2 : 0.001621s : 0.06% inline.a1a2.parallel_inline_pass : 0.000078s : 0.00% parallel-infer-symbol : 0.000185s : 0.01% pre_auto_parallel : 0.000143s : 0.01% insert-virtual-dataset : 0.001295s : 0.05% parallel-infer-symbol-second : 0.000061s : 0.00% dataset_repeat_opt : 0.000390s : 0.02% pipeline_split : 0.000144s : 0.01% optimize.py_interpret_to_execute : 0.000069s : 0.00% optimize.rewriter_before_opt_a : 0.000095s : 0.00% optimize.opt_a.expand_dump_flag : 0.000011s : 0.00% optimize.opt_a.switch_simplify : 0.000199s : 0.01% optimize.opt_a.loop_unroll : 0.000180s : 0.01% optimize.opt_a.a_1 : 0.003729s : 0.15% optimize.opt_a.recompute_prepare : 0.000187s : 0.01% optimize.opt_a.updatestate_depend_eliminate : 0.000085s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000058s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000057s : 0.00% optimize.opt_a.parameter_eliminate : 0.000020s : 0.00% optimize.opt_a.a_2 : 0.003961s : 0.16% optimize.opt_a.accelerated_algorithm : 0.000290s : 0.01% optimize.opt_a.shard : 0.000122s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000037s : 0.00% optimize.opt_a.shard_inline : 0.000186s : 0.01% optimize.opt_a.auto_parallel : 0.000215s : 0.01% optimize.opt_a.parallel : 0.012150s : 0.48% optimize.opt_a.flash_sp : 0.000077s : 0.00% optimize.opt_a.merge_comm : 0.000107s : 0.00% optimize.opt_a.allreduce_fusion : 0.000089s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000103s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000253s : 0.01% optimize.opt_a.virtual_dataset : 0.000387s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000219s : 0.01% optimize.opt_a.virtual_output : 0.000237s : 0.01% optimize.opt_a.merge_forward : 0.000090s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000018s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000475s : 0.02% optimize.opt_a.before_grad : 0.000324s : 0.01% optimize.opt_a.inplace_validation : 0.000087s : 0.00% optimize.opt_a.parallel_renormalize : 0.003524s : 0.14% optimize.opt_a.update_top_fg : 0.000003s : 0.00% optimize.opt_a.cast_eliminate : 0.000240s : 0.01% optimize.opt_a.meta_fg_expand : 0.000099s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000124s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000068s : 0.00% optimize.opt_a.receive_attached : 0.000011s : 0.00% optimize.opt_a.after_resolve : 0.000246s : 0.01% optimize.opt_a.a_after_grad : 0.000321s : 0.01% optimize.opt_a.special_op_eliminate : 0.000209s : 0.01% optimize.opt_a.renormalize : 0.002641s : 0.10% optimize.opt_a.add_forward_monad_depend : 0.000023s : 0.00% optimize.opt_a.auto_monad_grad : 0.000013s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000157s : 0.01% optimize.opt_a.cse : 0.000503s : 0.02% optimize.opt_a.a_3 : 0.002082s : 0.08% optimize.py_interpret_to_execute_after_opt_a : 0.000083s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000037s : 0.00% optimize.rewriter_after_opt_a : 0.000419s : 0.02% optimize.convert_after_rewriter : 0.000073s : 0.00% optimize.order_py_execute_after_rewriter : 0.000061s : 0.00% optimize.opt_b.b_1 : 0.002104s : 0.08% optimize.opt_b.b_2 : 0.000072s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000030s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000025s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000024s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000132s : 0.01% optimize.optimize_parallel_all_gather_comm : 0.000076s : 0.00% optimize.overlap_param_gather : 0.000035s : 0.00% optimize.cconv : 0.000080s : 0.00% optimize.loop_unroll : 0.000938s : 0.04% optimize.opt_after_cconv.c_1 : 0.000284s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000004s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000032s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000025s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000072s : 0.00% optimize.opt_after_cconv.cse : 0.000134s : 0.01% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000343s : 0.01% optimize.tuple_transform.d_1 : 0.000364s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000037s : 0.00% optimize.add_cache_embedding : 0.000087s : 0.00% optimize.add_recomputation : 0.000253s : 0.01% optimize.cse_after_recomputation.cse : 0.000079s : 0.00% optimize.environ_conv : 0.000083s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000063s : 0.00% optimize.bias_add_comm_swap : 0.000037s : 0.00% optimize.label_micro_interleaved_index : 0.000035s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000036s : 0.00% optimize.merge_cast_opt : 0.000032s : 0.00% optimize.slice_recompute_activation : 0.000065s : 0.00% optimize.micro_interleaved_order_control : 0.000036s : 0.00% optimize.assign_add_opt : 0.000225s : 0.01% optimize.ForceFp32Comm : 0.000036s : 0.00% optimize.remove_cast_before_assign_add : 0.000058s : 0.00% optimize.full_micro_interleaved_order_control : 0.000035s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000036s : 0.00% optimize.comm_op_add_attrs : 0.000097s : 0.00% optimize.add_comm_op_reuse_tag : 0.000098s : 0.00% optimize.interleave_split_concat_branches : 0.000034s : 0.00% optimize.interleave_parallel_branches : 0.000033s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000069s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000037s : 0.00% optimize.control_data_broadcast_order : 0.000040s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000047s : 0.00% optimize.offloading_packed_experts : 0.000082s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000036s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000032s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000050s : 0.00% optimize.overlap_grad_ring_attention : 0.000081s : 0.00% optimize.overlap_grad_flash_sp : 0.000072s : 0.00% optimize.begin_end_overlap_inline : 0.000033s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000046s : 0.00% optimize.split_layernorm_comm : 0.000035s : 0.00% optimize.handle_group_info : 0.000039s : 0.00% optimize.symbol_engine_optimizer.build : 0.000040s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000066s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000098s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000060s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000083s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000054s : 0.00% auto_monad_reorder : 0.000127s : 0.01% get_jit_bprop_graph : 0.000049s : 0.00% rewriter_after_jit_bprop_graph : 0.000049s : 0.00% eliminate_special_op_node : 0.000981s : 0.04% distribtued_split : 0.000260s : 0.01% validate : 0.000192s : 0.01% task_emit : 2.447436s : 97.29% execute : 0.000061s : 0.00% Time group info: ------[substitution.] 0.000811 353 15.70% : 0.000127s : 10: substitution.arithmetic_simplify 1.25% : 0.000010s : 21: substitution.elim_not_effective 3.10% : 0.000025s : 14: substitution.float_tuple_getitem_switch 1.20% : 0.000010s : 21: substitution.fold_const_symbol 3.57% : 0.000029s : 30: substitution.graph_param_transform 12.10% : 0.000098s : 1: substitution.inline 5.65% : 0.000046s : 66: substitution.j_node_and_user_rematch 5.91% : 0.000048s : 4: substitution.less_batch_normalization 2.12% : 0.000017s : 10: substitution.minmaximum_grad 7.38% : 0.000060s : 66: substitution.remove_not_recompute_node 1.71% : 0.000014s : 6: substitution.replace_old_param 9.66% : 0.000078s : 18: substitution.tuple_list_convert_item_index_to_positive 3.52% : 0.000029s : 18: substitution.tuple_list_get_item_const_eliminator 4.53% : 0.000037s : 18: substitution.tuple_list_get_item_depend_reorder 14.34% : 0.000116s : 30: substitution.tuple_list_get_item_eliminator 4.91% : 0.000040s : 18: substitution.tuple_list_get_set_item_eliminator 2.83% : 0.000023s : 1: substitution.virtual_dataset_eliminate 0.53% : 0.000004s : 1: substitution.virtual_output_eliminate ------[type_inference.] 0.016778 2 96.93% : 0.016264s : 1: type_inference.infer 3.07% : 0.000515s : 1: type_inference.specialize ------[replace.] 0.000122 5 13.44% : 0.000016s : 1: replace.inline 43.27% : 0.000053s : 2: replace.tuple_list_get_item_eliminator 33.15% : 0.000041s : 1: replace.virtual_dataset_eliminate 10.15% : 0.000012s : 1: replace.virtual_output_eliminate ------[match.] 0.000130 5 74.89% : 0.000097s : 1: match.inline 6.21% : 0.000008s : 2: match.tuple_list_get_item_eliminator 16.45% : 0.000021s : 1: match.virtual_dataset_eliminate 2.45% : 0.000003s : 1: match.virtual_output_eliminate ------[predicate.] 0.001753 11241 0.81% : 0.000014s : 100: predicate.accumulaten_eliminater 0.43% : 0.000008s : 30: predicate.ad_related_special_op_eliminate 0.87% : 0.000015s : 97: predicate.addn_check_dump 0.77% : 0.000013s : 100: predicate.addn_zero_filter 0.73% : 0.000013s : 100: predicate.adjust_all_reduce_mul_add 2.41% : 0.000042s : 197: predicate.arithmetic_simplify 2.16% : 0.000038s : 226: predicate.cast_eliminate 1.07% : 0.000019s : 126: predicate.check_bprop_eliminate 0.84% : 0.000015s : 97: predicate.compare_switch_simplify 0.25% : 0.000004s : 41: predicate.const_output_eliminate 0.29% : 0.000005s : 30: predicate.convert_tensor_all_eliminate 1.10% : 0.000019s : 102: predicate.convert_tensor_eliminate 0.96% : 0.000017s : 97: predicate.depend_value_elim 0.79% : 0.000014s : 100: predicate.dict_get_item_const_eliminator 0.84% : 0.000015s : 100: predicate.dict_get_item_eliminator 0.81% : 0.000014s : 100: predicate.dict_set_item_eliminator 0.19% : 0.000003s : 30: predicate.elim_not_effective 0.34% : 0.000006s : 30: predicate.elim_shapecalc_of_broadcastargs 1.10% : 0.000019s : 141: predicate.environ_add_const_eliminate 1.10% : 0.000019s : 141: predicate.environ_get_add_eliminate 1.08% : 0.000019s : 141: predicate.environ_get_depend_swap 2.04% : 0.000036s : 238: predicate.environ_get_eliminate 1.07% : 0.000019s : 141: predicate.environ_get_set_eliminate 0.78% : 0.000014s : 103: predicate.exchange_switch_depend_value 1.26% : 0.000022s : 103: predicate.float_depend_g_call 0.91% : 0.000016s : 97: predicate.float_environ_get_switch 1.37% : 0.000024s : 138: predicate.float_tuple_getitem_switch 0.17% : 0.000003s : 30: predicate.fold_const_symbol 1.12% : 0.000020s : 127: predicate.get_grad_eliminate 0.19% : 0.000003s : 30: predicate.graph_param_transform 0.93% : 0.000016s : 97: predicate.incorporate_call 0.86% : 0.000015s : 97: predicate.incorporate_call_switch 5.68% : 0.000100s : 479: predicate.inline 1.44% : 0.000025s : 126: predicate.inline_without_move 0.69% : 0.000012s : 126: predicate.j_node_and_user_rematch 1.03% : 0.000018s : 89: predicate.less_batch_normalization 1.53% : 0.000027s : 173: predicate.list_to_tuple_eliminator_ 2.19% : 0.000038s : 284: predicate.load_eliminater 0.53% : 0.000009s : 41: predicate.loop_unroll_after_grad 1.04% : 0.000018s : 105: predicate.loop_unroll_before_grad 1.63% : 0.000029s : 182: predicate.make_slice_get_slice_eliminator 0.92% : 0.000016s : 97: predicate.merge_addn 1.10% : 0.000019s : 126: predicate.micro_step_allgather_replace 1.06% : 0.000019s : 126: predicate.mini_step_allgather_replace 0.77% : 0.000014s : 100: predicate.minmaximum_grad 0.33% : 0.000006s : 30: predicate.mutable_eliminate 0.32% : 0.000006s : 30: predicate.opt_reshape 0.38% : 0.000007s : 41: predicate.parallel_virtual_node 1.21% : 0.000021s : 103: predicate.partial_defer_inline 1.27% : 0.000022s : 143: predicate.partial_eliminate 0.80% : 0.000014s : 100: predicate.print_const_string_wrapper 0.97% : 0.000017s : 97: predicate.reduce_all_const_elim 0.87% : 0.000015s : 100: predicate.reduce_eliminate 0.75% : 0.000013s : 126: predicate.remove_not_recompute_node 1.55% : 0.000027s : 228: predicate.replace_applicator 0.72% : 0.000013s : 126: predicate.replace_old_param 0.26% : 0.000004s : 41: predicate.reset_defer_inline 0.76% : 0.000013s : 100: predicate.reshape_eliminate 1.15% : 0.000020s : 126: predicate.row_tensor_add_zeros_like 0.36% : 0.000006s : 41: predicate.row_tensor_eliminate 1.23% : 0.000022s : 126: predicate.same_eliminate 0.62% : 0.000011s : 97: predicate.set_cell_output_no_recompute 1.23% : 0.000022s : 127: predicate.shard_identity_eliminate 1.59% : 0.000028s : 167: predicate.special_op_eliminate 1.13% : 0.000020s : 97: predicate.specialize_transform 1.24% : 0.000022s : 126: predicate.split_environ_get_set_with_tuple_value 1.22% : 0.000021s : 126: predicate.stack_unstack_eliminate 2.24% : 0.000039s : 284: predicate.stopgrad_eliminater 0.36% : 0.000006s : 41: predicate.switch_call_monad_eliminater 0.82% : 0.000014s : 103: predicate.switch_defer_inline 1.91% : 0.000033s : 229: predicate.switch_layer_defer_inline 3.25% : 0.000057s : 305: predicate.switch_simplify 0.76% : 0.000013s : 100: predicate.tile_eliminate 0.73% : 0.000013s : 100: predicate.transpose_eliminate 1.46% : 0.000026s : 171: predicate.tuple_list_convert_item_index_to_positive 1.56% : 0.000027s : 171: predicate.tuple_list_get_item_const_eliminator 1.34% : 0.000024s : 171: predicate.tuple_list_get_item_depend_reorder 2.56% : 0.000045s : 270: predicate.tuple_list_get_item_eliminator 1.43% : 0.000025s : 171: predicate.tuple_list_get_set_item_eliminator 2.36% : 0.000041s : 268: predicate.tuple_list_set_item_eliminator 1.50% : 0.000026s : 173: predicate.tuple_to_list_eliminator_ 2.18% : 0.000038s : 284: predicate.updatestate_pure_node_eliminater 3.22% : 0.000056s : 381: predicate.updatestate_useless_node_eliminater 0.37% : 0.000006s : 41: predicate.value_based_eliminate 1.20% : 0.000021s : 130: predicate.virtual_dataset_eliminate 1.16% : 0.000020s : 128: predicate.virtual_output_eliminate 0.38% : 0.000007s : 41: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000518 5 5.46% : 0.000028s : 1: func_graph_cloner_run.FuncGraphClonerGraph 94.54% : 0.000490s : 4: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 2.623808 283 0.00% : 0.000041s : 1: ForceFp32Comm 0.59% : 0.015407s : 1: a1a2 0.00% : 0.000091s : 1: add_cache_embedding 0.00% : 0.000104s : 1: add_comm_op_reuse_tag 0.01% : 0.000260s : 1: add_recomputation 0.01% : 0.000232s : 1: assign_add_opt 0.01% : 0.000367s : 1: auto_monad 0.01% : 0.000136s : 1: auto_monad_reorder 0.00% : 0.000037s : 1: begin_end_overlap_inline 0.00% : 0.000042s : 1: bias_add_comm_swap 0.06% : 0.001576s : 1: bootstrap 0.00% : 0.000085s : 1: cconv 0.00% : 0.000102s : 1: comm_op_add_attrs 0.00% : 0.000045s : 1: control_data_broadcast_order 0.00% : 0.000079s : 1: convert_after_rewriter 0.01% : 0.000183s : 1: cse_after_recomputation 0.02% : 0.000404s : 1: dataset_repeat_opt 0.01% : 0.000271s : 1: distribtued_split 0.04% : 0.000993s : 1: eliminate_special_op_node 0.00% : 0.000089s : 1: environ_conv 0.00% : 0.000072s : 1: execute 0.00% : 0.000040s : 1: full_micro_interleaved_order_control 0.00% : 0.000055s : 1: get_jit_bprop_graph 0.00% : 0.000101s : 1: graph_reusing 0.00% : 0.000051s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000043s : 1: handle_group_info 0.60% : 0.015719s : 1: inline 0.05% : 0.001326s : 1: insert-virtual-dataset 0.00% : 0.000037s : 1: interleave_parallel_branches 0.00% : 0.000038s : 1: interleave_split_concat_branches 0.00% : 0.000041s : 1: label_fine_grained_interleaved_index 0.00% : 0.000039s : 1: label_micro_interleaved_index 0.04% : 0.000946s : 1: loop_unroll 0.00% : 0.000036s : 1: merge_cast_opt 0.00% : 0.000040s : 1: micro_interleaved_order_control 0.00% : 0.000086s : 1: offloading_packed_experts 0.03% : 0.000874s : 44: opt.transform.a1a2 0.00% : 0.000064s : 1: opt.transform.loop_unroll_optimizer 0.36% : 0.009400s : 123: opt.transform.opt_a 0.01% : 0.000266s : 1: opt.transform.opt_after_cconv 0.05% : 0.001234s : 27: opt.transform.opt_b 0.01% : 0.000343s : 1: opt.transform.opt_trans_graph 0.01% : 0.000133s : 3: opt.transform.special_op_eliminate 0.01% : 0.000235s : 4: opt.transform.symbol_engine_opt 1.59% : 0.041638s : 1: opt_a 0.04% : 0.000928s : 1: opt_after_cconv 0.11% : 0.002767s : 1: opt_b 1.96% : 0.051544s : 1: optimize 0.00% : 0.000082s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000066s : 1: order_py_execute_after_rewriter 0.00% : 0.000077s : 1: overlap_grad_flash_sp 0.00% : 0.000036s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000086s : 1: overlap_grad_ring_attention 0.00% : 0.000041s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000074s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000040s : 1: overlap_param_gather 0.00% : 0.000054s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000041s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000202s : 1: parallel-infer-symbol 0.00% : 0.000071s : 1: parallel-infer-symbol-second 0.00% : 0.000041s : 1: partial_unused_args_eliminate 0.00% : 0.000061s : 1: pipeline_parallel_scheduler 0.01% : 0.000154s : 1: pipeline_split 0.01% : 0.000159s : 1: pre_auto_parallel 0.00% : 0.000076s : 1: py_interpret_to_execute 0.00% : 0.000089s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000062s : 1: remove_cast_before_assign_add 0.01% : 0.000352s : 1: remove_dup_value 0.16% : 0.004114s : 2: renormalize.infer 0.08% : 0.002019s : 2: renormalize.specialize 0.00% : 0.000040s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000055s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000427s : 1: rewriter_after_opt_a 0.01% : 0.000284s : 2: rewriter_before_opt_a 0.00% : 0.000042s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000070s : 1: slice_recompute_activation 0.00% : 0.000039s : 1: split_layernorm_comm 0.00% : 0.000052s : 1: split_matmul_comm_elemetwise 0.00% : 0.000068s : 1: swap_dp_allreduce_reducescatter 0.03% : 0.000658s : 1: symbol_engine_optimizer 93.28% : 2.447478s : 1: task_emit 0.02% : 0.000509s : 1: tuple_transform 0.65% : 0.017053s : 1: type_inference 0.02% : 0.000394s : 1: validate [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.819.798 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1785] Run] End [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.819.890 [mindspore/ccsrc/pipeline/jit/ps/pass_config.cc:239] SavePassesConfig] Running_passes: ['a1a2.r1.a_1', 'a1a2.r1.a_1.inline', 'opt_a.r1.auto_parallel', 'opt_a.r1.flash_sp', 'opt_a.r1.flash_sp_send_recv_attached', 'opt_a.r1.parallel', 'opt_a.r1.parallel_renormalize', 'opt_a.r1.receive_attached', 'opt_a.r1.virtual_dataset', 'opt_a.r1.virtual_dataset.virtual_dataset_eliminate', 'opt_a.r1.virtual_output', 'opt_a.r1.virtual_output.virtual_output_eliminate', 'opt_a.r2.a_1', 'opt_a.r2.a_1.tuple_list_get_item_eliminator', 'opt_a.r2.accelerated_algorithm', 'opt_a.r2.accelerated_algorithm.less_batch_normalization', 'opt_a.r2.auto_parallel', 'opt_a.r2.flash_sp', 'opt_a.r2.renormalize', 'opt_a.r3.auto_parallel'] [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.819.947 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1047] SaveCompiledGraph] Save compiled func graph(4_3_1___main___Net_construct_20) phase(train.1738915084115859456.281470816413072.0..)! [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.819.987 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1065] SaveCompiledGraph] End save compiled func graph! [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.820.023 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1326] CompileInner] [PROF]ParallelPostProcess costs 0.011 msec. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.820.043 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1154] CleanCompileRes] Clean compile resource start [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.822.099 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1175] CleanCompileRes] Clean compile resource end [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.822.142 [mindspore/ccsrc/pipeline/jit/ps/event_message_print.cc:42] PrintEventMessage] End compiling 'Net.construct'. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.822.174 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1334] CompileInner] [PROF]CleanCompileRes costs 2.123 msec. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.822.191 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1335] CompileInner] Finish compiling. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.822.212 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1336] CompileInner] [PROF]compile_graph costs 2702.64 msec. [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.822.569 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_x, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] PARALLEL(187834,ffffb35e0c10,python):2025-02-07-15:58:06.822.626 [mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc:292] GetParameterLayoutFromGraph] GetParameterLayout parameter: @1___main___Net_construct_5:param_y, layout device arrangement = [ 8 ] tensor map = [ -1 -1 -1 ] tensor shape = [ 4 4 4 ] device arrangement origin = [ 1 1 1 8 ] tensor map origin = [ -1 -1 -1 ] tensor shape origin = [ 4 4 4 ] [INFO] UTILS(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.201 [mindspore/ccsrc/utils/dynamic_obfuscation/registry_opaque_predicate.cc:112] init_calling_count] calling_count_ has been initialized to 0 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.381 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1893] RunGraph] Status record: start run actor: kernel_graph_0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.441 [mindspore/ccsrc/runtime/device/pre_launch_comm.cc:200] PreLaunchCommKernel] No hccl kernel to pre launch [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.472 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1918] RunGraph] [PROF]PreLaunchCommKernel costs 0.043 msec. [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.515 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:966] SpawnMultiPipelineActor] Enable runtime asynchronously launch kernel, default actor thread num 5, current actor thread num: 5 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.555 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.611 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.666 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:394] operator()] Init defrag memory step freq. [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.686 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:396] operator()] Config defrag memory step freq : . [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.701 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:405] operator()] Defrag memory step freq : 100. [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.747 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:638] PrepareDataForDeviceTensorStore] Prepare store data, input tensor size: 0, arg size: 2 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.768 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge/ge_graph_executor.cc:835] AllocGEFixMemory] Start AllocGEFixMemory [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.814 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:652] PrepareDataForDeviceTensorStore] prepare data for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.849 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-0, debug name:ValueNode 0, front node:ValueNode 0 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.874 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 0 front node:ValueNode 0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.895 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d2f8be0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.913 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 0 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:06.823.958 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:132] AllocDeviceMem] Malloc Memory for Pool, size: 1073741824 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.824.590 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-1, debug name:ValueNode (0, 0, 2), front node:ValueNode (0, 0, 2) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.824.693 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 0, 2) front node:ValueNode (0, 0, 2) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.824.718 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30c280 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.824.739 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 0, 2) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.824.994 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-2, debug name:ValueNode (2, 0, 0), front node:ValueNode (2, 0, 0) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.039 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 0, 0) front node:ValueNode (2, 0, 0) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.059 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30c810 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.078 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 0, 0) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.323 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-3, debug name:ValueNode (4, 4, 4), front node:ValueNode (4, 4, 4) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.354 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (4, 4, 4) front node:ValueNode (4, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.372 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30d000 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.391 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (4, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.627 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-4, debug name:ValueNode 1, front node:ValueNode 1 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.656 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 1 front node:ValueNode 1 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.675 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30d4e0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.692 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 1 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.930 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-5, debug name:ValueNode (1, 1, 1), front node:ValueNode (1, 1, 1) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.962 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (1, 1, 1) front node:ValueNode (1, 1, 1) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.825.982 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30dcf0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.000 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (1, 1, 1) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.251 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-6, debug name:ValueNode (2, 4, 4), front node:ValueNode (2, 4, 4) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.283 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 4, 4) front node:ValueNode (2, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.301 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30e4e0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.321 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 4, 4) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.553 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-7, debug name:ValueNode (0, 2, 0), front node:ValueNode (0, 2, 0) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.584 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (0, 2, 0) front node:ValueNode (0, 2, 0) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.603 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30ecd0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.622 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (0, 2, 0) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.853 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-8, debug name:ValueNode 2, front node:ValueNode 2 for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.881 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode 2 front node:ValueNode 2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.899 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30f1b0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.826.916 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode 2 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.827.147 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:659] PrepareDataForDeviceTensorStore] Prepare data for value node:Default/data-9, debug name:ValueNode (2, 2, 4), front node:ValueNode (2, 2, 4) for graph:kernel_graph0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.827.178 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1164] PrepareDataForValueNode] Prepare data for value node:ValueNode (2, 2, 4) front node:ValueNode (2, 2, 4) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.827.196 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:488] UpdateRefCount] Set origin ref count max for device address:0x5d30f9c0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.827.226 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:1125] PrepareDataForSequenceAndScalarValue] Prepare device data for value node: ValueNode (2, 2, 4) [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.827.480 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_y front node:@4_3_1___main___Net_construct_20:param_y backend is weight:0 front is weight:0 [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:06.827.523 [mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc:690] PrepareDataForDeviceTensorStore] Backend input node:@kernel_graph0:param_x front node:@4_3_1___main___Net_construct_20:param_x backend is weight:0 front is weight:0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.827.613 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph0_SuperKernelActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:1, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe87fff0f0,python):2025-02-07-15:58:06.827.624 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph_0_DataPrepareActor and check running condition:0, sequential num:2001075757 [INFO] GE_ADPT(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.827.822 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.828.096 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 6_Default/StridedSlice-op0, front node: @4_3_1___main___Net_construct_20:param_x, with index: 0, addr index: 0, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.828.136 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 6_Default/StridedSlice-op0, outer index: 0, inner index:0, front node: @4_3_1___main___Net_construct_20:param_x [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.828.198 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 0, inner index: 0, dynamic is 0 [INFO] GE_ADPT(187834,fffea4ff90f0,python):2025-02-07-15:58:06.828.247 [mindspore/ccsrc/transform/acl_ir/op_api_util.cc:212] SetDeterministic] Set kernel deterministic value: 0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.828.432 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 6_Default/StridedSlice-op0, input index: 0, device tensor: 0x5d30b590, ptr: 0x12c7fd801400, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.828.459 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor6_Default/StridedSlice-op0, actor input: 0, graph input: 1, device tensor: 0x5d30b590, ptr: 0x12c7fd801400, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.828.472 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 0_Default/StreamSend-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.828.508 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.828.510 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1180] FetchParameter] Fetch parameter for actor: 9_Default/StridedSlice-op3, front node: @4_3_1___main___Net_construct_20:param_y, with index: 0, addr index: 1, device type: 2 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.828.569 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:1108] PrepareParameter] Prepare parameter input, actor: 9_Default/StridedSlice-op3, outer index: 1, inner index:0, front node: @4_3_1___main___Net_construct_20:param_y [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.828.599 [mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.cc:909] UpdateDynamicShapeAndSize] No need to update dynamic shape and size, host shape dynamic is 0, graph parameter store outer index: 1, inner index: 0, dynamic is 0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.828.742 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.828.780 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 2, actor name : 1_Default/StreamRecv-op0, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.828.801 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.828.804 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:657] FetchParameterInput] Actor: 9_Default/StridedSlice-op3, input index: 0, device tensor: 0x5d30b1c0, ptr: 0x12c7fd801800, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.828.826 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.828.838 [mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.cc:678] FetchParameterInput] Correct ref count for actor9_Default/StridedSlice-op3, actor input: 0, graph input: 0, device tensor: 0x5d30b1c0, ptr: 0x12c7fd801800, ref cnt: 1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.828.928 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.828.930 [mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc:79] RunOpControl] Actor(kernel_graph_0_LoopCountActor) receive the input op control from:kernel_graph0_SuperKernelActor and check running condition:1, sequential num:2001075757 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe87fff0f0,python):2025-02-07-15:58:06.828.950 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:317] RunOpData] Actor(kernel_graph_0_OutputActor) receive the input op data and output position:0 device tensor:0x5d314510 ptr:0 ref count:18446744073709551615 origin ref count:18446744073709551615 dynamic ref count:2147483647 from memory pool:0 output node:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52} index:0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.828.956 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 2_Default/StreamSend-op1, task_id_on_stream : 2. [INFO] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.828.984 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:57] IncreaseLoopCount] Loop count actor(kernel_graph_0_LoopCountActor) running, loop count: 1, current count: 1, total running count: 1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.828.993 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe87fff0f0,python):2025-02-07-15:58:06.829.027 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:388] CreateOutputTensor] Create output tensor, output node: Default/AllGather-op2 debug string:@kernel_graph0:CNode_57{[0]: ValueNode AllGather, [1]: CNode_52}, output index: 0, output position: 0, output kernel tensor: KernelTensor(Type: Tensor[Complex64], Value: value is null, Shape: (4, 4, 4)) [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:06.829.030 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:51] Wait] Begin wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187834,fffe87fff0f0,python):2025-02-07-15:58:06.829.098 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:470] CreateOutputTensor] Create device tensor:0xfffe780095a0, size: 512 type:48 output node:Default/AllGather-op2 output index:0 output position:0, origin output device tensor: 0x5d314510 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.150 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.175 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 3, actor name : 3_Default/StreamRecv-op1, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.191 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.210 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.299 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.323 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 4_Default/StreamSend-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.340 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op2 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.467 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op2 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.489 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 4, actor name : 5_Default/StreamRecv-op2, task_id_on_stream : 1. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.506 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.523 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.609 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op2 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.661 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 6_Default/StridedSlice-op0, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:06.829.683 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op0 [INFO] GE_ADPT(187834,fffea4ff90f0,python):2025-02-07-15:58:06.831.313 [mindspore/ccsrc/transform/acl_ir/acl_allocator.cc:104] RegisterAllocator] Register AclAllocator [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.282.693 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op0 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.282.820 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 7_Default/StridedSlice-op1, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.282.850 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.283.859 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.283.914 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 8_Default/StridedSlice-op2, task_id_on_stream : 6. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.283.938 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.284.912 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.284.974 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 9_Default/StridedSlice-op3, task_id_on_stream : 7. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.285.001 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.285.958 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.286.014 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 10_Default/StridedSlice-op4, task_id_on_stream : 8. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.286.041 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.286.978 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.287.026 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 11_Default/StridedSlice-op5, task_id_on_stream : 9. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.287.051 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.287.990 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.288.042 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 12_Default/Mul-op0, task_id_on_stream : 10. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.288.067 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.288.691 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.288.739 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 13_Default/StreamSend-op3, task_id_on_stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.288.765 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.288.934 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.288.961 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 2, actor name : 14_Default/StreamRecv-op3, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.288.983 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.289.005 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.289.122 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.289.158 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 2, actor name : 15_Default/AllGather-op0, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.289.179 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op0 [INFO] KERNEL(187764,fffe8affd0f0,python):2025-02-07-15:58:12.289.209 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] GE_ADPT(187764,fffe8affd0f0,python):2025-02-07-15:58:12.289.401 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:42] GetLibHandler] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed!/usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(187764,fffe8affd0f0,python):2025-02-07-15:58:12.289.426 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.404.688 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.404.882 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 7_Default/StridedSlice-op1, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.404.911 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.405.968 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.406.030 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 8_Default/StridedSlice-op2, task_id_on_stream : 6. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.406.054 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.407.021 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.407.087 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 9_Default/StridedSlice-op3, task_id_on_stream : 7. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.407.115 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.408.070 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.408.156 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 10_Default/StridedSlice-op4, task_id_on_stream : 8. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.408.180 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.409.203 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.409.265 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 11_Default/StridedSlice-op5, task_id_on_stream : 9. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.409.289 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.410.235 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.410.294 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 12_Default/Mul-op0, task_id_on_stream : 10. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.410.318 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.410.969 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.411.022 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 13_Default/StreamSend-op3, task_id_on_stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.411.044 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.411.204 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.411.233 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 2, actor name : 14_Default/StreamRecv-op3, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.411.251 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.411.272 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.411.374 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.411.409 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 2, actor name : 15_Default/AllGather-op0, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.411.439 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op0 [INFO] KERNEL(187753,fffe74ff90f0,python):2025-02-07-15:58:12.411.465 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] GE_ADPT(187753,fffe74ff90f0,python):2025-02-07-15:58:12.411.684 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:42] GetLibHandler] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed!/usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(187753,fffe74ff90f0,python):2025-02-07-15:58:12.411.709 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.425.207 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.425.365 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 7_Default/StridedSlice-op1, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.425.392 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.426.402 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.426.459 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 8_Default/StridedSlice-op2, task_id_on_stream : 6. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.426.483 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.427.417 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.427.478 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 9_Default/StridedSlice-op3, task_id_on_stream : 7. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.427.503 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.428.435 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.428.511 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 10_Default/StridedSlice-op4, task_id_on_stream : 8. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.428.533 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.429.508 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.429.563 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 11_Default/StridedSlice-op5, task_id_on_stream : 9. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.429.588 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.430.514 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.430.569 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 12_Default/Mul-op0, task_id_on_stream : 10. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.430.594 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.024 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.073 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 13_Default/StreamSend-op3, task_id_on_stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.093 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.245 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.270 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 2, actor name : 14_Default/StreamRecv-op3, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.286 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.306 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.395 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.425 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 2, actor name : 15_Default/AllGather-op0, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.453 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op0 [INFO] KERNEL(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.477 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] GE_ADPT(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.678 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:42] GetLibHandler] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed!/usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(187742,fffe7affd0f0,python):2025-02-07-15:58:12.431.703 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.444.806 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.445.002 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 7_Default/StridedSlice-op1, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.445.043 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.446.108 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.446.168 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 8_Default/StridedSlice-op2, task_id_on_stream : 6. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.446.193 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.447.155 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.447.218 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 9_Default/StridedSlice-op3, task_id_on_stream : 7. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.447.243 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.448.201 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.448.262 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 10_Default/StridedSlice-op4, task_id_on_stream : 8. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.448.285 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.449.262 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.449.322 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 11_Default/StridedSlice-op5, task_id_on_stream : 9. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.449.345 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.450.298 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.450.359 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 12_Default/Mul-op0, task_id_on_stream : 10. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.450.382 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.045 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.106 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 13_Default/StreamSend-op3, task_id_on_stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.127 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.296 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.323 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 2, actor name : 14_Default/StreamRecv-op3, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.339 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.361 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.462 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.495 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 2, actor name : 15_Default/AllGather-op0, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.512 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op0 [INFO] KERNEL(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.536 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] GE_ADPT(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.748 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:42] GetLibHandler] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed!/usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(187775,fffeacff90f0,python):2025-02-07-15:58:12.451.774 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.453.919 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.453.962 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op0, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.454.136 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 2, actor name : 16_Default/StreamSend-op4, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.454.162 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.454.120 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.454.300 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.454.331 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 17_Default/StreamRecv-op4, task_id_on_stream : 12. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.454.347 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.454.331 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 7_Default/StridedSlice-op1, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.454.365 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.454.401 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.454.490 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.454.526 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 18_Default/Split-op0, task_id_on_stream : 13. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:12.454.544 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.455.399 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.455.476 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 8_Default/StridedSlice-op2, task_id_on_stream : 6. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.455.501 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.456.455 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.456.521 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 9_Default/StridedSlice-op3, task_id_on_stream : 7. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.456.546 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.457.495 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.457.582 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 10_Default/StridedSlice-op4, task_id_on_stream : 8. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.457.610 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.458.239 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.458.282 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op0, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.458.438 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 2, actor name : 16_Default/StreamSend-op4, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.458.460 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.458.548 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.458.592 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.458.604 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 11_Default/StridedSlice-op5, task_id_on_stream : 9. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.458.619 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 17_Default/StreamRecv-op4, task_id_on_stream : 12. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.458.636 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.458.629 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.458.688 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.458.778 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.458.812 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 18_Default/Split-op0, task_id_on_stream : 13. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:12.458.828 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.459.564 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.459.621 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 12_Default/Mul-op0, task_id_on_stream : 10. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.459.646 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.460.278 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.460.334 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 13_Default/StreamSend-op3, task_id_on_stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.460.359 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.460.536 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.460.567 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 2, actor name : 14_Default/StreamRecv-op3, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.460.586 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.460.608 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.460.748 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.460.789 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 2, actor name : 15_Default/AllGather-op0, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.460.810 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op0 [INFO] KERNEL(187789,fffe9cff90f0,python):2025-02-07-15:58:12.460.836 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] GE_ADPT(187789,fffe9cff90f0,python):2025-02-07-15:58:12.461.026 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:42] GetLibHandler] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed!/usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(187789,fffe9cff90f0,python):2025-02-07-15:58:12.461.052 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.472.214 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.472.255 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op0, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.472.417 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 2, actor name : 16_Default/StreamSend-op4, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.472.440 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.472.587 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.472.616 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 17_Default/StreamRecv-op4, task_id_on_stream : 12. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.472.647 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.472.705 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.472.796 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.472.834 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 18_Default/Split-op0, task_id_on_stream : 13. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:12.472.851 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.477.492 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op0 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.477.531 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op0, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.477.688 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 2, actor name : 16_Default/StreamSend-op4, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.477.716 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.477.859 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.477.891 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 17_Default/StreamRecv-op4, task_id_on_stream : 12. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.477.912 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.477.969 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.478.067 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.478.105 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 18_Default/Split-op0, task_id_on_stream : 13. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:12.478.127 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.506.597 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.506.804 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 7_Default/StridedSlice-op1, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.506.840 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.507.959 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.508.042 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 8_Default/StridedSlice-op2, task_id_on_stream : 6. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.508.095 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.509.141 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.509.216 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 9_Default/StridedSlice-op3, task_id_on_stream : 7. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.509.247 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.510.221 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.510.310 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 10_Default/StridedSlice-op4, task_id_on_stream : 8. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.510.341 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.511.306 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.511.368 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 11_Default/StridedSlice-op5, task_id_on_stream : 9. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.511.395 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.512.351 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.512.415 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 12_Default/Mul-op0, task_id_on_stream : 10. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.512.442 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.097 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.153 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 13_Default/StreamSend-op3, task_id_on_stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.190 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.371 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.403 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 2, actor name : 14_Default/StreamRecv-op3, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.425 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.450 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.559 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.599 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 2, actor name : 15_Default/AllGather-op0, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.623 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op0 [INFO] KERNEL(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.656 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] GE_ADPT(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.877 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:42] GetLibHandler] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed!/usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(187803,fffe857fa0f0,python):2025-02-07-15:58:12.513.906 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.533.951 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op0 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.533.995 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op0, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.534.157 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 2, actor name : 16_Default/StreamSend-op4, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.534.182 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.534.321 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.534.353 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 17_Default/StreamRecv-op4, task_id_on_stream : 12. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.534.382 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.534.444 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.534.539 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.534.581 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 18_Default/Split-op0, task_id_on_stream : 13. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:12.534.600 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.538.572 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.538.616 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op0, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.538.784 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 2, actor name : 16_Default/StreamSend-op4, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.538.811 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.538.950 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.538.983 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 17_Default/StreamRecv-op4, task_id_on_stream : 12. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.539.016 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.539.076 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.539.172 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.539.218 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 18_Default/Split-op0, task_id_on_stream : 13. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:12.539.241 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.659.141 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.659.287 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 7_Default/StridedSlice-op1, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.659.318 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.660.385 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.660.440 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 8_Default/StridedSlice-op2, task_id_on_stream : 6. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.660.466 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.661.474 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.661.538 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 9_Default/StridedSlice-op3, task_id_on_stream : 7. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.661.565 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.662.542 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.662.599 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 10_Default/StridedSlice-op4, task_id_on_stream : 8. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.662.627 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.663.617 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.663.672 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 11_Default/StridedSlice-op5, task_id_on_stream : 9. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.663.697 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.664.697 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.664.749 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 12_Default/Mul-op0, task_id_on_stream : 10. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.664.775 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.665.430 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.665.478 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 13_Default/StreamSend-op3, task_id_on_stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.665.502 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.665.677 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.665.707 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 2, actor name : 14_Default/StreamRecv-op3, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.665.730 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.665.753 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.665.862 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.665.899 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 2, actor name : 15_Default/AllGather-op0, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.665.921 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op0 [INFO] KERNEL(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.665.958 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] GE_ADPT(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.666.175 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:42] GetLibHandler] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed!/usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.666.200 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.879.092 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.879.268 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 7_Default/StridedSlice-op1, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.879.299 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.880.348 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.880.409 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 8_Default/StridedSlice-op2, task_id_on_stream : 6. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.880.434 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.881.426 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op2 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.881.497 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 9_Default/StridedSlice-op3, task_id_on_stream : 7. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.881.525 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.882.505 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op3 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.882.569 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 10_Default/StridedSlice-op4, task_id_on_stream : 8. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.882.596 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.883.569 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op4 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.883.627 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 11_Default/StridedSlice-op5, task_id_on_stream : 9. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.883.672 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.884.644 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StridedSlice-op5 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.884.708 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 12_Default/Mul-op0, task_id_on_stream : 10. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.884.734 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.885.350 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Mul-op0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.885.403 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 13_Default/StreamSend-op3, task_id_on_stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.885.427 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.885.597 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op3 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.885.627 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 2, actor name : 14_Default/StreamRecv-op3, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.885.646 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 2, send task id on stream : 11. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.885.669 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.885.773 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op3 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.885.812 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 2, actor name : 15_Default/AllGather-op0, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.885.834 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op0 [INFO] KERNEL(187834,fffea4ff90f0,python):2025-02-07-15:58:12.885.861 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [INFO] GE_ADPT(187834,fffea4ff90f0,python):2025-02-07-15:58:12.886.082 [mindspore/ccsrc/transform/symbol/symbol_utils.cc:42] GetLibHandler] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed!/usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(187834,fffea4ff90f0,python):2025-02-07-15:58:12.886.121 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.906.641 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.906.682 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op0, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.906.853 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 2, actor name : 16_Default/StreamSend-op4, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.906.881 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.907.023 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.907.055 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 17_Default/StreamRecv-op4, task_id_on_stream : 12. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.907.077 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.907.136 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.907.233 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.907.272 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 18_Default/Split-op0, task_id_on_stream : 13. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:12.907.296 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.912.290 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.912.330 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op0, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.912.510 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 2, actor name : 16_Default/StreamSend-op4, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.912.536 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.912.744 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op4 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.912.778 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 17_Default/StreamRecv-op4, task_id_on_stream : 12. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.912.878 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.912.938 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.913.034 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op4 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.913.075 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 18_Default/Split-op0, task_id_on_stream : 13. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:12.913.096 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.187.339 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.187.510 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 19_Default/Concat-op0, task_id_on_stream : 14. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.187.536 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.190.053 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.190.119 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 20_Default/StreamSend-op5, task_id_on_stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.190.144 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.190.297 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.190.326 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 3, actor name : 21_Default/StreamRecv-op5, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.190.344 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.190.364 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.190.460 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.190.495 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 3, actor name : 22_Default/AllGather-op1, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.190.513 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op1 [INFO] KERNEL(187753,fffe74ff90f0,python):2025-02-07-15:58:13.190.539 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.256.885 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.256.999 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 19_Default/Concat-op0, task_id_on_stream : 14. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.257.027 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.259.447 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.259.512 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 20_Default/StreamSend-op5, task_id_on_stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.259.537 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.259.705 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.259.735 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 3, actor name : 21_Default/StreamRecv-op5, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.259.754 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.259.775 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.259.876 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.259.916 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 3, actor name : 22_Default/AllGather-op1, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.259.947 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op1 [INFO] KERNEL(187789,fffe9cff90f0,python):2025-02-07-15:58:13.259.973 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.277.174 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.277.293 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 19_Default/Concat-op0, task_id_on_stream : 14. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.277.322 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.279.901 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.279.969 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 20_Default/StreamSend-op5, task_id_on_stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.279.999 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.280.160 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.280.193 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 3, actor name : 21_Default/StreamRecv-op5, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.280.214 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.280.237 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.280.342 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.280.394 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 3, actor name : 22_Default/AllGather-op1, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.280.416 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op1 [INFO] KERNEL(187803,fffe857fa0f0,python):2025-02-07-15:58:13.280.447 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.448.033 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.448.162 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 19_Default/Concat-op0, task_id_on_stream : 14. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.448.188 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.450.539 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.450.603 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 20_Default/StreamSend-op5, task_id_on_stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.450.625 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.450.771 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.450.796 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 3, actor name : 21_Default/StreamRecv-op5, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.450.812 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.450.831 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.450.920 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.450.950 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 3, actor name : 22_Default/AllGather-op1, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.450.967 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op1 [INFO] KERNEL(187742,fffe7affd0f0,python):2025-02-07-15:58:13.450.991 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.471.475 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.471.589 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 19_Default/Concat-op0, task_id_on_stream : 14. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.471.613 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.471.946 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.472.035 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 19_Default/Concat-op0, task_id_on_stream : 14. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.472.063 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.474.205 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.474.268 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 20_Default/StreamSend-op5, task_id_on_stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.474.289 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.474.444 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.474.468 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 3, actor name : 21_Default/StreamRecv-op5, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.474.484 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.474.513 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.474.553 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.474.614 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.474.611 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 20_Default/StreamSend-op5, task_id_on_stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.474.640 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.474.652 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 3, actor name : 22_Default/AllGather-op1, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.474.672 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op1 [INFO] KERNEL(187775,fffeacff90f0,python):2025-02-07-15:58:13.474.696 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.474.801 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.474.832 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 3, actor name : 21_Default/StreamRecv-op5, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.474.851 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.474.872 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.474.975 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.475.010 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 3, actor name : 22_Default/AllGather-op1, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.475.033 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op1 [INFO] KERNEL(187764,fffe8affd0f0,python):2025-02-07-15:58:13.475.060 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.490.374 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.490.411 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op1, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.490.582 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 3, actor name : 23_Default/StreamSend-op6, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.490.608 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.490.749 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.490.779 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 24_Default/StreamRecv-op6, task_id_on_stream : 16. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.490.799 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.490.851 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.490.948 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.490.986 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 25_Default/Split-op1, task_id_on_stream : 17. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.491.009 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.491.710 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.491.759 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 26_Default/Concat-op1, task_id_on_stream : 18. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.491.784 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.492.384 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.492.427 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 27_Default/StreamSend-op7, task_id_on_stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.492.449 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.492.595 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.492.625 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 4, actor name : 28_Default/StreamRecv-op7, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.492.687 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.492.702 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.492.803 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.492.836 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 4, actor name : 29_Default/AllGather-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.492.856 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op2 [INFO] KERNEL(187764,fffe8affd0f0,python):2025-02-07-15:58:13.492.879 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.493.812 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.493.864 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op1, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.494.033 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 3, actor name : 23_Default/StreamSend-op6, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.494.071 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.494.214 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.494.242 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 24_Default/StreamRecv-op6, task_id_on_stream : 16. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.494.258 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.494.312 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.494.408 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.494.444 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 25_Default/Split-op1, task_id_on_stream : 17. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.494.461 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.494.621 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.494.664 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op1, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.494.814 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 3, actor name : 23_Default/StreamSend-op6, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.494.848 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.494.978 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.495.003 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 24_Default/StreamRecv-op6, task_id_on_stream : 16. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.495.020 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.495.069 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.495.184 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.495.206 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.495.221 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 25_Default/Split-op1, task_id_on_stream : 17. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.495.239 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.495.262 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 26_Default/Concat-op1, task_id_on_stream : 18. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.495.286 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.495.920 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.495.949 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.495.972 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 26_Default/Concat-op1, task_id_on_stream : 18. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.495.997 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.496.003 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 27_Default/StreamSend-op7, task_id_on_stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.496.026 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.496.163 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.496.190 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 4, actor name : 28_Default/StreamRecv-op7, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.496.206 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.496.232 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.496.326 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.496.360 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 4, actor name : 29_Default/AllGather-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.496.376 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op2 [INFO] KERNEL(187753,fffe74ff90f0,python):2025-02-07-15:58:13.496.394 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.496.572 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.496.620 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 27_Default/StreamSend-op7, task_id_on_stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.496.652 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.496.787 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.496.811 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 4, actor name : 28_Default/StreamRecv-op7, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.496.826 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.496.853 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.496.939 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.496.969 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 4, actor name : 29_Default/AllGather-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.496.986 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op2 [INFO] KERNEL(187742,fffe7affd0f0,python):2025-02-07-15:58:13.497.003 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.499.033 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op1 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.499.072 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op1, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.499.234 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 3, actor name : 23_Default/StreamSend-op6, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.499.256 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.499.393 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.499.418 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 24_Default/StreamRecv-op6, task_id_on_stream : 16. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.499.433 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.499.484 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.499.575 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.499.615 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 25_Default/Split-op1, task_id_on_stream : 17. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.499.634 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.500.361 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.500.421 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 26_Default/Concat-op1, task_id_on_stream : 18. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.500.441 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.501.085 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.501.135 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 27_Default/StreamSend-op7, task_id_on_stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.501.153 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.501.294 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.501.320 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 4, actor name : 28_Default/StreamRecv-op7, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.501.335 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.501.351 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.501.445 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.501.479 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 4, actor name : 29_Default/AllGather-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.501.497 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op2 [INFO] KERNEL(187775,fffeacff90f0,python):2025-02-07-15:58:13.501.515 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.595.196 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.595.286 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 19_Default/Concat-op0, task_id_on_stream : 14. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.595.324 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.597.763 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.597.822 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 20_Default/StreamSend-op5, task_id_on_stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.597.847 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.598.004 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.598.032 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 3, actor name : 21_Default/StreamRecv-op5, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.598.053 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.598.074 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.598.179 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.598.216 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 3, actor name : 22_Default/AllGather-op1, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.598.238 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op1 [INFO] KERNEL(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.598.264 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.615.314 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op1 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.615.360 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op1, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.615.524 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 3, actor name : 23_Default/StreamSend-op6, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.615.551 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.615.685 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.615.715 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 24_Default/StreamRecv-op6, task_id_on_stream : 16. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.615.734 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.615.790 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.615.885 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.615.928 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 25_Default/Split-op1, task_id_on_stream : 17. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.615.949 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.616.419 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.616.532 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 19_Default/Concat-op0, task_id_on_stream : 14. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.616.561 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.616.697 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.616.755 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 26_Default/Concat-op1, task_id_on_stream : 18. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.616.780 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.617.395 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.617.448 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 27_Default/StreamSend-op7, task_id_on_stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.617.470 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.617.612 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.617.640 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 4, actor name : 28_Default/StreamRecv-op7, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.617.660 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.617.678 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.617.775 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.617.813 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 4, actor name : 29_Default/AllGather-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.617.833 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op2 [INFO] KERNEL(187789,fffe9cff90f0,python):2025-02-07-15:58:13.617.856 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.619.147 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op0 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.619.223 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 20_Default/StreamSend-op5, task_id_on_stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.619.250 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.619.413 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op5 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.619.443 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 3, actor name : 21_Default/StreamRecv-op5, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.619.462 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 3, send task id on stream : 15. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.619.483 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.619.586 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op5 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.619.623 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 3, actor name : 22_Default/AllGather-op1, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.619.644 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op1 [INFO] KERNEL(187834,fffea4ff90f0,python):2025-02-07-15:58:13.619.671 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.620.755 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.620.791 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op1, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.620.946 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 3, actor name : 23_Default/StreamSend-op6, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.620.972 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.621.110 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.621.150 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 24_Default/StreamRecv-op6, task_id_on_stream : 16. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.621.171 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.621.225 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.621.320 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.621.358 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 25_Default/Split-op1, task_id_on_stream : 17. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.621.379 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.622.102 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.622.150 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 26_Default/Concat-op1, task_id_on_stream : 18. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.622.174 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.622.769 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.622.814 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 27_Default/StreamSend-op7, task_id_on_stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.622.838 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.622.992 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.623.019 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 4, actor name : 28_Default/StreamRecv-op7, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.623.039 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.623.058 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.623.164 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.623.199 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 4, actor name : 29_Default/AllGather-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.623.221 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op2 [INFO] KERNEL(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.623.244 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.636.622 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op2 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.636.705 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op2, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.636.861 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 2, actor name : 30_Default/StreamSend-op8, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.636.886 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.019 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.049 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 31_Default/StreamRecv-op8, task_id_on_stream : 20. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.078 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.096 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.185 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.211 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 3, actor name : 32_Default/StreamSend-op9, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.228 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.354 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.382 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 33_Default/StreamRecv-op9, task_id_on_stream : 21. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.401 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.419 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.506 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.532 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 4, actor name : 34_Default/StreamSend-op10, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.550 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.673 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.698 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1fe1be40, name : Ascend, stream id : 0, actor name : 35_Default/StreamRecv-op10, task_id_on_stream : 22. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.715 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.765 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op10 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe9cff90f0,python):2025-02-07-15:58:13.637.871 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op10 [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:13.637.969 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [INFO] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:13.638.006 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:79] IncreaseLoopCount] Sync stream in the step end. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.638.702 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.638.743 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op1, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.638.904 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 3, actor name : 23_Default/StreamSend-op6, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.638.929 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.639.064 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.639.092 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 24_Default/StreamRecv-op6, task_id_on_stream : 16. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.639.111 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.639.177 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.639.271 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.639.312 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 25_Default/Split-op1, task_id_on_stream : 17. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.639.333 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.640.059 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.640.116 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 26_Default/Concat-op1, task_id_on_stream : 18. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.640.141 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.640.783 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.640.836 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 27_Default/StreamSend-op7, task_id_on_stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.640.860 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.641.006 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.641.034 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 4, actor name : 28_Default/StreamRecv-op7, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.641.053 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.641.072 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.641.169 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.641.202 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 4, actor name : 29_Default/AllGather-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.641.231 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op2 [INFO] KERNEL(187834,fffea4ff90f0,python):2025-02-07-15:58:13.641.252 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.641.520 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op2 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.641.563 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op2, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.641.709 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 2, actor name : 30_Default/StreamSend-op8, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.641.730 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.641.861 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.641.886 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 31_Default/StreamRecv-op8, task_id_on_stream : 20. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.641.903 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.641.919 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.002 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.025 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 3, actor name : 32_Default/StreamSend-op9, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.041 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.178 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.202 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 33_Default/StreamRecv-op9, task_id_on_stream : 21. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.217 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.233 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.314 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.335 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 4, actor name : 34_Default/StreamSend-op10, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.349 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.642.361 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op2 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.642.396 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op2, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.468 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.490 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x2bc02470, name : Ascend, stream id : 0, actor name : 35_Default/StreamRecv-op10, task_id_on_stream : 22. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.505 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.550 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op10 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.642.541 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 2, actor name : 30_Default/StreamSend-op8, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.642.565 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe7affd0f0,python):2025-02-07-15:58:13.642.635 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op10 [INFO] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:13.642.621 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:96] IncreaseLoopCount] Sync stream success. [DEBUG] RUNTIME_FRAMEWORK(187789,fffe97fff0f0,python):2025-02-07-15:58:13.642.693 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:220] RunOpControl] Actor(kernel_graph_0_OutputActor) receive the input op control and current count:1 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.642.699 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:13.642.696 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.642.728 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 31_Default/StreamRecv-op8, task_id_on_stream : 20. [INFO] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:13.642.747 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:79] IncreaseLoopCount] Sync stream in the step end. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.642.748 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.642.768 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op8 [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:13.642.788 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1348] SetActorExecutionStrategy] kernel_graph_0 execution count: 1, execution time: 7199.53 ms in multi thread or not: 1. [INFO] UTILS(187789,ffffaa419c10,python):2025-02-07-15:58:13.642.830 [mindspore/ccsrc/utils/utils.cc:415] SkipOrResetCopyAction] Step end, reset copy action flag [INFO] UTILS(187789,ffffaa419c10,python):2025-02-07-15:58:13.642.849 [mindspore/ccsrc/utils/utils.cc:429] SkipOrResetSyncAction] Step end, reset sync action flag [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.642.859 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op8 [INFO] DEBUG(187789,ffffaa419c10,python):2025-02-07-15:58:13.642.888 [mindspore/ccsrc/debug/summary/summary.cc:81] SummaryTensor] This function should be skipped on GE backend. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.642.888 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 3, actor name : 32_Default/StreamSend-op9, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.642.908 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:13.642.940 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:576] UpdateOutputDeviceAddress] Swap ptr:0x12c83d800600 from device tensor:0x34477770 device type:2 to :0xfffe740095a0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.643.039 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.643.067 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 33_Default/StreamRecv-op9, task_id_on_stream : 21. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.643.094 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.643.114 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op9 [INFO] VM(187789,ffffaa419c10,python):2025-02-07-15:58:13.643.196 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1943] RunGraph] Status record: end run actor: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.643.204 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.643.231 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 4, actor name : 34_Default/StreamSend-op10, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.643.252 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.643.380 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.643.408 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x28b10d60, name : Ascend, stream id : 0, actor name : 35_Default/StreamRecv-op10, task_id_on_stream : 22. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.643.427 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.643.477 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op10 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb9ffb0f0,python):2025-02-07-15:58:13.643.586 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op10 [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:13.643.667 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [INFO] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:13.643.734 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:79] IncreaseLoopCount] Sync stream in the step end. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.644.033 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.644.085 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op1, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.644.262 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 3, actor name : 23_Default/StreamSend-op6, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.644.290 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.644.428 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op6 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.644.460 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 24_Default/StreamRecv-op6, task_id_on_stream : 16. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.644.480 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.644.540 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.644.646 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op6 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.644.694 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 25_Default/Split-op1, task_id_on_stream : 17. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.644.718 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Split-op1 [INFO] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:13.644.725 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:96] IncreaseLoopCount] Sync stream success. [DEBUG] RUNTIME_FRAMEWORK(187742,fffe79ffb0f0,python):2025-02-07-15:58:13.644.793 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:220] RunOpControl] Actor(kernel_graph_0_OutputActor) receive the input op control and current count:1 [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:13.644.880 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1348] SetActorExecutionStrategy] kernel_graph_0 execution count: 1, execution time: 7360.56 ms in multi thread or not: 1. [INFO] UTILS(187742,ffffa187dc10,python):2025-02-07-15:58:13.644.928 [mindspore/ccsrc/utils/utils.cc:415] SkipOrResetCopyAction] Step end, reset copy action flag [INFO] UTILS(187742,ffffa187dc10,python):2025-02-07-15:58:13.644.946 [mindspore/ccsrc/utils/utils.cc:429] SkipOrResetSyncAction] Step end, reset sync action flag [INFO] DEBUG(187742,ffffa187dc10,python):2025-02-07-15:58:13.644.978 [mindspore/ccsrc/debug/summary/summary.cc:81] SummaryTensor] This function should be skipped on GE backend. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:13.645.018 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:576] UpdateOutputDeviceAddress] Swap ptr:0x12c83d400600 from device tensor:0x40255b50 device type:2 to :0xfffe640095a0 device type:2 [INFO] VM(187742,ffffa187dc10,python):2025-02-07-15:58:13.645.256 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1943] RunGraph] Status record: end run actor: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.645.491 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Split-op1 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.645.549 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 26_Default/Concat-op1, task_id_on_stream : 18. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.645.588 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.645.830 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op2 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.645.872 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op2, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.032 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 2, actor name : 30_Default/StreamSend-op8, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.058 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.211 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.646.221 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/Concat-op1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.241 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 31_Default/StreamRecv-op8, task_id_on_stream : 20. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.262 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.646.275 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 27_Default/StreamSend-op7, task_id_on_stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.283 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.646.301 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.389 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.416 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 3, actor name : 32_Default/StreamSend-op9, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.446 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.646.447 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op7 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.646.478 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 4, actor name : 28_Default/StreamRecv-op7, task_id_on_stream : 2. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.646.497 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 4, send task id on stream : 19. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.646.517 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.594 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.646.612 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op7 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.624 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 33_Default/StreamRecv-op9, task_id_on_stream : 21. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.644 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.646.647 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 4, actor name : 29_Default/AllGather-op2, task_id_on_stream : 3. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.666 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.646.670 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/AllGather-op2 [INFO] KERNEL(187803,fffe857fa0f0,python):2025-02-07-15:58:13.646.694 [mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc:39] ConvertHcclType] HcomDataType Can't support Current Ascend Data Type : Complex64, Convert it to Float32 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.775 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.803 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 4, actor name : 34_Default/StreamSend-op10, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.822 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op10 [INFO] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:13.646.915 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:96] IncreaseLoopCount] Sync stream success. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.951 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.977 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1f2b1970, name : Ascend, stream id : 0, actor name : 35_Default/StreamRecv-op10, task_id_on_stream : 22. [DEBUG] RUNTIME_FRAMEWORK(187818,fffeb8ff90f0,python):2025-02-07-15:58:13.646.988 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:220] RunOpControl] Actor(kernel_graph_0_OutputActor) receive the input op control and current count:1 [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.646.996 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.647.047 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op10 [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.647.094 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1348] SetActorExecutionStrategy] kernel_graph_0 execution count: 1, execution time: 7013.07 ms in multi thread or not: 1. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe8affd0f0,python):2025-02-07-15:58:13.647.137 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op10 [INFO] UTILS(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.647.157 [mindspore/ccsrc/utils/utils.cc:415] SkipOrResetCopyAction] Step end, reset copy action flag [INFO] UTILS(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.647.180 [mindspore/ccsrc/utils/utils.cc:429] SkipOrResetSyncAction] Step end, reset sync action flag [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:13.647.210 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [INFO] DEBUG(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.647.218 [mindspore/ccsrc/debug/summary/summary.cc:81] SummaryTensor] This function should be skipped on GE backend. [INFO] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:13.647.262 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:79] IncreaseLoopCount] Sync stream in the step end. [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.647.285 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:576] UpdateOutputDeviceAddress] Swap ptr:0x12c83d800600 from device tensor:0x3d165a10 device type:2 to :0xfffe9c0095a0 device type:2 [INFO] VM(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.647.523 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1943] RunGraph] Status record: end run actor: kernel_graph_0 x_real is: [[[ 1.6243454 -0.6117564 -0.5281718 -1.0729686 ] [ 0.86540765 -2.3015387 1.7448118 -0.7612069 ] [ 0.3190391 -0.24937038 1.4621079 -2.0601406 ] [-0.3224172 -0.38405436 1.1337694 -1.0998913 ]] [[-0.1724282 -0.8778584 0.04221375 0.58281523] [-1.1006192 1.1447237 0.9015907 0.50249434] [ 0.90085596 -0.68372786 -0.12289023 -0.93576944] [-0.26788807 0.53035545 -0.69166076 -0.39675352]] [[-0.6871727 -0.84520566 -0.6712461 -0.0126646 ] [-1.1173104 0.2344157 1.6598022 0.74204415] [-0.19183555 -0.887629 -0.7471583 1.6924546 ] [ 0.05080776 -0.6369957 0.19091548 2.1002553 ]] [[ 0.12015896 0.6172031 0.30017033 -0.35224986] [-1.1425182 -0.34934273 -0.20889424 0.5866232 ] [ 0.8389834 0.9311021 0.2855873 0.8851412 ] [-0.7543979 1.2528682 0.5129298 -0.29809284]]] x_imag is: [[[ 0.48851815 -0.07557172 1.1316293 1.5198169 ] [ 2.1855755 -1.3964963 -1.4441139 -0.5044659 ] [ 0.16003707 0.8761689 0.31563494 -2.0222013 ] [-0.30620402 0.8279746 0.23009473 0.7620112 ]] [[-0.22232814 -0.20075807 0.18656139 0.41005164] [ 0.19829972 0.11900865 -0.6706623 0.37756377] [ 0.12182127 1.1294839 1.1989179 0.18515642] [-0.37528494 -0.6387304 0.42349437 0.07734007]] [[-0.34385368 0.04359686 -0.62000084 0.698032 ] [-0.44712856 1.2245077 0.40349165 0.5935785 ] [-1.0949118 0.16938244 0.7405565 -0.9537006 ] [-0.2662185 0.03261455 -1.3731173 0.31515938]] [[ 0.84616065 -0.85951596 0.35054597 -1.3122834 ] [-0.03869551 -1.6157724 1.1214178 0.40890053] [-0.02461696 -0.7751616 1.2737559 1.9671017 ] [-1.8579819 1.236164 1.6276507 0.33801168]]] y_real is: [[[-1.199268 0.8633453 -0.1809203 -0.60392064] [-1.2300582 0.55053747 0.79280686 -0.62353075] [ 0.52057636 -1.1443413 0.80186105 0.0465673 ] [-0.18656977 -0.10174587 0.8688862 0.7504116 ]] [[ 0.5294653 0.13770121 0.07782113 0.61838025] [ 0.23249456 0.6825514 -0.31011677 -2.4348378 ] [ 1.0388246 2.1869795 0.44136444 -0.10015523] [-0.13644475 -0.11905419 0.01740941 -1.1220187 ]] [[-0.51709443 -0.9970268 0.24879916 -0.29664114] [ 0.49521133 -0.17470317 0.98633516 0.21353391] [ 2.1906998 -1.8963609 -0.6469167 0.9014869 ] [ 2.5283258 -0.24863477 0.04366899 -0.22631425]] [[ 1.3314571 -0.28730786 0.68006986 -0.3198016 ] [-1.2725588 0.31354773 0.5031848 1.2932259 ] [-0.11044703 -0.6173621 0.56276107 0.2407371 ] [ 0.28066507 -0.0731127 1.1603385 0.3694927 ]]] y_imag is: [[[ 1.90465868e+00 1.11105669e+00 6.59049809e-01 -1.62743831e+00] [ 6.02319300e-01 4.20282215e-01 8.10951650e-01 1.04444206e+00] [-4.00878191e-01 8.24005604e-01 -5.62305450e-01 1.95487809e+00] [-1.33195162e+00 -1.76068854e+00 -1.65072131e+00 -8.90555561e-01]] [[-1.11911535e+00 1.95607889e+00 -3.26499492e-01 -1.34267581e+00] [ 1.11438298e+00 -5.86523950e-01 -1.23685336e+00 8.75838935e-01] [ 6.23362184e-01 -4.34956670e-01 1.40753996e+00 1.29101574e-01] [ 1.61694956e+00 5.02740860e-01 1.55880558e+00 1.09402694e-01]] [[-1.21974444e+00 2.44936872e+00 -5.45774162e-01 -1.98837861e-01] [-7.00398505e-01 -2.03394443e-01 2.42669448e-01 2.01830178e-01] [ 6.61020279e-01 1.79215825e+00 -1.20464571e-01 -1.23312068e+00] [-1.18231809e+00 -6.65754497e-01 -1.67419577e+00 8.25029850e-01]] [[-4.98213559e-01 -3.10984969e-01 -1.89148285e-03 -1.39662039e+00] [-8.61316383e-01 6.74711525e-01 6.18539155e-01 -4.43171918e-01] [ 1.81053495e+00 -1.30572689e+00 -3.44987214e-01 -2.30839744e-01] [-2.79308510e+00 1.93752885e+00 3.66332024e-01 -1.04458940e+00]]] ms output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] ms output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] np output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] np output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] rank_4 pass the test [INFO] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:13.649.273 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:96] IncreaseLoopCount] Sync stream success. [DEBUG] RUNTIME_FRAMEWORK(187764,fffe89ffb0f0,python):2025-02-07-15:58:13.649.341 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:220] RunOpControl] Actor(kernel_graph_0_OutputActor) receive the input op control and current count:1 [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:13.649.447 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1348] SetActorExecutionStrategy] kernel_graph_0 execution count: 1, execution time: 7386.23 ms in multi thread or not: 1. [INFO] UTILS(187764,ffff97badc10,python):2025-02-07-15:58:13.649.499 [mindspore/ccsrc/utils/utils.cc:415] SkipOrResetCopyAction] Step end, reset copy action flag [INFO] UTILS(187764,ffff97badc10,python):2025-02-07-15:58:13.649.521 [mindspore/ccsrc/utils/utils.cc:429] SkipOrResetSyncAction] Step end, reset sync action flag [INFO] DEBUG(187764,ffff97badc10,python):2025-02-07-15:58:13.649.557 [mindspore/ccsrc/debug/summary/summary.cc:81] SummaryTensor] This function should be skipped on GE backend. [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:13.649.606 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:576] UpdateOutputDeviceAddress] Swap ptr:0x12c83d800600 from device tensor:0x33905c40 device type:2 to :0xfffe740095a0 device type:2 [INFO] VM(187764,ffff97badc10,python):2025-02-07-15:58:13.649.840 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1943] RunGraph] Status record: end run actor: kernel_graph_0 x_real is: [[[ 1.6243454 -0.6117564 -0.5281718 -1.0729686 ] [ 0.86540765 -2.3015387 1.7448118 -0.7612069 ] [ 0.3190391 -0.24937038 1.4621079 -2.0601406 ] [-0.3224172 -0.38405436 1.1337694 -1.0998913 ]] [[-0.1724282 -0.8778584 0.04221375 0.58281523] [-1.1006192 1.1447237 0.9015907 0.50249434] [ 0.90085596 -0.68372786 -0.12289023 -0.93576944] [-0.26788807 0.53035545 -0.69166076 -0.39675352]] [[-0.6871727 -0.84520566 -0.6712461 -0.0126646 ] [-1.1173104 0.2344157 1.6598022 0.74204415] [-0.19183555 -0.887629 -0.7471583 1.6924546 ] [ 0.05080776 -0.6369957 0.19091548 2.1002553 ]] [[ 0.12015896 0.6172031 0.30017033 -0.35224986] [-1.1425182 -0.34934273 -0.20889424 0.5866232 ] [ 0.8389834 0.9311021 0.2855873 0.8851412 ] [-0.7543979 1.2528682 0.5129298 -0.29809284]]] x_imag is: [[[ 0.48851815 -0.07557172 1.1316293 1.5198169 ] [ 2.1855755 -1.3964963 -1.4441139 -0.5044659 ] [ 0.16003707 0.8761689 0.31563494 -2.0222013 ] [-0.30620402 0.8279746 0.23009473 0.7620112 ]] [[-0.22232814 -0.20075807 0.18656139 0.41005164] [ 0.19829972 0.11900865 -0.6706623 0.37756377] [ 0.12182127 1.1294839 1.1989179 0.18515642] [-0.37528494 -0.6387304 0.42349437 0.07734007]] [[-0.34385368 0.04359686 -0.62000084 0.698032 ] [-0.44712856 1.2245077 0.40349165 0.5935785 ] [-1.0949118 0.16938244 0.7405565 -0.9537006 ] [-0.2662185 0.03261455 -1.3731173 0.31515938]] [[ 0.84616065 -0.85951596 0.35054597 -1.3122834 ] [-0.03869551 -1.6157724 1.1214178 0.40890053] [-0.02461696 -0.7751616 1.2737559 1.9671017 ] [-1.8579819 1.236164 1.6276507 0.33801168]]] y_real is: [[[-1.199268 0.8633453 -0.1809203 -0.60392064] [-1.2300582 0.55053747 0.79280686 -0.62353075] [ 0.52057636 -1.1443413 0.80186105 0.0465673 ] [-0.18656977 -0.10174587 0.8688862 0.7504116 ]] [[ 0.5294653 0.13770121 0.07782113 0.61838025] [ 0.23249456 0.6825514 -0.31011677 -2.4348378 ] [ 1.0388246 2.1869795 0.44136444 -0.10015523] [-0.13644475 -0.11905419 0.01740941 -1.1220187 ]] [[-0.51709443 -0.9970268 0.24879916 -0.29664114] [ 0.49521133 -0.17470317 0.98633516 0.21353391] [ 2.1906998 -1.8963609 -0.6469167 0.9014869 ] [ 2.5283258 -0.24863477 0.04366899 -0.22631425]] [[ 1.3314571 -0.28730786 0.68006986 -0.3198016 ] [-1.2725588 0.31354773 0.5031848 1.2932259 ] [-0.11044703 -0.6173621 0.56276107 0.2407371 ] [ 0.28066507 -0.0731127 1.1603385 0.3694927 ]]] y_imag is: [[[ 1.90465868e+00 1.11105669e+00 6.59049809e-01 -1.62743831e+00] [ 6.02319300e-01 4.20282215e-01 8.10951650e-01 1.04444206e+00] [-4.00878191e-01 8.24005604e-01 -5.62305450e-01 1.95487809e+00] [-1.33195162e+00 -1.76068854e+00 -1.65072131e+00 -8.90555561e-01]] [[-1.11911535e+00 1.95607889e+00 -3.26499492e-01 -1.34267581e+00] [ 1.11438298e+00 -5.86523950e-01 -1.23685336e+00 8.75838935e-01] [ 6.23362184e-01 -4.34956670e-01 1.40753996e+00 1.29101574e-01] [ 1.61694956e+00 5.02740860e-01 1.55880558e+00 1.09402694e-01]] [[-1.21974444e+00 2.44936872e+00 -5.45774162e-01 -1.98837861e-01] [-7.00398505e-01 -2.03394443e-01 2.42669448e-01 2.01830178e-01] [ 6.61020279e-01 1.79215825e+00 -1.20464571e-01 -1.23312068e+00] [-1.18231809e+00 -6.65754497e-01 -1.67419577e+00 8.25029850e-01]] [[-4.98213559e-01 -3.10984969e-01 -1.89148285e-03 -1.39662039e+00] [-8.61316383e-01 6.74711525e-01 6.18539155e-01 -4.43171918e-01] [ 1.81053495e+00 -1.30572689e+00 -3.44987214e-01 -2.30839744e-01] [-2.79308510e+00 1.93752885e+00 3.66332024e-01 -1.04458940e+00]]] ms output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] ms output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] np output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] np output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] rank_0 pass the test x_real is: [[[ 1.6243454 -0.6117564 -0.5281718 -1.0729686 ] [ 0.86540765 -2.3015387 1.7448118 -0.7612069 ] [ 0.3190391 -0.24937038 1.4621079 -2.0601406 ] [-0.3224172 -0.38405436 1.1337694 -1.0998913 ]] [[-0.1724282 -0.8778584 0.04221375 0.58281523] [-1.1006192 1.1447237 0.9015907 0.50249434] [ 0.90085596 -0.68372786 -0.12289023 -0.93576944] [-0.26788807 0.53035545 -0.69166076 -0.39675352]] [[-0.6871727 -0.84520566 -0.6712461 -0.0126646 ] [-1.1173104 0.2344157 1.6598022 0.74204415] [-0.19183555 -0.887629 -0.7471583 1.6924546 ] [ 0.05080776 -0.6369957 0.19091548 2.1002553 ]] [[ 0.12015896 0.6172031 0.30017033 -0.35224986] [-1.1425182 -0.34934273 -0.20889424 0.5866232 ] [ 0.8389834 0.9311021 0.2855873 0.8851412 ] [-0.7543979 1.2528682 0.5129298 -0.29809284]]] x_imag is: [[[ 0.48851815 -0.07557172 1.1316293 1.5198169 ] [ 2.1855755 -1.3964963 -1.4441139 -0.5044659 ] [ 0.16003707 0.8761689 0.31563494 -2.0222013 ] [-0.30620402 0.8279746 0.23009473 0.7620112 ]] [[-0.22232814 -0.20075807 0.18656139 0.41005164] [ 0.19829972 0.11900865 -0.6706623 0.37756377] [ 0.12182127 1.1294839 1.1989179 0.18515642] [-0.37528494 -0.6387304 0.42349437 0.07734007]] [[-0.34385368 0.04359686 -0.62000084 0.698032 ] [-0.44712856 1.2245077 0.40349165 0.5935785 ] [-1.0949118 0.16938244 0.7405565 -0.9537006 ] [-0.2662185 0.03261455 -1.3731173 0.31515938]] [[ 0.84616065 -0.85951596 0.35054597 -1.3122834 ] [-0.03869551 -1.6157724 1.1214178 0.40890053] [-0.02461696 -0.7751616 1.2737559 1.9671017 ] [-1.8579819 1.236164 1.6276507 0.33801168]]] y_real is: [[[-1.199268 0.8633453 -0.1809203 -0.60392064] [-1.2300582 0.55053747 0.79280686 -0.62353075] [ 0.52057636 -1.1443413 0.80186105 0.0465673 ] [-0.18656977 -0.10174587 0.8688862 0.7504116 ]] [[ 0.5294653 0.13770121 0.07782113 0.61838025] [ 0.23249456 0.6825514 -0.31011677 -2.4348378 ] [ 1.0388246 2.1869795 0.44136444 -0.10015523] [-0.13644475 -0.11905419 0.01740941 -1.1220187 ]] [[-0.51709443 -0.9970268 0.24879916 -0.29664114] [ 0.49521133 -0.17470317 0.98633516 0.21353391] [ 2.1906998 -1.8963609 -0.6469167 0.9014869 ] [ 2.5283258 -0.24863477 0.04366899 -0.22631425]] [[ 1.3314571 -0.28730786 0.68006986 -0.3198016 ] [-1.2725588 0.31354773 0.5031848 1.2932259 ] [-0.11044703 -0.6173621 0.56276107 0.2407371 ] [ 0.28066507 -0.0731127 1.1603385 0.3694927 ]]] y_imag is: [[[ 1.90465868e+00 1.11105669e+00 6.59049809e-01 -1.62743831e+00] [ 6.02319300e-01 4.20282215e-01 8.10951650e-01 1.04444206e+00] [-4.00878191e-01 8.24005604e-01 -5.62305450e-01 1.95487809e+00] [-1.33195162e+00 -1.76068854e+00 -1.65072131e+00 -8.90555561e-01]] [[-1.11911535e+00 1.95607889e+00 -3.26499492e-01 -1.34267581e+00] [ 1.11438298e+00 -5.86523950e-01 -1.23685336e+00 8.75838935e-01] [ 6.23362184e-01 -4.34956670e-01 1.40753996e+00 1.29101574e-01] [ 1.61694956e+00 5.02740860e-01 1.55880558e+00 1.09402694e-01]] [[-1.21974444e+00 2.44936872e+00 -5.45774162e-01 -1.98837861e-01] [-7.00398505e-01 -2.03394443e-01 2.42669448e-01 2.01830178e-01] [ 6.61020279e-01 1.79215825e+00 -1.20464571e-01 -1.23312068e+00] [-1.18231809e+00 -6.65754497e-01 -1.67419577e+00 8.25029850e-01]] [[-4.98213559e-01 -3.10984969e-01 -1.89148285e-03 -1.39662039e+00] [-8.61316383e-01 6.74711525e-01 6.18539155e-01 -4.43171918e-01] [ 1.81053495e+00 -1.30572689e+00 -3.44987214e-01 -2.30839744e-01] [-2.79308510e+00 1.93752885e+00 3.66332024e-01 -1.04458940e+00]]] ms output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] ms output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] np output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] np output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] rank_6 pass the test x_real is: [[[ 1.6243454 -0.6117564 -0.5281718 -1.0729686 ] [ 0.86540765 -2.3015387 1.7448118 -0.7612069 ] [ 0.3190391 -0.24937038 1.4621079 -2.0601406 ] [-0.3224172 -0.38405436 1.1337694 -1.0998913 ]] [[-0.1724282 -0.8778584 0.04221375 0.58281523] [-1.1006192 1.1447237 0.9015907 0.50249434] [ 0.90085596 -0.68372786 -0.12289023 -0.93576944] [-0.26788807 0.53035545 -0.69166076 -0.39675352]] [[-0.6871727 -0.84520566 -0.6712461 -0.0126646 ] [-1.1173104 0.2344157 1.6598022 0.74204415] [-0.19183555 -0.887629 -0.7471583 1.6924546 ] [ 0.05080776 -0.6369957 0.19091548 2.1002553 ]] [[ 0.12015896 0.6172031 0.30017033 -0.35224986] [-1.1425182 -0.34934273 -0.20889424 0.5866232 ] [ 0.8389834 0.9311021 0.2855873 0.8851412 ] [-0.7543979 1.2528682 0.5129298 -0.29809284]]] x_imag is: [[[ 0.48851815 -0.07557172 1.1316293 1.5198169 ] [ 2.1855755 -1.3964963 -1.4441139 -0.5044659 ] [ 0.16003707 0.8761689 0.31563494 -2.0222013 ] [-0.30620402 0.8279746 0.23009473 0.7620112 ]] [[-0.22232814 -0.20075807 0.18656139 0.41005164] [ 0.19829972 0.11900865 -0.6706623 0.37756377] [ 0.12182127 1.1294839 1.1989179 0.18515642] [-0.37528494 -0.6387304 0.42349437 0.07734007]] [[-0.34385368 0.04359686 -0.62000084 0.698032 ] [-0.44712856 1.2245077 0.40349165 0.5935785 ] [-1.0949118 0.16938244 0.7405565 -0.9537006 ] [-0.2662185 0.03261455 -1.3731173 0.31515938]] [[ 0.84616065 -0.85951596 0.35054597 -1.3122834 ] [-0.03869551 -1.6157724 1.1214178 0.40890053] [-0.02461696 -0.7751616 1.2737559 1.9671017 ] [-1.8579819 1.236164 1.6276507 0.33801168]]] y_real is: [[[-1.199268 0.8633453 -0.1809203 -0.60392064] [-1.2300582 0.55053747 0.79280686 -0.62353075] [ 0.52057636 -1.1443413 0.80186105 0.0465673 ] [-0.18656977 -0.10174587 0.8688862 0.7504116 ]] [[ 0.5294653 0.13770121 0.07782113 0.61838025] [ 0.23249456 0.6825514 -0.31011677 -2.4348378 ] [ 1.0388246 2.1869795 0.44136444 -0.10015523] [-0.13644475 -0.11905419 0.01740941 -1.1220187 ]] [[-0.51709443 -0.9970268 0.24879916 -0.29664114] [ 0.49521133 -0.17470317 0.98633516 0.21353391] [ 2.1906998 -1.8963609 -0.6469167 0.9014869 ] [ 2.5283258 -0.24863477 0.04366899 -0.22631425]] [[ 1.3314571 -0.28730786 0.68006986 -0.3198016 ] [-1.2725588 0.31354773 0.5031848 1.2932259 ] [-0.11044703 -0.6173621 0.56276107 0.2407371 ] [ 0.28066507 -0.0731127 1.1603385 0.3694927 ]]] y_imag is: [[[ 1.90465868e+00 1.11105669e+00 6.59049809e-01 -1.62743831e+00] [ 6.02319300e-01 4.20282215e-01 8.10951650e-01 1.04444206e+00] [-4.00878191e-01 8.24005604e-01 -5.62305450e-01 1.95487809e+00] [-1.33195162e+00 -1.76068854e+00 -1.65072131e+00 -8.90555561e-01]] [[-1.11911535e+00 1.95607889e+00 -3.26499492e-01 -1.34267581e+00] [ 1.11438298e+00 -5.86523950e-01 -1.23685336e+00 8.75838935e-01] [ 6.23362184e-01 -4.34956670e-01 1.40753996e+00 1.29101574e-01] [ 1.61694956e+00 5.02740860e-01 1.55880558e+00 1.09402694e-01]] [[-1.21974444e+00 2.44936872e+00 -5.45774162e-01 -1.98837861e-01] [-7.00398505e-01 -2.03394443e-01 2.42669448e-01 2.01830178e-01] [ 6.61020279e-01 1.79215825e+00 -1.20464571e-01 -1.23312068e+00] [-1.18231809e+00 -6.65754497e-01 -1.67419577e+00 8.25029850e-01]] [[-4.98213559e-01 -3.10984969e-01 -1.89148285e-03 -1.39662039e+00] [-8.61316383e-01 6.74711525e-01 6.18539155e-01 -4.43171918e-01] [ 1.81053495e+00 -1.30572689e+00 -3.44987214e-01 -2.30839744e-01] [-2.79308510e+00 1.93752885e+00 3.66332024e-01 -1.04458940e+00]]] ms output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] ms output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] np output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] np output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] rank_2 pass the test [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.662.793 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op2 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.662.839 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op2, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.003 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 2, actor name : 30_Default/StreamSend-op8, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.028 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.172 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.202 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 31_Default/StreamRecv-op8, task_id_on_stream : 20. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.221 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.239 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.330 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.357 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 3, actor name : 32_Default/StreamSend-op9, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.375 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.503 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.527 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 33_Default/StreamRecv-op9, task_id_on_stream : 21. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.544 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.574 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.660 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.685 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 4, actor name : 34_Default/StreamSend-op10, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.704 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.828 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.852 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x48cbe720, name : Ascend, stream id : 0, actor name : 35_Default/StreamRecv-op10, task_id_on_stream : 22. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.869 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.663.916 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op10 [DEBUG] RUNTIME_FRAMEWORK(187834,fffea4ff90f0,python):2025-02-07-15:58:13.664.006 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op10 [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:13.664.073 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [INFO] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:13.664.122 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:79] IncreaseLoopCount] Sync stream in the step end. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.667.984 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op2 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.027 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op2, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.185 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 2, actor name : 30_Default/StreamSend-op8, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.211 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.668.249 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op2 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.348 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.668.286 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op2, addresses size : 2. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.392 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 31_Default/StreamRecv-op8, task_id_on_stream : 20. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.415 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.435 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.668.524 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 2, actor name : 30_Default/StreamSend-op8, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.528 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.668.553 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.556 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 3, actor name : 32_Default/StreamSend-op9, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.577 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.668.720 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.668.752 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 31_Default/StreamRecv-op8, task_id_on_stream : 20. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.668.767 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.754 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.668.784 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.789 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 33_Default/StreamRecv-op9, task_id_on_stream : 21. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.809 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.828 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.668.872 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.668.897 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 3, actor name : 32_Default/StreamSend-op9, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.668.913 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.917 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.944 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 4, actor name : 34_Default/StreamSend-op10, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.668.962 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.669.039 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.669.064 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 33_Default/StreamRecv-op9, task_id_on_stream : 21. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.669.080 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.669.096 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.669.090 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.669.119 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x35083c70, name : Ascend, stream id : 0, actor name : 35_Default/StreamRecv-op10, task_id_on_stream : 22. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.669.138 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.669.182 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.669.203 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 4, actor name : 34_Default/StreamSend-op10, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.669.205 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op10 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.669.218 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe857fa0f0,python):2025-02-07-15:58:13.669.316 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op10 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.669.344 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.669.368 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x249fc190, name : Ascend, stream id : 0, actor name : 35_Default/StreamRecv-op10, task_id_on_stream : 22. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.669.395 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.669.442 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op10 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:13.669.423 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [INFO] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:13.669.480 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:79] IncreaseLoopCount] Sync stream in the step end. [DEBUG] RUNTIME_FRAMEWORK(187775,fffeacff90f0,python):2025-02-07-15:58:13.669.533 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op10 [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:13.669.603 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [INFO] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:13.669.650 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:79] IncreaseLoopCount] Sync stream in the step end. [INFO] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:13.669.640 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:96] IncreaseLoopCount] Sync stream success. [DEBUG] RUNTIME_FRAMEWORK(187834,fffe7ffff0f0,python):2025-02-07-15:58:13.669.723 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:220] RunOpControl] Actor(kernel_graph_0_OutputActor) receive the input op control and current count:1 [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:13.669.815 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1348] SetActorExecutionStrategy] kernel_graph_0 execution count: 1, execution time: 6846.13 ms in multi thread or not: 1. [INFO] UTILS(187834,ffffb35e0c10,python):2025-02-07-15:58:13.669.856 [mindspore/ccsrc/utils/utils.cc:415] SkipOrResetCopyAction] Step end, reset copy action flag [INFO] UTILS(187834,ffffb35e0c10,python):2025-02-07-15:58:13.669.877 [mindspore/ccsrc/utils/utils.cc:429] SkipOrResetSyncAction] Step end, reset sync action flag [INFO] DEBUG(187834,ffffb35e0c10,python):2025-02-07-15:58:13.669.914 [mindspore/ccsrc/debug/summary/summary.cc:81] SummaryTensor] This function should be skipped on GE backend. [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:13.669.963 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:576] UpdateOutputDeviceAddress] Swap ptr:0x12c83d800600 from device tensor:0x5d314510 device type:2 to :0xfffe780095a0 device type:2 [INFO] VM(187834,ffffb35e0c10,python):2025-02-07-15:58:13.670.254 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1943] RunGraph] Status record: end run actor: kernel_graph_0 [INFO] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:13.671.768 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:96] IncreaseLoopCount] Sync stream success. [DEBUG] RUNTIME_FRAMEWORK(187775,fffe87fff0f0,python):2025-02-07-15:58:13.671.845 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:220] RunOpControl] Actor(kernel_graph_0_OutputActor) receive the input op control and current count:1 [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:13.671.942 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1348] SetActorExecutionStrategy] kernel_graph_0 execution count: 1, execution time: 7397.11 ms in multi thread or not: 1. [INFO] UTILS(187775,ffffba4dbc10,python):2025-02-07-15:58:13.671.988 [mindspore/ccsrc/utils/utils.cc:415] SkipOrResetCopyAction] Step end, reset copy action flag [INFO] UTILS(187775,ffffba4dbc10,python):2025-02-07-15:58:13.672.007 [mindspore/ccsrc/utils/utils.cc:429] SkipOrResetSyncAction] Step end, reset sync action flag [INFO] DEBUG(187775,ffffba4dbc10,python):2025-02-07-15:58:13.672.046 [mindspore/ccsrc/debug/summary/summary.cc:81] SummaryTensor] This function should be skipped on GE backend. [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:13.672.093 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:576] UpdateOutputDeviceAddress] Swap ptr:0x12c83da00600 from device tensor:0x39049050 device type:2 to :0xfffe800095a0 device type:2 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.672.257 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/AllGather-op2 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.672.300 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1304] ProcessMultiStreamAfterKernelLaunch] Record event for kernel : Default/AllGather-op2, addresses size : 2. [INFO] VM(187775,ffffba4dbc10,python):2025-02-07-15:58:13.672.382 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1943] RunGraph] Status record: end run actor: kernel_graph_0 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.672.456 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 2, actor name : 30_Default/StreamSend-op8, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.672.481 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.672.617 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op8 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.672.685 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 31_Default/StreamRecv-op8, task_id_on_stream : 20. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.672.702 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.672.718 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.672.810 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op8 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.672.836 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 3, actor name : 32_Default/StreamSend-op9, task_id_on_stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.672.851 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.672.990 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op9 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.673.015 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 33_Default/StreamRecv-op9, task_id_on_stream : 21. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.673.031 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 5. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.673.047 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.673.132 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op9 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.673.154 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 4, actor name : 34_Default/StreamSend-op10, task_id_on_stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.673.170 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.673.294 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamSend-op10 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.673.317 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1182] ProcessMultiStreamBeforeKernelLaunch] device context : 0x1dee35c0, name : Ascend, stream id : 0, actor name : 35_Default/StreamRecv-op10, task_id_on_stream : 22. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.673.332 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1205] ProcessMultiStreamBeforeKernelLaunch] Process wait stream start, memory_stream_id : 0, send task id on stream : 4. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.673.379 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1104] LaunchKernelWithDebug] Begin launch kernel: Default/StreamRecv-op10 [INFO] RUNTIME_FRAMEWORK(187803,fffe84ff90f0,python):2025-02-07-15:58:13.673.385 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:96] IncreaseLoopCount] Sync stream success. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe74ff90f0,python):2025-02-07-15:58:13.673.465 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc:1107] LaunchKernelWithDebug] End launch kernel: Default/StreamRecv-op10 [DEBUG] RUNTIME_FRAMEWORK(187803,fffe63fff0f0,python):2025-02-07-15:58:13.673.492 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:220] RunOpControl] Actor(kernel_graph_0_OutputActor) receive the input op control and current count:1 [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:13.673.533 [mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.cc:55] Wait] End wait kernel launch finish [INFO] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:13.673.582 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:79] IncreaseLoopCount] Sync stream in the step end. [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:13.673.617 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1348] SetActorExecutionStrategy] kernel_graph_0 execution count: 1, execution time: 7211.96 ms in multi thread or not: 1. [INFO] UTILS(187803,ffff93d7bc10,python):2025-02-07-15:58:13.673.665 [mindspore/ccsrc/utils/utils.cc:415] SkipOrResetCopyAction] Step end, reset copy action flag [INFO] UTILS(187803,ffff93d7bc10,python):2025-02-07-15:58:13.673.690 [mindspore/ccsrc/utils/utils.cc:429] SkipOrResetSyncAction] Step end, reset sync action flag [INFO] DEBUG(187803,ffff93d7bc10,python):2025-02-07-15:58:13.673.731 [mindspore/ccsrc/debug/summary/summary.cc:81] SummaryTensor] This function should be skipped on GE backend. [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:13.673.783 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:576] UpdateOutputDeviceAddress] Swap ptr:0x12c83d800600 from device tensor:0x496dc080 device type:2 to :0xfffe540095a0 device type:2 [INFO] VM(187803,ffff93d7bc10,python):2025-02-07-15:58:13.674.076 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1943] RunGraph] Status record: end run actor: kernel_graph_0 [INFO] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:13.675.762 [mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc:96] IncreaseLoopCount] Sync stream success. [DEBUG] RUNTIME_FRAMEWORK(187753,fffe577fe0f0,python):2025-02-07-15:58:13.675.840 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:220] RunOpControl] Actor(kernel_graph_0_OutputActor) receive the input op control and current count:1 [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:13.675.936 [mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc:1348] SetActorExecutionStrategy] kernel_graph_0 execution count: 1, execution time: 7392.96 ms in multi thread or not: 1. [INFO] UTILS(187753,ffff8292dc10,python):2025-02-07-15:58:13.676.042 [mindspore/ccsrc/utils/utils.cc:415] SkipOrResetCopyAction] Step end, reset copy action flag [INFO] UTILS(187753,ffff8292dc10,python):2025-02-07-15:58:13.676.061 [mindspore/ccsrc/utils/utils.cc:429] SkipOrResetSyncAction] Step end, reset sync action flag [INFO] DEBUG(187753,ffff8292dc10,python):2025-02-07-15:58:13.676.102 [mindspore/ccsrc/debug/summary/summary.cc:81] SummaryTensor] This function should be skipped on GE backend. [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:13.676.152 [mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc:576] UpdateOutputDeviceAddress] Swap ptr:0x12c83d600600 from device tensor:0x325413e0 device type:2 to :0xfffe480095a0 device type:2 x_real is: [[[ 1.6243454 -0.6117564 -0.5281718 -1.0729686 ] [ 0.86540765 -2.3015387 1.7448118 -0.7612069 ] [ 0.3190391 -0.24937038 1.4621079 -2.0601406 ] [-0.3224172 -0.38405436 1.1337694 -1.0998913 ]] [[-0.1724282 -0.8778584 0.04221375 0.58281523] [-1.1006192 1.1447237 0.9015907 0.50249434] [ 0.90085596 -0.68372786 -0.12289023 -0.93576944] [-0.26788807 0.53035545 -0.69166076 -0.39675352]] [[-0.6871727 -0.84520566 -0.6712461 -0.0126646 ] [-1.1173104 0.2344157 1.6598022 0.74204415] [-0.19183555 -0.887629 -0.7471583 1.6924546 ] [ 0.05080776 -0.6369957 0.19091548 2.1002553 ]] [[ 0.12015896 0.6172031 0.30017033 -0.35224986] [-1.1425182 -0.34934273 -0.20889424 0.5866232 ] [ 0.8389834 0.9311021 0.2855873 0.8851412 ] [-0.7543979 1.2528682 0.5129298 -0.29809284]]] x_imag is: [[[ 0.48851815 -0.07557172 1.1316293 1.5198169 ] [ 2.1855755 -1.3964963 -1.4441139 -0.5044659 ] [ 0.16003707 0.8761689 0.31563494 -2.0222013 ] [-0.30620402 0.8279746 0.23009473 0.7620112 ]] [[-0.22232814 -0.20075807 0.18656139 0.41005164] [ 0.19829972 0.11900865 -0.6706623 0.37756377] [ 0.12182127 1.1294839 1.1989179 0.18515642] [-0.37528494 -0.6387304 0.42349437 0.07734007]] [[-0.34385368 0.04359686 -0.62000084 0.698032 ] [-0.44712856 1.2245077 0.40349165 0.5935785 ] [-1.0949118 0.16938244 0.7405565 -0.9537006 ] [-0.2662185 0.03261455 -1.3731173 0.31515938]] [[ 0.84616065 -0.85951596 0.35054597 -1.3122834 ] [-0.03869551 -1.6157724 1.1214178 0.40890053] [-0.02461696 -0.7751616 1.2737559 1.9671017 ] [-1.8579819 1.236164 1.6276507 0.33801168]]] y_real is: [[[-1.199268 0.8633453 -0.1809203 -0.60392064] [-1.2300582 0.55053747 0.79280686 -0.62353075] [ 0.52057636 -1.1443413 0.80186105 0.0465673 ] [-0.18656977 -0.10174587 0.8688862 0.7504116 ]] [[ 0.5294653 0.13770121 0.07782113 0.61838025] [ 0.23249456 0.6825514 -0.31011677 -2.4348378 ] [ 1.0388246 2.1869795 0.44136444 -0.10015523] [-0.13644475 -0.11905419 0.01740941 -1.1220187 ]] [[-0.51709443 -0.9970268 0.24879916 -0.29664114] [ 0.49521133 -0.17470317 0.98633516 0.21353391] [ 2.1906998 -1.8963609 -0.6469167 0.9014869 ] [ 2.5283258 -0.24863477 0.04366899 -0.22631425]] [[ 1.3314571 -0.28730786 0.68006986 -0.3198016 ] [-1.2725588 0.31354773 0.5031848 1.2932259 ] [-0.11044703 -0.6173621 0.56276107 0.2407371 ] [ 0.28066507 -0.0731127 1.1603385 0.3694927 ]]] y_imag is: [[[ 1.90465868e+00 1.11105669e+00 6.59049809e-01 -1.62743831e+00] [ 6.02319300e-01 4.20282215e-01 8.10951650e-01 1.04444206e+00] [-4.00878191e-01 8.24005604e-01 -5.62305450e-01 1.95487809e+00] [-1.33195162e+00 -1.76068854e+00 -1.65072131e+00 -8.90555561e-01]] [[-1.11911535e+00 1.95607889e+00 -3.26499492e-01 -1.34267581e+00] [ 1.11438298e+00 -5.86523950e-01 -1.23685336e+00 8.75838935e-01] [ 6.23362184e-01 -4.34956670e-01 1.40753996e+00 1.29101574e-01] [ 1.61694956e+00 5.02740860e-01 1.55880558e+00 1.09402694e-01]] [[-1.21974444e+00 2.44936872e+00 -5.45774162e-01 -1.98837861e-01] [-7.00398505e-01 -2.03394443e-01 2.42669448e-01 2.01830178e-01] [ 6.61020279e-01 1.79215825e+00 -1.20464571e-01 -1.23312068e+00] [-1.18231809e+00 -6.65754497e-01 -1.67419577e+00 8.25029850e-01]] [[-4.98213559e-01 -3.10984969e-01 -1.89148285e-03 -1.39662039e+00] [-8.61316383e-01 6.74711525e-01 6.18539155e-01 -4.43171918e-01] [ 1.81053495e+00 -1.30572689e+00 -3.44987214e-01 -2.30839744e-01] [-2.79308510e+00 1.93752885e+00 3.66332024e-01 -1.04458940e+00]]] ms output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] ms output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] np output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] np output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] rank_7 pass the test [INFO] VM(187753,ffff8292dc10,python):2025-02-07-15:58:13.676.418 [mindspore/ccsrc/backend/graph_compiler/backend_base.cc:1943] RunGraph] Status record: end run actor: kernel_graph_0 x_real is: [[[ 1.6243454 -0.6117564 -0.5281718 -1.0729686 ] [ 0.86540765 -2.3015387 1.7448118 -0.7612069 ] [ 0.3190391 -0.24937038 1.4621079 -2.0601406 ] [-0.3224172 -0.38405436 1.1337694 -1.0998913 ]] [[-0.1724282 -0.8778584 0.04221375 0.58281523] [-1.1006192 1.1447237 0.9015907 0.50249434] [ 0.90085596 -0.68372786 -0.12289023 -0.93576944] [-0.26788807 0.53035545 -0.69166076 -0.39675352]] [[-0.6871727 -0.84520566 -0.6712461 -0.0126646 ] [-1.1173104 0.2344157 1.6598022 0.74204415] [-0.19183555 -0.887629 -0.7471583 1.6924546 ] [ 0.05080776 -0.6369957 0.19091548 2.1002553 ]] [[ 0.12015896 0.6172031 0.30017033 -0.35224986] [-1.1425182 -0.34934273 -0.20889424 0.5866232 ] [ 0.8389834 0.9311021 0.2855873 0.8851412 ] [-0.7543979 1.2528682 0.5129298 -0.29809284]]] x_imag is: [[[ 0.48851815 -0.07557172 1.1316293 1.5198169 ] [ 2.1855755 -1.3964963 -1.4441139 -0.5044659 ] [ 0.16003707 0.8761689 0.31563494 -2.0222013 ] [-0.30620402 0.8279746 0.23009473 0.7620112 ]] [[-0.22232814 -0.20075807 0.18656139 0.41005164] [ 0.19829972 0.11900865 -0.6706623 0.37756377] [ 0.12182127 1.1294839 1.1989179 0.18515642] [-0.37528494 -0.6387304 0.42349437 0.07734007]] [[-0.34385368 0.04359686 -0.62000084 0.698032 ] [-0.44712856 1.2245077 0.40349165 0.5935785 ] [-1.0949118 0.16938244 0.7405565 -0.9537006 ] [-0.2662185 0.03261455 -1.3731173 0.31515938]] [[ 0.84616065 -0.85951596 0.35054597 -1.3122834 ] [-0.03869551 -1.6157724 1.1214178 0.40890053] [-0.02461696 -0.7751616 1.2737559 1.9671017 ] [-1.8579819 1.236164 1.6276507 0.33801168]]] y_real is: [[[-1.199268 0.8633453 -0.1809203 -0.60392064] [-1.2300582 0.55053747 0.79280686 -0.62353075] [ 0.52057636 -1.1443413 0.80186105 0.0465673 ] [-0.18656977 -0.10174587 0.8688862 0.7504116 ]] [[ 0.5294653 0.13770121 0.07782113 0.61838025] [ 0.23249456 0.6825514 -0.31011677 -2.4348378 ] [ 1.0388246 2.1869795 0.44136444 -0.10015523] [-0.13644475 -0.11905419 0.01740941 -1.1220187 ]] [[-0.51709443 -0.9970268 0.24879916 -0.29664114] [ 0.49521133 -0.17470317 0.98633516 0.21353391] [ 2.1906998 -1.8963609 -0.6469167 0.9014869 ] [ 2.5283258 -0.24863477 0.04366899 -0.22631425]] [[ 1.3314571 -0.28730786 0.68006986 -0.3198016 ] [-1.2725588 0.31354773 0.5031848 1.2932259 ] [-0.11044703 -0.6173621 0.56276107 0.2407371 ] [ 0.28066507 -0.0731127 1.1603385 0.3694927 ]]] y_imag is: [[[ 1.90465868e+00 1.11105669e+00 6.59049809e-01 -1.62743831e+00] [ 6.02319300e-01 4.20282215e-01 8.10951650e-01 1.04444206e+00] [-4.00878191e-01 8.24005604e-01 -5.62305450e-01 1.95487809e+00] [-1.33195162e+00 -1.76068854e+00 -1.65072131e+00 -8.90555561e-01]] [[-1.11911535e+00 1.95607889e+00 -3.26499492e-01 -1.34267581e+00] [ 1.11438298e+00 -5.86523950e-01 -1.23685336e+00 8.75838935e-01] [ 6.23362184e-01 -4.34956670e-01 1.40753996e+00 1.29101574e-01] [ 1.61694956e+00 5.02740860e-01 1.55880558e+00 1.09402694e-01]] [[-1.21974444e+00 2.44936872e+00 -5.45774162e-01 -1.98837861e-01] [-7.00398505e-01 -2.03394443e-01 2.42669448e-01 2.01830178e-01] [ 6.61020279e-01 1.79215825e+00 -1.20464571e-01 -1.23312068e+00] [-1.18231809e+00 -6.65754497e-01 -1.67419577e+00 8.25029850e-01]] [[-4.98213559e-01 -3.10984969e-01 -1.89148285e-03 -1.39662039e+00] [-8.61316383e-01 6.74711525e-01 6.18539155e-01 -4.43171918e-01] [ 1.81053495e+00 -1.30572689e+00 -3.44987214e-01 -2.30839744e-01] [-2.79308510e+00 1.93752885e+00 3.66332024e-01 -1.04458940e+00]]] ms output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] ms output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] np output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] np output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] rank_3 pass the test x_real is: [[[ 1.6243454 -0.6117564 -0.5281718 -1.0729686 ] [ 0.86540765 -2.3015387 1.7448118 -0.7612069 ] [ 0.3190391 -0.24937038 1.4621079 -2.0601406 ] [-0.3224172 -0.38405436 1.1337694 -1.0998913 ]] [[-0.1724282 -0.8778584 0.04221375 0.58281523] [-1.1006192 1.1447237 0.9015907 0.50249434] [ 0.90085596 -0.68372786 -0.12289023 -0.93576944] [-0.26788807 0.53035545 -0.69166076 -0.39675352]] [[-0.6871727 -0.84520566 -0.6712461 -0.0126646 ] [-1.1173104 0.2344157 1.6598022 0.74204415] [-0.19183555 -0.887629 -0.7471583 1.6924546 ] [ 0.05080776 -0.6369957 0.19091548 2.1002553 ]] [[ 0.12015896 0.6172031 0.30017033 -0.35224986] [-1.1425182 -0.34934273 -0.20889424 0.5866232 ] [ 0.8389834 0.9311021 0.2855873 0.8851412 ] [-0.7543979 1.2528682 0.5129298 -0.29809284]]] x_imag is: [[[ 0.48851815 -0.07557172 1.1316293 1.5198169 ] [ 2.1855755 -1.3964963 -1.4441139 -0.5044659 ] [ 0.16003707 0.8761689 0.31563494 -2.0222013 ] [-0.30620402 0.8279746 0.23009473 0.7620112 ]] [[-0.22232814 -0.20075807 0.18656139 0.41005164] [ 0.19829972 0.11900865 -0.6706623 0.37756377] [ 0.12182127 1.1294839 1.1989179 0.18515642] [-0.37528494 -0.6387304 0.42349437 0.07734007]] [[-0.34385368 0.04359686 -0.62000084 0.698032 ] [-0.44712856 1.2245077 0.40349165 0.5935785 ] [-1.0949118 0.16938244 0.7405565 -0.9537006 ] [-0.2662185 0.03261455 -1.3731173 0.31515938]] [[ 0.84616065 -0.85951596 0.35054597 -1.3122834 ] [-0.03869551 -1.6157724 1.1214178 0.40890053] [-0.02461696 -0.7751616 1.2737559 1.9671017 ] [-1.8579819 1.236164 1.6276507 0.33801168]]] y_real is: [[[-1.199268 0.8633453 -0.1809203 -0.60392064] [-1.2300582 0.55053747 0.79280686 -0.62353075] [ 0.52057636 -1.1443413 0.80186105 0.0465673 ] [-0.18656977 -0.10174587 0.8688862 0.7504116 ]] [[ 0.5294653 0.13770121 0.07782113 0.61838025] [ 0.23249456 0.6825514 -0.31011677 -2.4348378 ] [ 1.0388246 2.1869795 0.44136444 -0.10015523] [-0.13644475 -0.11905419 0.01740941 -1.1220187 ]] [[-0.51709443 -0.9970268 0.24879916 -0.29664114] [ 0.49521133 -0.17470317 0.98633516 0.21353391] [ 2.1906998 -1.8963609 -0.6469167 0.9014869 ] [ 2.5283258 -0.24863477 0.04366899 -0.22631425]] [[ 1.3314571 -0.28730786 0.68006986 -0.3198016 ] [-1.2725588 0.31354773 0.5031848 1.2932259 ] [-0.11044703 -0.6173621 0.56276107 0.2407371 ] [ 0.28066507 -0.0731127 1.1603385 0.3694927 ]]] y_imag is: [[[ 1.90465868e+00 1.11105669e+00 6.59049809e-01 -1.62743831e+00] [ 6.02319300e-01 4.20282215e-01 8.10951650e-01 1.04444206e+00] [-4.00878191e-01 8.24005604e-01 -5.62305450e-01 1.95487809e+00] [-1.33195162e+00 -1.76068854e+00 -1.65072131e+00 -8.90555561e-01]] [[-1.11911535e+00 1.95607889e+00 -3.26499492e-01 -1.34267581e+00] [ 1.11438298e+00 -5.86523950e-01 -1.23685336e+00 8.75838935e-01] [ 6.23362184e-01 -4.34956670e-01 1.40753996e+00 1.29101574e-01] [ 1.61694956e+00 5.02740860e-01 1.55880558e+00 1.09402694e-01]] [[-1.21974444e+00 2.44936872e+00 -5.45774162e-01 -1.98837861e-01] [-7.00398505e-01 -2.03394443e-01 2.42669448e-01 2.01830178e-01] [ 6.61020279e-01 1.79215825e+00 -1.20464571e-01 -1.23312068e+00] [-1.18231809e+00 -6.65754497e-01 -1.67419577e+00 8.25029850e-01]] [[-4.98213559e-01 -3.10984969e-01 -1.89148285e-03 -1.39662039e+00] [-8.61316383e-01 6.74711525e-01 6.18539155e-01 -4.43171918e-01] [ 1.81053495e+00 -1.30572689e+00 -3.44987214e-01 -2.30839744e-01] [-2.79308510e+00 1.93752885e+00 3.66332024e-01 -1.04458940e+00]]] ms output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] ms output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] np output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] np output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] rank_5 pass the test x_real is: [[[ 1.6243454 -0.6117564 -0.5281718 -1.0729686 ] [ 0.86540765 -2.3015387 1.7448118 -0.7612069 ] [ 0.3190391 -0.24937038 1.4621079 -2.0601406 ] [-0.3224172 -0.38405436 1.1337694 -1.0998913 ]] [[-0.1724282 -0.8778584 0.04221375 0.58281523] [-1.1006192 1.1447237 0.9015907 0.50249434] [ 0.90085596 -0.68372786 -0.12289023 -0.93576944] [-0.26788807 0.53035545 -0.69166076 -0.39675352]] [[-0.6871727 -0.84520566 -0.6712461 -0.0126646 ] [-1.1173104 0.2344157 1.6598022 0.74204415] [-0.19183555 -0.887629 -0.7471583 1.6924546 ] [ 0.05080776 -0.6369957 0.19091548 2.1002553 ]] [[ 0.12015896 0.6172031 0.30017033 -0.35224986] [-1.1425182 -0.34934273 -0.20889424 0.5866232 ] [ 0.8389834 0.9311021 0.2855873 0.8851412 ] [-0.7543979 1.2528682 0.5129298 -0.29809284]]] x_imag is: [[[ 0.48851815 -0.07557172 1.1316293 1.5198169 ] [ 2.1855755 -1.3964963 -1.4441139 -0.5044659 ] [ 0.16003707 0.8761689 0.31563494 -2.0222013 ] [-0.30620402 0.8279746 0.23009473 0.7620112 ]] [[-0.22232814 -0.20075807 0.18656139 0.41005164] [ 0.19829972 0.11900865 -0.6706623 0.37756377] [ 0.12182127 1.1294839 1.1989179 0.18515642] [-0.37528494 -0.6387304 0.42349437 0.07734007]] [[-0.34385368 0.04359686 -0.62000084 0.698032 ] [-0.44712856 1.2245077 0.40349165 0.5935785 ] [-1.0949118 0.16938244 0.7405565 -0.9537006 ] [-0.2662185 0.03261455 -1.3731173 0.31515938]] [[ 0.84616065 -0.85951596 0.35054597 -1.3122834 ] [-0.03869551 -1.6157724 1.1214178 0.40890053] [-0.02461696 -0.7751616 1.2737559 1.9671017 ] [-1.8579819 1.236164 1.6276507 0.33801168]]] y_real is: [[[-1.199268 0.8633453 -0.1809203 -0.60392064] [-1.2300582 0.55053747 0.79280686 -0.62353075] [ 0.52057636 -1.1443413 0.80186105 0.0465673 ] [-0.18656977 -0.10174587 0.8688862 0.7504116 ]] [[ 0.5294653 0.13770121 0.07782113 0.61838025] [ 0.23249456 0.6825514 -0.31011677 -2.4348378 ] [ 1.0388246 2.1869795 0.44136444 -0.10015523] [-0.13644475 -0.11905419 0.01740941 -1.1220187 ]] [[-0.51709443 -0.9970268 0.24879916 -0.29664114] [ 0.49521133 -0.17470317 0.98633516 0.21353391] [ 2.1906998 -1.8963609 -0.6469167 0.9014869 ] [ 2.5283258 -0.24863477 0.04366899 -0.22631425]] [[ 1.3314571 -0.28730786 0.68006986 -0.3198016 ] [-1.2725588 0.31354773 0.5031848 1.2932259 ] [-0.11044703 -0.6173621 0.56276107 0.2407371 ] [ 0.28066507 -0.0731127 1.1603385 0.3694927 ]]] y_imag is: [[[ 1.90465868e+00 1.11105669e+00 6.59049809e-01 -1.62743831e+00] [ 6.02319300e-01 4.20282215e-01 8.10951650e-01 1.04444206e+00] [-4.00878191e-01 8.24005604e-01 -5.62305450e-01 1.95487809e+00] [-1.33195162e+00 -1.76068854e+00 -1.65072131e+00 -8.90555561e-01]] [[-1.11911535e+00 1.95607889e+00 -3.26499492e-01 -1.34267581e+00] [ 1.11438298e+00 -5.86523950e-01 -1.23685336e+00 8.75838935e-01] [ 6.23362184e-01 -4.34956670e-01 1.40753996e+00 1.29101574e-01] [ 1.61694956e+00 5.02740860e-01 1.55880558e+00 1.09402694e-01]] [[-1.21974444e+00 2.44936872e+00 -5.45774162e-01 -1.98837861e-01] [-7.00398505e-01 -2.03394443e-01 2.42669448e-01 2.01830178e-01] [ 6.61020279e-01 1.79215825e+00 -1.20464571e-01 -1.23312068e+00] [-1.18231809e+00 -6.65754497e-01 -1.67419577e+00 8.25029850e-01]] [[-4.98213559e-01 -3.10984969e-01 -1.89148285e-03 -1.39662039e+00] [-8.61316383e-01 6.74711525e-01 6.18539155e-01 -4.43171918e-01] [ 1.81053495e+00 -1.30572689e+00 -3.44987214e-01 -2.30839744e-01] [-2.79308510e+00 1.93752885e+00 3.66332024e-01 -1.04458940e+00]]] ms output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] ms output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] np output real part is: [[[-2.8784857e+00 -4.4419253e-01 -6.5024310e-01 3.1213961e+00] [-2.3809161e+00 -6.8016070e-01 2.5544052e+00 1.0015213e+00] [ 2.3023959e-01 -4.3660328e-01 1.3498906e+00 3.8572218e+00] [-3.4769565e-01 1.4968814e+00 1.3649389e+00 -1.4675790e-01]] [[-3.4010559e-01 2.7181643e-01 6.4197317e-02 9.1096783e-01] [-4.7686982e-01 8.5113418e-01 -1.1091093e+00 -1.5541773e+00] [ 8.5989255e-01 -1.0040224e+00 -1.7417642e+00 6.9818228e-02] [ 6.4336872e-01 2.5797483e-01 -6.7218679e-01 4.3670365e-01]] [[-6.4080447e-02 7.3590791e-01 -5.0538588e-01 1.4255203e-01] [-8.6647296e-01 2.0810489e-01 1.5392063e+00 3.8649529e-02] [ 3.0350482e-01 1.3797047e+00 5.7256001e-01 3.4969771e-01] [-1.8629640e-01 1.8009256e-01 -2.2905302e+00 -7.3533356e-01]] [[ 5.8155525e-01 -4.4462386e-01 2.0479986e-01 -1.7201117e+00] [ 1.4205924e+00 9.8064464e-01 -7.9875314e-01 9.3984950e-01] [-4.8093360e-02 -1.5869765e+00 6.0014689e-01 6.6717160e-01] [-5.4012351e+00 -2.4867041e+00 -1.0883808e-03 2.4294031e-01]]] np output imag part is: [[[ 2.5079594e+00 -7.4494052e-01 -5.5282623e-01 8.2834142e-01] [-2.1671333e+00 -1.7361193e+00 2.7005458e-01 -4.8048657e-01] [-4.4584304e-02 -1.2081189e+00 -5.6905591e-01 -4.1214924e+00] [ 4.8657250e-01 5.9195709e-01 -1.6716112e+00 1.5513363e+00]] [[ 7.5252019e-02 -1.7448049e+00 7.3565077e-04 -5.2896404e-01] [-1.1804078e+00 -5.9017831e-01 -9.0715194e-01 -4.7920248e-01] [ 6.8811047e-01 2.7675502e+00 3.5618681e-01 -1.3935369e-01] [-3.8195583e-01 3.4267491e-01 -1.0707920e+00 -1.3018291e-01]] [[ 1.0159800e+00 -2.1136875e+00 2.1209309e-01 -2.0454681e-01] [ 5.6113940e-01 -2.6160422e-01 8.0076128e-01 2.7651605e-01] [-2.5254302e+00 -1.9119818e+00 -3.8907224e-01 -2.9467494e+00] [-7.3315805e-01 4.1597360e-01 -3.7959254e-01 1.6614482e+00]] [[ 1.0667617e+00 5.5004805e-02 2.3782799e-01 9.1162968e-01] [ 1.0333118e+00 -7.4232733e-01 4.3507111e-01 2.6882580e-01] [ 1.5217277e+00 -7.3720962e-01 6.1829627e-01 2.6922858e-01] [ 1.5856271e+00 2.3370891e+00 2.0765285e+00 4.3627748e-01]]] rank_1 pass the test [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:13.836.908 [mindspore/ccsrc/pipeline/jit/ps/init.cc:604] operator()] Start register... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:13.837.085 [mindspore/ccsrc/pipeline/jit/ps/init.cc:607] operator()] Start mindspore.profiler... [INFO] ME(187742:281473391778832,MainProcess):2025-02-07-15:58:13.837.180 [mindspore/profiler/envprofiler.py:56] analyse start [INFO] ME(187742:281473391778832,MainProcess):2025-02-07-15:58:13.837.388 [mindspore/profiler/envprofiler.py:58] Profiler is not initialized, skip analyse. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:13.837.478 [mindspore/ccsrc/pipeline/jit/ps/init.cc:614] operator()] Start EmbeddingCacheScheduler... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:13.837.551 [mindspore/ccsrc/pipeline/jit/ps/init.cc:621] operator()] Start releasing dataset handles... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:13.837.634 [mindspore/ccsrc/pipeline/jit/ps/init.cc:624] operator()] End release dataset handles. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:13.837.684 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2855] FinalizeCluster] Start finalize the cluster instance. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.840.595 [mindspore/ccsrc/pipeline/jit/ps/init.cc:604] operator()] Start register... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.840.706 [mindspore/ccsrc/pipeline/jit/ps/init.cc:607] operator()] Start mindspore.profiler... [INFO] ME(187818:281473870146576,MainProcess):2025-02-07-15:58:13.840.809 [mindspore/profiler/envprofiler.py:56] analyse start [INFO] ME(187818:281473870146576,MainProcess):2025-02-07-15:58:13.841.027 [mindspore/profiler/envprofiler.py:58] Profiler is not initialized, skip analyse. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.841.117 [mindspore/ccsrc/pipeline/jit/ps/init.cc:614] operator()] Start EmbeddingCacheScheduler... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.841.194 [mindspore/ccsrc/pipeline/jit/ps/init.cc:621] operator()] Start releasing dataset handles... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.841.299 [mindspore/ccsrc/pipeline/jit/ps/init.cc:624] operator()] End release dataset handles. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.841.341 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2855] FinalizeCluster] Start finalize the cluster instance. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:13.855.300 [mindspore/ccsrc/pipeline/jit/ps/init.cc:604] operator()] Start register... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:13.855.390 [mindspore/ccsrc/pipeline/jit/ps/init.cc:607] operator()] Start mindspore.profiler... [INFO] ME(187834:281473691028496,MainProcess):2025-02-07-15:58:13.855.479 [mindspore/profiler/envprofiler.py:56] analyse start [INFO] ME(187834:281473691028496,MainProcess):2025-02-07-15:58:13.855.674 [mindspore/profiler/envprofiler.py:58] Profiler is not initialized, skip analyse. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:13.855.763 [mindspore/ccsrc/pipeline/jit/ps/init.cc:614] operator()] Start EmbeddingCacheScheduler... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:13.855.799 [mindspore/ccsrc/pipeline/jit/ps/init.cc:621] operator()] Start releasing dataset handles... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:13.855.844 [mindspore/ccsrc/pipeline/jit/ps/init.cc:624] operator()] End release dataset handles. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:13.855.866 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2855] FinalizeCluster] Start finalize the cluster instance. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:13.892.177 [mindspore/ccsrc/pipeline/jit/ps/init.cc:604] operator()] Start register... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:13.892.273 [mindspore/ccsrc/pipeline/jit/ps/init.cc:607] operator()] Start mindspore.profiler... [INFO] ME(187753:281472872406032,MainProcess):2025-02-07-15:58:13.892.358 [mindspore/profiler/envprofiler.py:56] analyse start [INFO] ME(187753:281472872406032,MainProcess):2025-02-07-15:58:13.892.560 [mindspore/profiler/envprofiler.py:58] Profiler is not initialized, skip analyse. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:13.892.703 [mindspore/ccsrc/pipeline/jit/ps/init.cc:614] operator()] Start EmbeddingCacheScheduler... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:13.892.742 [mindspore/ccsrc/pipeline/jit/ps/init.cc:621] operator()] Start releasing dataset handles... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:13.892.783 [mindspore/ccsrc/pipeline/jit/ps/init.cc:624] operator()] End release dataset handles. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:13.892.803 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2855] FinalizeCluster] Start finalize the cluster instance. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:13.924.964 [mindspore/ccsrc/pipeline/jit/ps/init.cc:604] operator()] Start register... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:13.925.029 [mindspore/ccsrc/pipeline/jit/ps/init.cc:607] operator()] Start mindspore.profiler... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.925.610 [mindspore/ccsrc/pipeline/jit/ps/init.cc:604] operator()] Start register... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.925.710 [mindspore/ccsrc/pipeline/jit/ps/init.cc:607] operator()] Start mindspore.profiler... [INFO] ME(187789:281473538169872,MainProcess):2025-02-07-15:58:13.925.425 [mindspore/profiler/envprofiler.py:56] analyse start [INFO] ME(187775:281473807399952,MainProcess):2025-02-07-15:58:13.925.824 [mindspore/profiler/envprofiler.py:56] analyse start [INFO] ME(187775:281473807399952,MainProcess):2025-02-07-15:58:13.926.066 [mindspore/profiler/envprofiler.py:58] Profiler is not initialized, skip analyse. [INFO] ME(187789:281473538169872,MainProcess):2025-02-07-15:58:13.926.072 [mindspore/profiler/envprofiler.py:58] Profiler is not initialized, skip analyse. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.926.161 [mindspore/ccsrc/pipeline/jit/ps/init.cc:614] operator()] Start EmbeddingCacheScheduler... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:13.926.171 [mindspore/ccsrc/pipeline/jit/ps/init.cc:614] operator()] Start EmbeddingCacheScheduler... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.926.201 [mindspore/ccsrc/pipeline/jit/ps/init.cc:621] operator()] Start releasing dataset handles... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:13.926.207 [mindspore/ccsrc/pipeline/jit/ps/init.cc:621] operator()] Start releasing dataset handles... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.926.255 [mindspore/ccsrc/pipeline/jit/ps/init.cc:624] operator()] End release dataset handles. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.926.275 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2855] FinalizeCluster] Start finalize the cluster instance. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:13.926.374 [mindspore/ccsrc/pipeline/jit/ps/init.cc:624] operator()] End release dataset handles. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:13.926.397 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2855] FinalizeCluster] Start finalize the cluster instance. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:13.929.172 [mindspore/ccsrc/pipeline/jit/ps/init.cc:604] operator()] Start register... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:13.929.229 [mindspore/ccsrc/pipeline/jit/ps/init.cc:607] operator()] Start mindspore.profiler... [INFO] ME(187764:281473227349008,MainProcess):2025-02-07-15:58:13.929.306 [mindspore/profiler/envprofiler.py:56] analyse start [INFO] ME(187764:281473227349008,MainProcess):2025-02-07-15:58:13.929.503 [mindspore/profiler/envprofiler.py:58] Profiler is not initialized, skip analyse. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:13.929.593 [mindspore/ccsrc/pipeline/jit/ps/init.cc:614] operator()] Start EmbeddingCacheScheduler... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:13.929.626 [mindspore/ccsrc/pipeline/jit/ps/init.cc:621] operator()] Start releasing dataset handles... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:13.929.672 [mindspore/ccsrc/pipeline/jit/ps/init.cc:624] operator()] End release dataset handles. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:13.929.693 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2855] FinalizeCluster] Start finalize the cluster instance. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:13.936.111 [mindspore/ccsrc/pipeline/jit/ps/init.cc:604] operator()] Start register... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:13.936.207 [mindspore/ccsrc/pipeline/jit/ps/init.cc:607] operator()] Start mindspore.profiler... [INFO] ME(187803:281473162132496,MainProcess):2025-02-07-15:58:13.936.325 [mindspore/profiler/envprofiler.py:56] analyse start [INFO] ME(187803:281473162132496,MainProcess):2025-02-07-15:58:13.936.540 [mindspore/profiler/envprofiler.py:58] Profiler is not initialized, skip analyse. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:13.936.715 [mindspore/ccsrc/pipeline/jit/ps/init.cc:614] operator()] Start EmbeddingCacheScheduler... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:13.936.758 [mindspore/ccsrc/pipeline/jit/ps/init.cc:621] operator()] Start releasing dataset handles... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:13.936.895 [mindspore/ccsrc/pipeline/jit/ps/init.cc:624] operator()] End release dataset handles. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:13.936.918 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2855] FinalizeCluster] Start finalize the cluster instance. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:13.948.443 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:132] Finalize] The compute graph node has been unregistered successfully. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:13.948.612 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187775,ffff3abbd0f0,python):2025-02-07-15:58:13.948.738 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:13.948.923 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:13.948.958 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187775,ffff3b3be0f0,python):2025-02-07-15:58:13.949.010 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:13.949.149 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:13.949.167 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:13.949.238 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187775,ffff39bbb0f0,python):2025-02-07-15:58:13.949.320 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:13.949.473 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:13.949.492 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187775,ffff3a3bc0f0,python):2025-02-07-15:58:13.949.537 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:13.949.676 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:13.949.692 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.949.713 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2858] FinalizeCluster] End finalize the cluster instance. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.949.731 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2797] ClearResAtexit] Pipeline clear all resource [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.950.141 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:303] RecordExitStatus] Status record: system exit. [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:13.950.172 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:220] Clear] Start finalizing tcp server and client for rpc actors. [INFO] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:13.950.188 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:230] Clear] End finalizing tcp server and client for rpc actors. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:13.950.258 [mindspore/core/mindrt/src/actor/actormgr.cc:165] Finalize] mindrt Actors finish exiting. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:13.950.273 [mindspore/core/mindrt/src/actor/actormgr.cc:168] Finalize] mindrt Threads finish exiting. [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:13.967.717 [mindspore/core/mindrt/src/actor/actormgr.cc:179] Finalize] mindrt IOMGRS finish exiting. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.968.250 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2680] ClearResPart1] Start Finalize StreamSynchronizer... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.968.287 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2682] ClearResPart1] End Finalize StreamSynchronizer... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.970.475 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1024] ClearInfo] Clean graph resource! [DEBUG] RUNTIME_FRAMEWORK(187775,ffffba4dbc10,python):2025-02-07-15:58:13.970.876 [mindspore/ccsrc/runtime/graph_scheduler/parameter_store.h:69] Clear] Graph kernel_graph_0 has already clear. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.970.930 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:35] ClearGraphResource] Clear device Ascend_3 graph 0 runtime resource [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.971.716 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1019] ClearRes] Clean executor resource! [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.971.753 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2699] ClearResPart2] Start clear PyNativeExecutor... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.971.928 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2701] ClearResPart2] End clear PyNativeExecutor. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.971.947 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2703] ClearResPart2] Start clear ConfigManager... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.971.961 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2705] ClearResPart2] End clear ConfigManager. [INFO] COMMON(187775,ffffba4dbc10,python):2025-02-07-15:58:13.972.038 [mindspore/ccsrc/common/thread_pool.cc:41] ThreadPool] Set max_thread_num_ to 4 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.972.305 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2709] ClearResPart2] Start clear device context... [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:13.972.339 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device Ascend_3 [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:13.979.519 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:13.979.595 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:13.999.301 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:132] Finalize] The compute graph node has been unregistered successfully. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.999.374 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:132] Finalize] The compute graph node has been unregistered successfully. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:13.999.410 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.999.480 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187742,ffff21f790f0,python):2025-02-07-15:58:13.999.506 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187818,ffff3e79b0f0,python):2025-02-07-15:58:13.999.590 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:13.999.686 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:13.999.709 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187742,ffff2277a0f0,python):2025-02-07-15:58:13.999.776 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.999.803 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:13.999.832 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:13.999.912 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187818,ffff3ef9c0f0,python):2025-02-07-15:58:13.999.895 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:13.999.930 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:13.999.990 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.000.040 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.000.063 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] DISTRIBUTED(187742,ffff20f770f0,python):2025-02-07-15:58:14.000.069 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.000.134 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187818,ffff3d7990f0,python):2025-02-07-15:58:14.000.203 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:14.000.230 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:14.000.251 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187742,ffff217780f0,python):2025-02-07-15:58:14.000.305 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.000.347 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.000.371 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187818,ffff3df9a0f0,python):2025-02-07-15:58:14.000.418 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:14.000.453 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:14.000.472 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:14.000.495 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2858] FinalizeCluster] End finalize the cluster instance. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:14.000.513 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2797] ClearResAtexit] Pipeline clear all resource [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.000.545 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.000.566 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.000.595 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2858] FinalizeCluster] End finalize the cluster instance. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.000.652 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2797] ClearResAtexit] Pipeline clear all resource [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:14.000.973 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:303] RecordExitStatus] Status record: system exit. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:14.001.008 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:220] Clear] Start finalizing tcp server and client for rpc actors. [INFO] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:14.001.025 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:230] Clear] End finalizing tcp server and client for rpc actors. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.001.067 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:303] RecordExitStatus] Status record: system exit. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:14.001.085 [mindspore/core/mindrt/src/actor/actormgr.cc:165] Finalize] mindrt Actors finish exiting. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:14.001.102 [mindspore/core/mindrt/src/actor/actormgr.cc:168] Finalize] mindrt Threads finish exiting. [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.001.097 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:220] Clear] Start finalizing tcp server and client for rpc actors. [INFO] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.001.118 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:230] Clear] End finalizing tcp server and client for rpc actors. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.001.176 [mindspore/core/mindrt/src/actor/actormgr.cc:165] Finalize] mindrt Actors finish exiting. [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.001.194 [mindspore/core/mindrt/src/actor/actormgr.cc:168] Finalize] mindrt Threads finish exiting. [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:14.016.272 [mindspore/core/mindrt/src/actor/actormgr.cc:179] Finalize] mindrt IOMGRS finish exiting. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:14.016.808 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2680] ClearResPart1] Start Finalize StreamSynchronizer... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:14.016.848 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2682] ClearResPart1] End Finalize StreamSynchronizer... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:14.018.881 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1024] ClearInfo] Clean graph resource! [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.018.983 [mindspore/core/mindrt/src/actor/actormgr.cc:179] Finalize] mindrt IOMGRS finish exiting. [DEBUG] RUNTIME_FRAMEWORK(187742,ffffa187dc10,python):2025-02-07-15:58:14.019.242 [mindspore/ccsrc/runtime/graph_scheduler/parameter_store.h:69] Clear] Graph kernel_graph_0 has already clear. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:14.019.313 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:35] ClearGraphResource] Clear device Ascend_0 graph 0 runtime resource [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.019.465 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2680] ClearResPart1] Start Finalize StreamSynchronizer... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.019.500 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2682] ClearResPart1] End Finalize StreamSynchronizer... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:14.020.074 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1019] ClearRes] Clean executor resource! [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:14.020.116 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2699] ClearResPart2] Start clear PyNativeExecutor... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:14.020.288 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2701] ClearResPart2] End clear PyNativeExecutor. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:14.020.307 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2703] ClearResPart2] Start clear ConfigManager... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:14.020.322 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2705] ClearResPart2] End clear ConfigManager. [INFO] COMMON(187742,ffffa187dc10,python):2025-02-07-15:58:14.020.400 [mindspore/ccsrc/common/thread_pool.cc:41] ThreadPool] Set max_thread_num_ to 4 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:14.020.729 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2709] ClearResPart2] Start clear device context... [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:14.020.766 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device Ascend_0 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.021.695 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1024] ClearInfo] Clean graph resource! [DEBUG] RUNTIME_FRAMEWORK(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.022.055 [mindspore/ccsrc/runtime/graph_scheduler/parameter_store.h:69] Clear] Graph kernel_graph_0 has already clear. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.022.102 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:35] ClearGraphResource] Clear device Ascend_6 graph 0 runtime resource [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.022.856 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1019] ClearRes] Clean executor resource! [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.022.892 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2699] ClearResPart2] Start clear PyNativeExecutor... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.023.058 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2701] ClearResPart2] End clear PyNativeExecutor. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.023.082 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2703] ClearResPart2] Start clear ConfigManager... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.023.100 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2705] ClearResPart2] End clear ConfigManager. [INFO] COMMON(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.023.173 [mindspore/ccsrc/common/thread_pool.cc:41] ThreadPool] Set max_thread_num_ to 4 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.023.448 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2709] ClearResPart2] Start clear device context... [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.023.477 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device Ascend_6 [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:14.029.784 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:14.029.874 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.032.087 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.032.179 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:14.116.504 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:132] Finalize] The compute graph node has been unregistered successfully. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:14.116.619 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187764,ffff13fff0f0,python):2025-02-07-15:58:14.116.747 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:14.116.891 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:14.116.919 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187764,ffff18aae0f0,python):2025-02-07-15:58:14.116.971 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:14.117.104 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:14.117.125 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:14.117.197 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187764,ffff12ffd0f0,python):2025-02-07-15:58:14.117.290 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:14.117.455 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:14.117.487 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187764,ffff137fe0f0,python):2025-02-07-15:58:14.117.539 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:14.117.666 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:14.117.686 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:14.117.711 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2858] FinalizeCluster] End finalize the cluster instance. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:14.117.731 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2797] ClearResAtexit] Pipeline clear all resource [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:14.118.108 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:303] RecordExitStatus] Status record: system exit. [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:14.118.137 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:220] Clear] Start finalizing tcp server and client for rpc actors. [INFO] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:14.118.157 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:230] Clear] End finalizing tcp server and client for rpc actors. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:14.118.214 [mindspore/core/mindrt/src/actor/actormgr.cc:165] Finalize] mindrt Actors finish exiting. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:14.118.234 [mindspore/core/mindrt/src/actor/actormgr.cc:168] Finalize] mindrt Threads finish exiting. [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:14.134.887 [mindspore/core/mindrt/src/actor/actormgr.cc:179] Finalize] mindrt IOMGRS finish exiting. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:14.135.333 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2680] ClearResPart1] Start Finalize StreamSynchronizer... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:14.135.367 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2682] ClearResPart1] End Finalize StreamSynchronizer... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:14.137.366 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1024] ClearInfo] Clean graph resource! [DEBUG] RUNTIME_FRAMEWORK(187764,ffff97badc10,python):2025-02-07-15:58:14.137.733 [mindspore/ccsrc/runtime/graph_scheduler/parameter_store.h:69] Clear] Graph kernel_graph_0 has already clear. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:14.137.793 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:35] ClearGraphResource] Clear device Ascend_2 graph 0 runtime resource [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:14.138.527 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1019] ClearRes] Clean executor resource! [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:14.138.560 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2699] ClearResPart2] Start clear PyNativeExecutor... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:14.138.721 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2701] ClearResPart2] End clear PyNativeExecutor. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:14.138.741 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2703] ClearResPart2] Start clear ConfigManager... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:14.138.759 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2705] ClearResPart2] End clear ConfigManager. [INFO] COMMON(187764,ffff97badc10,python):2025-02-07-15:58:14.138.822 [mindspore/ccsrc/common/thread_pool.cc:41] ThreadPool] Set max_thread_num_ to 4 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:14.139.096 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2709] ClearResPart2] Start clear device context... [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:14.139.139 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device Ascend_2 [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:14.145.837 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:14.145.902 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:14.172.755 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:132] Finalize] The compute graph node has been unregistered successfully. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:14.172.946 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187789,ffff2aafd0f0,python):2025-02-07-15:58:14.173.103 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:14.173.327 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:14.173.371 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187789,ffff2b2fe0f0,python):2025-02-07-15:58:14.173.457 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:14.173.635 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:14.173.659 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:14.173.743 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187789,ffff29afb0f0,python):2025-02-07-15:58:14.173.839 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:14.174.005 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:14.174.032 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187789,ffff2a2fc0f0,python):2025-02-07-15:58:14.174.091 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:14.174.255 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:14.174.281 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:14.174.310 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2858] FinalizeCluster] End finalize the cluster instance. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:14.174.330 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2797] ClearResAtexit] Pipeline clear all resource [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:14.174.748 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:303] RecordExitStatus] Status record: system exit. [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:14.174.779 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:220] Clear] Start finalizing tcp server and client for rpc actors. [INFO] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:14.174.799 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:230] Clear] End finalizing tcp server and client for rpc actors. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:14.174.919 [mindspore/core/mindrt/src/actor/actormgr.cc:165] Finalize] mindrt Actors finish exiting. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:14.174.938 [mindspore/core/mindrt/src/actor/actormgr.cc:168] Finalize] mindrt Threads finish exiting. [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:14.209.790 [mindspore/core/mindrt/src/actor/actormgr.cc:179] Finalize] mindrt IOMGRS finish exiting. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:14.210.887 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2680] ClearResPart1] Start Finalize StreamSynchronizer... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:14.210.924 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2682] ClearResPart1] End Finalize StreamSynchronizer... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:14.217.580 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1024] ClearInfo] Clean graph resource! [DEBUG] RUNTIME_FRAMEWORK(187789,ffffaa419c10,python):2025-02-07-15:58:14.218.195 [mindspore/ccsrc/runtime/graph_scheduler/parameter_store.h:69] Clear] Graph kernel_graph_0 has already clear. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:14.218.259 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:35] ClearGraphResource] Clear device Ascend_4 graph 0 runtime resource [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:14.219.641 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1019] ClearRes] Clean executor resource! [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:14.219.674 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2699] ClearResPart2] Start clear PyNativeExecutor... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:14.219.998 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2701] ClearResPart2] End clear PyNativeExecutor. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:14.220.021 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2703] ClearResPart2] Start clear ConfigManager... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:14.220.040 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2705] ClearResPart2] End clear ConfigManager. [INFO] COMMON(187789,ffffaa419c10,python):2025-02-07-15:58:14.220.134 [mindspore/ccsrc/common/thread_pool.cc:41] ThreadPool] Set max_thread_num_ to 4 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:14.220.568 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2709] ClearResPart2] Start clear device context... [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:14.220.598 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device Ascend_4 [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:14.228.108 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:14.228.177 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:14.287.857 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:132] Finalize] The compute graph node has been unregistered successfully. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:14.287.968 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187753,ffff0302a0f0,python):2025-02-07-15:58:14.288.071 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:14.288.242 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:14.288.266 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187753,ffff0382b0f0,python):2025-02-07-15:58:14.288.326 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:14.288.467 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:14.288.484 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:14.288.554 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187753,ffff020280f0,python):2025-02-07-15:58:14.288.641 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:14.288.800 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:14.288.822 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187753,ffff028290f0,python):2025-02-07-15:58:14.288.876 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:14.289.044 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:14.289.062 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:14.289.086 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2858] FinalizeCluster] End finalize the cluster instance. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:14.289.105 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2797] ClearResAtexit] Pipeline clear all resource [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:14.289.481 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:303] RecordExitStatus] Status record: system exit. [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:14.289.512 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:220] Clear] Start finalizing tcp server and client for rpc actors. [INFO] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:14.289.528 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:230] Clear] End finalizing tcp server and client for rpc actors. [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:14.289.596 [mindspore/core/mindrt/src/actor/actormgr.cc:165] Finalize] mindrt Actors finish exiting. [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:14.289.611 [mindspore/core/mindrt/src/actor/actormgr.cc:168] Finalize] mindrt Threads finish exiting. [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:14.307.458 [mindspore/core/mindrt/src/actor/actormgr.cc:179] Finalize] mindrt IOMGRS finish exiting. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:14.307.982 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2680] ClearResPart1] Start Finalize StreamSynchronizer... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:14.308.020 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2682] ClearResPart1] End Finalize StreamSynchronizer... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:14.310.132 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1024] ClearInfo] Clean graph resource! [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:14.310.326 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:132] Finalize] The compute graph node has been unregistered successfully. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:14.310.476 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [DEBUG] RUNTIME_FRAMEWORK(187753,ffff8292dc10,python):2025-02-07-15:58:14.310.540 [mindspore/ccsrc/runtime/graph_scheduler/parameter_store.h:69] Clear] Graph kernel_graph_0 has already clear. [INFO] DISTRIBUTED(187834,ffff33cd30f0,python):2025-02-07-15:58:14.310.568 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:14.310.616 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:35] ClearGraphResource] Clear device Ascend_1 graph 0 runtime resource [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:14.310.732 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:14.310.760 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187834,ffff344d40f0,python):2025-02-07-15:58:14.310.808 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:14.310.937 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:14.310.956 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:14.311.022 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187834,ffff32cd10f0,python):2025-02-07-15:58:14.311.096 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:14.311.227 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:14.311.249 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187834,ffff334d20f0,python):2025-02-07-15:58:14.311.298 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:14.311.407 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1019] ClearRes] Clean executor resource! [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:14.311.440 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:14.311.447 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2699] ClearResPart2] Start clear PyNativeExecutor... [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:14.311.459 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.311.483 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2858] FinalizeCluster] End finalize the cluster instance. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.311.501 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2797] ClearResAtexit] Pipeline clear all resource [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:14.311.767 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2701] ClearResPart2] End clear PyNativeExecutor. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:14.311.791 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2703] ClearResPart2] Start clear ConfigManager... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:14.311.807 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2705] ClearResPart2] End clear ConfigManager. [INFO] COMMON(187753,ffff8292dc10,python):2025-02-07-15:58:14.311.883 [mindspore/ccsrc/common/thread_pool.cc:41] ThreadPool] Set max_thread_num_ to 4 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.311.884 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:303] RecordExitStatus] Status record: system exit. [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:14.311.936 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:220] Clear] Start finalizing tcp server and client for rpc actors. [INFO] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:14.311.954 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:230] Clear] End finalizing tcp server and client for rpc actors. [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:14.312.020 [mindspore/core/mindrt/src/actor/actormgr.cc:165] Finalize] mindrt Actors finish exiting. [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:14.312.037 [mindspore/core/mindrt/src/actor/actormgr.cc:168] Finalize] mindrt Threads finish exiting. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:14.312.164 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2709] ClearResPart2] Start clear device context... [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:14.312.198 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device Ascend_1 [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:14.320.029 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:14.320.112 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:14.329.384 [mindspore/core/mindrt/src/actor/actormgr.cc:179] Finalize] mindrt IOMGRS finish exiting. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.329.893 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2680] ClearResPart1] Start Finalize StreamSynchronizer... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.329.930 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2682] ClearResPart1] End Finalize StreamSynchronizer... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.331.783 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1024] ClearInfo] Clean graph resource! [DEBUG] RUNTIME_FRAMEWORK(187834,ffffb35e0c10,python):2025-02-07-15:58:14.332.206 [mindspore/ccsrc/runtime/graph_scheduler/parameter_store.h:69] Clear] Graph kernel_graph_0 has already clear. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.332.260 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:35] ClearGraphResource] Clear device Ascend_7 graph 0 runtime resource [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.333.127 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1019] ClearRes] Clean executor resource! [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.333.166 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2699] ClearResPart2] Start clear PyNativeExecutor... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.333.338 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2701] ClearResPart2] End clear PyNativeExecutor. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.333.360 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2703] ClearResPart2] Start clear ConfigManager... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.333.376 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2705] ClearResPart2] End clear ConfigManager. [INFO] COMMON(187834,ffffb35e0c10,python):2025-02-07-15:58:14.333.451 [mindspore/ccsrc/common/thread_pool.cc:41] ThreadPool] Set max_thread_num_ to 4 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.333.722 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2709] ClearResPart2] Start clear device context... [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:14.333.754 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device Ascend_7 [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:14.338.176 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:132] Finalize] The compute graph node has been unregistered successfully. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:14.338.295 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187803,ffff0ffff0f0,python):2025-02-07-15:58:14.338.383 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:14.338.578 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:14.338.605 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187803,ffff14c6f0f0,python):2025-02-07-15:58:14.338.656 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:14.338.791 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:14.338.811 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:14.338.887 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:563] Finalize] Delete send event loop [INFO] DISTRIBUTED(187803,ffff0effd0f0,python):2025-02-07-15:58:14.338.963 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:14.339.119 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:14.339.146 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:570] Finalize] Delete recv event loop [INFO] DISTRIBUTED(187803,ffff0f7fe0f0,python):2025-02-07-15:58:14.339.212 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:82] EventLoopRun] Event epoll loop run end [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:14.339.375 [mindspore/ccsrc/distributed/rpc/tcp/event_loop.cc:210] Finalize] Stop loop succ [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:14.339.397 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:584] Finalize] Delete connection pool. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.339.422 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2858] FinalizeCluster] End finalize the cluster instance. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.339.444 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2797] ClearResAtexit] Pipeline clear all resource [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.339.826 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:303] RecordExitStatus] Status record: system exit. [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:14.339.861 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:220] Clear] Start finalizing tcp server and client for rpc actors. [INFO] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:14.339.879 [mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.cc:230] Clear] End finalizing tcp server and client for rpc actors. [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:14.339.955 [mindspore/core/mindrt/src/actor/actormgr.cc:165] Finalize] mindrt Actors finish exiting. [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:14.339.976 [mindspore/core/mindrt/src/actor/actormgr.cc:168] Finalize] mindrt Threads finish exiting. [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:14.341.090 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.341.171 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:14.357.535 [mindspore/core/mindrt/src/actor/actormgr.cc:179] Finalize] mindrt IOMGRS finish exiting. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.358.069 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2680] ClearResPart1] Start Finalize StreamSynchronizer... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.358.111 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2682] ClearResPart1] End Finalize StreamSynchronizer... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.360.195 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1024] ClearInfo] Clean graph resource! [DEBUG] RUNTIME_FRAMEWORK(187803,ffff93d7bc10,python):2025-02-07-15:58:14.360.624 [mindspore/ccsrc/runtime/graph_scheduler/parameter_store.h:69] Clear] Graph kernel_graph_0 has already clear. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.360.697 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:35] ClearGraphResource] Clear device Ascend_5 graph 0 runtime resource [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.361.525 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1019] ClearRes] Clean executor resource! [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.361.566 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2699] ClearResPart2] Start clear PyNativeExecutor... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.361.758 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2701] ClearResPart2] End clear PyNativeExecutor. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.361.782 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2703] ClearResPart2] Start clear ConfigManager... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.361.800 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2705] ClearResPart2] End clear ConfigManager. [INFO] COMMON(187803,ffff93d7bc10,python):2025-02-07-15:58:14.361.883 [mindspore/ccsrc/common/thread_pool.cc:41] ThreadPool] Set max_thread_num_ to 4 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.362.155 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2709] ClearResPart2] Start clear device context... [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:14.362.189 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device Ascend_5 [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:14.369.441 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.369.518 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:14.731.870 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:42] ~HcclWatchDogHandler] HcclWatchDogHandler thread exit. global rank id: 0 local rank id: 0, global rank size: 8 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:14.802.582 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:42] ~HcclWatchDogHandler] HcclWatchDogHandler thread exit. global rank id: 6 local rank id: 6, global rank size: 8 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:14.834.499 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:42] ~HcclWatchDogHandler] HcclWatchDogHandler thread exit. global rank id: 3 local rank id: 3, global rank size: 8 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:14.893.818 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:42] ~HcclWatchDogHandler] HcclWatchDogHandler thread exit. global rank id: 7 local rank id: 7, global rank size: 8 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:14.913.120 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:42] ~HcclWatchDogHandler] HcclWatchDogHandler thread exit. global rank id: 4 local rank id: 4, global rank size: 8 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:14.918.320 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:42] ~HcclWatchDogHandler] HcclWatchDogHandler thread exit. global rank id: 2 local rank id: 2, global rank size: 8 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:14.935.416 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:42] ~HcclWatchDogHandler] HcclWatchDogHandler thread exit. global rank id: 5 local rank id: 5, global rank size: 8 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:14.937.340 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/hccl_watch_dog_thread.cc:42] ~HcclWatchDogHandler] HcclWatchDogHandler thread exit. global rank id: 1 local rank id: 1, global rank size: 8 [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:16.205.181 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:300] FinalizeHccl] Start destroy hccl adapter for GE_MODE [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:16.205.226 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:687] FinalizeHcclExec] Start finalize hccl exec. [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:16.211.824 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:695] FinalizeHcclExec] HcclExec destroy success [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:16.211.862 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:497] FinalizeKernelInfoStore] Start destroy hccl kernel info store. [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:16.211.935 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:519] FinalizeKernelInfoStore] Destroy hccl kernel info store success. [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:16.211.953 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:576] FinalizeHcclComm] Start finalize hccl comm. [INFO] HCCL_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:16.212.462 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:313] FinalizeHccl] Destroy hccl adapter success. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:16.212.484 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:745] DestroyHccl] Hccl destroy successful. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:16.212.524 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Common mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1023M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1023M actual size: 0M. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:16.212.547 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Persistent mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1024M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1024M actual size: 0M. [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:16.212.566 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:16.212.583 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 520704 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:16.212.597 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 520704 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:16.212.612 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 520704 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:16.212.643 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 520704 [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:16.283.128 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:300] FinalizeHccl] Start destroy hccl adapter for GE_MODE [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:16.283.171 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:687] FinalizeHcclExec] Start finalize hccl exec. [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:16.283.167 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:300] FinalizeHccl] Start destroy hccl adapter for GE_MODE [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:16.283.210 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:687] FinalizeHcclExec] Start finalize hccl exec. [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:16.283.407 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:300] FinalizeHccl] Start destroy hccl adapter for GE_MODE [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:16.283.453 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:687] FinalizeHcclExec] Start finalize hccl exec. [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:16.283.494 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:300] FinalizeHccl] Start destroy hccl adapter for GE_MODE [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:16.283.538 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:687] FinalizeHcclExec] Start finalize hccl exec. [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:16.283.539 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:300] FinalizeHccl] Start destroy hccl adapter for GE_MODE [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:16.283.582 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:687] FinalizeHcclExec] Start finalize hccl exec. [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.283.614 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:300] FinalizeHccl] Start destroy hccl adapter for GE_MODE [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.283.663 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:687] FinalizeHcclExec] Start finalize hccl exec. [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:16.283.937 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:300] FinalizeHccl] Start destroy hccl adapter for GE_MODE [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:16.283.978 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:687] FinalizeHcclExec] Start finalize hccl exec. [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:16.294.351 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:695] FinalizeHcclExec] HcclExec destroy success [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:16.294.386 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:497] FinalizeKernelInfoStore] Start destroy hccl kernel info store. [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:16.294.477 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:519] FinalizeKernelInfoStore] Destroy hccl kernel info store success. [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:16.294.499 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:576] FinalizeHcclComm] Start finalize hccl comm. [INFO] HCCL_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:16.295.153 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:313] FinalizeHccl] Destroy hccl adapter success. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:16.295.178 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:745] DestroyHccl] Hccl destroy successful. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:16.295.221 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Common mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1023M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1023M actual size: 0M. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:16.295.246 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Persistent mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1024M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1024M actual size: 0M. [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:16.295.267 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:16.295.287 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 520704 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:16.295.304 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 520704 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:16.295.338 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 520704 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:16.295.356 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 520704 [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:16.295.461 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:695] FinalizeHcclExec] HcclExec destroy success [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:16.295.500 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:497] FinalizeKernelInfoStore] Start destroy hccl kernel info store. [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:16.295.589 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:519] FinalizeKernelInfoStore] Destroy hccl kernel info store success. [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:16.295.611 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:576] FinalizeHcclComm] Start finalize hccl comm. [INFO] HCCL_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:16.296.262 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:313] FinalizeHccl] Destroy hccl adapter success. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:16.296.288 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:745] DestroyHccl] Hccl destroy successful. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:16.296.332 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Common mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1023M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1023M actual size: 0M. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:16.296.367 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Persistent mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1024M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1024M actual size: 0M. [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:16.296.392 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:16.296.412 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 521216 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:16.296.429 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 521216 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:16.296.449 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 521216 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:16.296.466 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 521216 [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:16.297.589 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:695] FinalizeHcclExec] HcclExec destroy success [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:16.297.627 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:497] FinalizeKernelInfoStore] Start destroy hccl kernel info store. [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:16.297.712 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:519] FinalizeKernelInfoStore] Destroy hccl kernel info store success. [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:16.297.734 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:576] FinalizeHcclComm] Start finalize hccl comm. [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:16.297.940 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:695] FinalizeHcclExec] HcclExec destroy success [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:16.297.979 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:497] FinalizeKernelInfoStore] Start destroy hccl kernel info store. [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:16.298.062 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:519] FinalizeKernelInfoStore] Destroy hccl kernel info store success. [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:16.298.080 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:576] FinalizeHcclComm] Start finalize hccl comm. [INFO] HCCL_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:16.298.380 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:313] FinalizeHccl] Destroy hccl adapter success. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:16.298.405 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:745] DestroyHccl] Hccl destroy successful. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:16.298.450 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Common mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1023M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1023M actual size: 0M. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:16.298.474 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Persistent mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1024M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1024M actual size: 0M. [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:16.298.496 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:16.298.513 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 521728 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:16.298.529 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 521728 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:16.298.545 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 521728 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:16.298.560 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 521728 [INFO] HCCL_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:16.298.709 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:313] FinalizeHccl] Destroy hccl adapter success. [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.298.696 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:695] FinalizeHcclExec] HcclExec destroy success [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:16.298.734 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:745] DestroyHccl] Hccl destroy successful. [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.298.734 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:497] FinalizeKernelInfoStore] Start destroy hccl kernel info store. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:16.298.772 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Common mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1023M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1023M actual size: 0M. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:16.298.796 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Persistent mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1024M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1024M actual size: 0M. [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:16.298.815 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.298.815 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:519] FinalizeKernelInfoStore] Destroy hccl kernel info store success. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:16.298.831 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 520704 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:16.298.846 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 520704 [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.298.837 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:576] FinalizeHcclComm] Start finalize hccl comm. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:16.298.860 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 520704 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:16.298.888 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 520704 [INFO] HCCL_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.299.445 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:313] FinalizeHccl] Destroy hccl adapter success. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.299.469 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:745] DestroyHccl] Hccl destroy successful. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.299.509 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Common mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1023M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1023M actual size: 0M. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.299.534 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Persistent mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1024M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1024M actual size: 0M. [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.299.557 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.299.576 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 521728 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.299.594 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 521728 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.299.613 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 521728 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:16.299.630 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 521728 [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:16.300.578 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:695] FinalizeHcclExec] HcclExec destroy success [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:16.300.614 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:497] FinalizeKernelInfoStore] Start destroy hccl kernel info store. [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:16.300.707 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:519] FinalizeKernelInfoStore] Destroy hccl kernel info store success. [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:16.300.728 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:576] FinalizeHcclComm] Start finalize hccl comm. [INFO] HCCL_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:16.301.376 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:313] FinalizeHccl] Destroy hccl adapter success. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:16.301.398 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:745] DestroyHccl] Hccl destroy successful. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:16.301.444 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Common mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1023M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1023M actual size: 0M. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:16.301.466 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Persistent mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1024M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1024M actual size: 0M. [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:16.301.486 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:16.301.502 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 521216 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:16.301.517 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 521216 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:16.301.532 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 521216 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:16.301.546 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 521216 [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:16.301.655 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:695] FinalizeHcclExec] HcclExec destroy success [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:16.301.689 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:497] FinalizeKernelInfoStore] Start destroy hccl kernel info store. [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:16.301.796 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:519] FinalizeKernelInfoStore] Destroy hccl kernel info store success. [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:16.301.819 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:576] FinalizeHcclComm] Start finalize hccl comm. [INFO] HCCL_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:16.302.367 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:313] FinalizeHccl] Destroy hccl adapter success. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:16.302.390 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:745] DestroyHccl] Hccl destroy successful. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:16.302.430 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Common mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1023M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1023M actual size: 0M. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:16.302.456 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1194] operator()] Persistent mem pool info: Total allocated mem:1024M, peak used mem:0M, in used mem:0M, total use by event mem:0M, total idle mem:1024M. Block unit size:1024M, block counts:1, block[0] stream id:0 block size:1024M idle size:1024M actual size: 0M. [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:16.302.490 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:16.302.508 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 521216 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:16.302.526 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 521216 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:16.302.546 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:193] FreeDeviceMem] Max actual used memory size is 521216 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:16.302.562 [mindspore/ccsrc/plugin/device/ascend/hal/device/abstract_ascend_memory_pool_support.cc:196] FreeDeviceMem] Max peak used memory size is 521216 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:17.814.103 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:217] DeInitialize] Ascend Memory Adapter deinitialize success, statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30680M MindSpore memory base address: 0x12c100000000 Used peak memory usage (without fragments): 0M Actual peak memory usage (with fragments): 0M [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:17.829.077 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x4a429b30. [INFO] COMMON(187834,ffff31bd50f0,python):2025-02-07-15:58:17.829.176 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:174] callback_thread_func] Exit callback thread loop. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:17.831.347 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x4a615c00. [WARNING] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:17.831.386 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x4a615c00 is not exist. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:17.832.961 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:217] DeInitialize] Ascend Memory Adapter deinitialize success, statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30680M MindSpore memory base address: 0x12c100000000 Used peak memory usage (without fragments): 0M Actual peak memory usage (with fragments): 0M [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:17.833.548 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x5cf6c6d0. [WARNING] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:17.833.585 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x5cf6c6d0 is not exist. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:17.835.504 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x5d097c00. [WARNING] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:17.835.540 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x5d097c00 is not exist. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:17.837.483 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x5d1c3140. [WARNING] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:17.837.520 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x5d1c3140 is not exist. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:17.837.590 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:278] ReleaseDeviceRes] Ascend finalize end [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:17.837.626 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:17.837.649 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:17.837.675 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:268] DeleteGraphRunner] Delete GraphRunner success [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:17.839.135 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:239] DeleteGeSession] Delete Ge Session success [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:17.865.293 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x20a1d080. [INFO] COMMON(187764,ffff11ffb0f0,python):2025-02-07-15:58:17.865.419 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:174] callback_thread_func] Exit callback thread loop. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:17.866.163 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:217] DeInitialize] Ascend Memory Adapter deinitialize success, statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30680M MindSpore memory base address: 0x12c100000000 Used peak memory usage (without fragments): 0M Actual peak memory usage (with fragments): 0M [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:17.868.392 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x20c08f90. [WARNING] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:17.868.434 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x20c08f90 is not exist. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:17.871.164 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x3355eb60. [WARNING] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:17.871.206 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3355eb60 is not exist. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:17.874.315 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x3368a000. [WARNING] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:17.874.355 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3368a000 is not exist. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:17.877.266 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x337b5540. [WARNING] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:17.877.306 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x337b5540 is not exist. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:17.877.470 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:278] ReleaseDeviceRes] Ascend finalize end [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:17.877.513 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:17.877.539 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:17.877.568 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:268] DeleteGraphRunner] Delete GraphRunner success [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:17.879.843 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:239] DeleteGeSession] Delete Ge Session success [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:17.889.787 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x367ede00. [INFO] COMMON(187803,ffff0dffb0f0,python):2025-02-07-15:58:17.889.889 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:174] callback_thread_func] Exit callback thread loop. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:17.892.230 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x369dbe90. [WARNING] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:17.892.265 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x369dbe90 is not exist. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:17.894.635 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x49334800. [WARNING] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:17.894.668 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x49334800 is not exist. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:17.897.386 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x4945fc90. [WARNING] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:17.897.420 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x4945fc90 is not exist. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:17.899.626 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x4958b1d0. [WARNING] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:17.899.674 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x4958b1d0 is not exist. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:17.899.742 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:278] ReleaseDeviceRes] Ascend finalize end [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:17.899.776 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:17.899.800 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:17.899.827 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:268] DeleteGraphRunner] Delete GraphRunner success [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:17.901.358 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:239] DeleteGeSession] Delete Ge Session success [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:18.130.083 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PRE_ACT(187834,ffffb35e0c10,python):2025-02-07-15:58:18.130.153 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:18.130.183 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:18.130.220 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:117] CloseTsd] Start to close tsd, ref = 1 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:18.130.245 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel _npu_log begins the destruction process. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:18.632.934 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PRE_ACT(187764,ffff97badc10,python):2025-02-07-15:58:18.633.001 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:18.633.033 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:18.633.058 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:117] CloseTsd] Start to close tsd, ref = 1 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:18.633.084 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel _npu_log begins the destruction process. [INFO] DEVICE(187834,fffee3fff0f0,python):2025-02-07-15:58:18.637.182 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:18.637.910 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_histogram_summary channel is being destroyed. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:18.637.951 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_histogram_summary begins the destruction process. [INFO] DEVICE(187764,fffecceda0f0,python):2025-02-07-15:58:18.989.369 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:18.990.106 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_histogram_summary channel is being destroyed. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:18.990.140 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_histogram_summary begins the destruction process. [INFO] DEVICE(187764,fffec67fc0f0,python):2025-02-07-15:58:18.992.927 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187834,fffeca7fc0f0,python):2025-02-07-15:58:19.057.078 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187834,fffecb7fe0f0,python):2025-02-07-15:58:19.057.144 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.057.748 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_scalar_summary channel is being destroyed. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.057.778 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_scalar_summary begins the destruction process. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.058.250 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_image_summary channel is being destroyed. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.058.273 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_image_summary begins the destruction process. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:19.130.356 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PRE_ACT(187803,ffff93d7bc10,python):2025-02-07-15:58:19.130.439 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:19.130.471 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:19.130.499 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:117] CloseTsd] Start to close tsd, ref = 1 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:19.130.528 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel _npu_log begins the destruction process. [INFO] DEVICE(187764,fffec57fa0f0,python):2025-02-07-15:58:19.212.928 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187834,fffee2ffd0f0,python):2025-02-07-15:58:19.469.048 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187834,fffee0ff90f0,python):2025-02-07-15:58:19.469.085 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.469.680 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_summary channel is being destroyed. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.469.707 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_summary begins the destruction process. [INFO] DEVICE(187834,fffee1ffb0f0,python):2025-02-07-15:58:19.814.680 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.815.340 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_dump channel is being destroyed. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.815.367 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_dump begins the destruction process. [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.815.803 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:135] CloseTsd] Call aclrtResetDevice, destroy and close tsd successful, ret[0] [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:19.815.836 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_0 [INFO] ME(187834,ffffb35e0c10,python):2025-02-07-15:58:19.815.870 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_7 [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.815.912 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2711] ClearResPart2] End clear device context. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.815.931 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2713] ClearResPart2] Start clear kernel runtime... [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.815.948 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:25] ClearRuntimeResource] Release device Ascend_7 [INFO] DEVICE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.815.966 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.815.993 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2715] ClearResPart2] End clear kernel runtime. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.009 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2717] ClearResPart2] Start clear CollectiveManager... [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.034 [mindspore/ccsrc/distributed/collective/collective_manager.cc:499] Finalize] Begin finalize collective manager. [INFO] DISTRIBUTED(187834,fffee27fc0f0,python):2025-02-07-15:58:19.816.159 [mindspore/ccsrc/distributed/collective/collective_manager.cc:473] operator()] Start finalizing host communication lib. [INFO] DISTRIBUTED(187834,fffee27fc0f0,python):2025-02-07-15:58:19.816.188 [mindspore/ccsrc/distributed/collective/collective_manager.cc:477] operator()] End finalizing host communication lib. [INFO] DISTRIBUTED(187834,fffee27fc0f0,python):2025-02-07-15:58:19.816.205 [mindspore/ccsrc/distributed/collective/collective_manager.cc:482] operator()] Start finalizing device communication lib. [INFO] DISTRIBUTED(187834,fffee27fc0f0,python):2025-02-07-15:58:19.816.220 [mindspore/ccsrc/distributed/collective/collective_manager.cc:486] operator()] End finalizing device communication lib. [INFO] DISTRIBUTED(187834,fffea6ffd0f0,python):2025-02-07-15:58:19.816.273 [mindspore/ccsrc/distributed/collective/collective_manager.cc:910] RunInitCommTasks] Initialize communciator thread is stopped. [INFO] DISTRIBUTED(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.418 [mindspore/ccsrc/distributed/collective/collective_manager.cc:507] Finalize] End finalize collective manager. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.437 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2720] ClearResPart2] End clear CollectiveManager. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.452 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2722] ClearResPart2] Start clear AnalysisResultCacheMgr... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.468 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2724] ClearResPart2] End clear AnalysisResultCacheMgr. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.483 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2726] ClearResPart2] Start clear AnalysisContext... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.499 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2728] ClearResPart2] End clear AnalysisContext... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.513 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2730] ClearResPart2] Start clear AnalysisSchedule... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.707 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2732] ClearResPart2] End clear AnalysisSchedule... [INFO] DEBUG(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.742 [mindspore/ccsrc/debug/debugger/debugger.cc:137] Reset] Release Debugger resource. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.800 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2745] ClearResPart3] Start clear ClearObjectCache... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.826 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2747] ClearResPart3] End clear ClearObjectCache... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.842 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2749] ClearResPart3] Start clear Parser... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.861 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2751] ClearResPart3] End clear Parser... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.876 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2753] ClearResPart3] Start ClearTraceStack... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.891 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2755] ClearResPart3] End ClearTraceStack... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.906 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2757] ClearResPart3] Start clear InterpretNodeRecorder... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.922 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2759] ClearResPart3] End clear InterpretNodeRecorder... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.936 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2761] ClearResPart3] Start clear parallel::entire_costgraph... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.816.987 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2763] ClearResPart3] End clear parallel::entire_costgraph... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.817.002 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2765] ClearResPart3] Start clear ProtobufLibrary... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.817.296 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2767] ClearResPart3] End clear ProtobufLibrary... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.817.316 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2769] ClearResPart3] Start clear python_adapter... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.817.333 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2771] ClearResPart3] End clear python_adapter. [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.817.349 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2775] ClearSingleton] Start clear singleton... [INFO] PIPELINE(187834,ffffb35e0c10,python):2025-02-07-15:58:19.817.518 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2793] ClearSingleton] End clear singleton. [INFO] DEVICE(187764,fffea6ffd0f0,python):2025-02-07-15:58:19.821.234 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187764,fffec77fe0f0,python):2025-02-07-15:58:19.821.281 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:19.821.832 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_scalar_summary channel is being destroyed. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:19.821.862 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_scalar_summary begins the destruction process. [INFO] DEVICE(187764,fffea7fff0f0,python):2025-02-07-15:58:19.824.954 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:19.825.532 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_image_summary channel is being destroyed. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:19.825.564 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_image_summary begins the destruction process. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:19.826.017 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_summary channel is being destroyed. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:19.826.050 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_summary begins the destruction process. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:19.826.468 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_dump channel is being destroyed. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:19.826.494 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_dump begins the destruction process. [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:19.826.934 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:135] CloseTsd] Call aclrtResetDevice, destroy and close tsd successful, ret[0] [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:19.826.967 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_0 [INFO] ME(187764,ffff97badc10,python):2025-02-07-15:58:19.827.039 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_2 [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.082 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2711] ClearResPart2] End clear device context. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.103 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2713] ClearResPart2] Start clear kernel runtime... [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.124 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:25] ClearRuntimeResource] Release device Ascend_2 [INFO] DEVICE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.143 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.169 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2715] ClearResPart2] End clear kernel runtime. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.188 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2717] ClearResPart2] Start clear CollectiveManager... [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:19.827.221 [mindspore/ccsrc/distributed/collective/collective_manager.cc:499] Finalize] Begin finalize collective manager. [INFO] DISTRIBUTED(187764,fffec6ffd0f0,python):2025-02-07-15:58:19.827.319 [mindspore/ccsrc/distributed/collective/collective_manager.cc:473] operator()] Start finalizing host communication lib. [INFO] DISTRIBUTED(187764,fffec6ffd0f0,python):2025-02-07-15:58:19.827.350 [mindspore/ccsrc/distributed/collective/collective_manager.cc:477] operator()] End finalizing host communication lib. [INFO] DISTRIBUTED(187764,fffec6ffd0f0,python):2025-02-07-15:58:19.827.370 [mindspore/ccsrc/distributed/collective/collective_manager.cc:482] operator()] Start finalizing device communication lib. [INFO] DISTRIBUTED(187764,fffec6ffd0f0,python):2025-02-07-15:58:19.827.391 [mindspore/ccsrc/distributed/collective/collective_manager.cc:486] operator()] End finalizing device communication lib. [INFO] DISTRIBUTED(187764,fffe8b7fe0f0,python):2025-02-07-15:58:19.827.449 [mindspore/ccsrc/distributed/collective/collective_manager.cc:910] RunInitCommTasks] Initialize communciator thread is stopped. [INFO] DISTRIBUTED(187764,ffff97badc10,python):2025-02-07-15:58:19.827.600 [mindspore/ccsrc/distributed/collective/collective_manager.cc:507] Finalize] End finalize collective manager. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.620 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2720] ClearResPart2] End clear CollectiveManager. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.638 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2722] ClearResPart2] Start clear AnalysisResultCacheMgr... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.669 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2724] ClearResPart2] End clear AnalysisResultCacheMgr. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.687 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2726] ClearResPart2] Start clear AnalysisContext... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.707 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2728] ClearResPart2] End clear AnalysisContext... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.725 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2730] ClearResPart2] Start clear AnalysisSchedule... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.887 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2732] ClearResPart2] End clear AnalysisSchedule... [INFO] DEBUG(187764,ffff97badc10,python):2025-02-07-15:58:19.827.922 [mindspore/ccsrc/debug/debugger/debugger.cc:137] Reset] Release Debugger resource. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.971 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2745] ClearResPart3] Start clear ClearObjectCache... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.827.990 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2747] ClearResPart3] End clear ClearObjectCache... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.007 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2749] ClearResPart3] Start clear Parser... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.029 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2751] ClearResPart3] End clear Parser... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.046 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2753] ClearResPart3] Start ClearTraceStack... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.065 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2755] ClearResPart3] End ClearTraceStack... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.081 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2757] ClearResPart3] Start clear InterpretNodeRecorder... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.099 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2759] ClearResPart3] End clear InterpretNodeRecorder... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.116 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2761] ClearResPart3] Start clear parallel::entire_costgraph... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.157 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2763] ClearResPart3] End clear parallel::entire_costgraph... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.174 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2765] ClearResPart3] Start clear ProtobufLibrary... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.429 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2767] ClearResPart3] End clear ProtobufLibrary... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.450 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2769] ClearResPart3] Start clear python_adapter... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.472 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2771] ClearResPart3] End clear python_adapter. [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.490 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2775] ClearSingleton] Start clear singleton... [INFO] PIPELINE(187764,ffff97badc10,python):2025-02-07-15:58:19.828.681 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2793] ClearSingleton] End clear singleton. [INFO] DEVICE(187803,fffec8eda0f0,python):2025-02-07-15:58:19.834.882 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:19.835.781 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_histogram_summary channel is being destroyed. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:19.835.815 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_histogram_summary begins the destruction process. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.092.969 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:217] DeInitialize] Ascend Memory Adapter deinitialize success, statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30682M MindSpore memory base address: 0x12c100000000 Used peak memory usage (without fragments): 0M Actual peak memory usage (with fragments): 0M [INFO] DEVICE(187803,fffec17fa0f0,python):2025-02-07-15:58:20.113.208 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187803,fffea3fff0f0,python):2025-02-07-15:58:20.113.271 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187803,fffea2ffd0f0,python):2025-02-07-15:58:20.113.343 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.113.463 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x2615e5e0. [INFO] COMMON(187775,ffff38ad20f0,python):2025-02-07-15:58:20.113.562 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:174] callback_thread_func] Exit callback thread loop. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.114.057 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_scalar_summary channel is being destroyed. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.114.104 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_scalar_summary begins the destruction process. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.114.588 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_image_summary channel is being destroyed. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.114.612 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_image_summary begins the destruction process. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.115.003 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_summary channel is being destroyed. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.115.028 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_summary begins the destruction process. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.115.724 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x2634a490. [WARNING] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.115.758 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2634a490 is not exist. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.118.003 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x38ca1a20. [WARNING] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.118.037 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x38ca1a20 is not exist. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.120.041 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x38dccf00. [WARNING] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.120.082 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x38dccf00 is not exist. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.122.156 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x38ef8440. [WARNING] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.122.188 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x38ef8440 is not exist. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.122.253 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:278] ReleaseDeviceRes] Ascend finalize end [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:20.122.280 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.122.299 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:20.122.321 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:268] DeleteGraphRunner] Delete GraphRunner success [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:20.123.833 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:239] DeleteGeSession] Delete Ge Session success [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:20.185.854 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:217] DeInitialize] Ascend Memory Adapter deinitialize success, statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30680M MindSpore memory base address: 0x12c100000000 Used peak memory usage (without fragments): 0M Actual peak memory usage (with fragments): 0M [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.208.593 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:217] DeInitialize] Ascend Memory Adapter deinitialize success, statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30680M MindSpore memory base address: 0x12c100000000 Used peak memory usage (without fragments): 0M Actual peak memory usage (with fragments): 0M [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:20.225.208 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x2158b480. [INFO] COMMON(187789,ffff28a120f0,python):2025-02-07-15:58:20.225.369 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:174] callback_thread_func] Exit callback thread loop. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:20.229.994 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x21779070. [WARNING] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:20.230.031 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x21779070 is not exist. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.231.079 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:217] DeInitialize] Ascend Memory Adapter deinitialize success, statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30676M MindSpore memory base address: 0x12c100000000 Used peak memory usage (without fragments): 0M Actual peak memory usage (with fragments): 0M [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.233.894 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x2a27bc70. [INFO] COMMON(187818,ffff1ffff0f0,python):2025-02-07-15:58:20.234.014 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:174] callback_thread_func] Exit callback thread loop. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:20.235.128 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x340cff20. [WARNING] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:20.235.162 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x340cff20 is not exist. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.237.128 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x2a467fd0. [WARNING] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.237.163 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2a467fd0 is not exist. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:20.239.117 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x341fb3d0. [WARNING] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:20.239.153 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x341fb3d0 is not exist. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.239.365 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x3cdbda00. [WARNING] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.239.396 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3cdbda00 is not exist. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.242.302 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x3cee8eb0. [WARNING] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.242.335 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3cee8eb0 is not exist. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:20.242.727 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x34326910. [WARNING] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:20.242.760 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x34326910 is not exist. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:20.242.862 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:278] ReleaseDeviceRes] Ascend finalize end [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:20.242.902 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:20.242.928 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:20.242.955 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:268] DeleteGraphRunner] Delete GraphRunner success [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:20.245.163 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:239] DeleteGeSession] Delete Ge Session success [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.245.293 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x3d0143f0. [WARNING] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.245.325 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3d0143f0 is not exist. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.245.392 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:278] ReleaseDeviceRes] Ascend finalize end [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.245.428 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.245.452 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.245.477 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:268] DeleteGraphRunner] Delete GraphRunner success [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:20.246.861 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:239] DeleteGeSession] Delete Ge Session success [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.255.811 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x2d36d880. [INFO] COMMON(187742,ffff137fe0f0,python):2025-02-07-15:58:20.255.934 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:174] callback_thread_func] Exit callback thread loop. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.258.626 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x2d55a940. [WARNING] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.258.669 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2d55a940 is not exist. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.262.630 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x3feaeb30. [WARNING] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.262.673 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3feaeb30 is not exist. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.265.373 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:217] DeInitialize] Ascend Memory Adapter deinitialize success, statistics: Device MOC memory size: 32768M MindSpore Used memory size: 30678M MindSpore memory base address: 0x12c100000000 Used peak memory usage (without fragments): 0M Actual peak memory usage (with fragments): 0M [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.266.672 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x3ffda050. [WARNING] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.266.714 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3ffda050 is not exist. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.271.108 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x40105590. [WARNING] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.271.148 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x40105590 is not exist. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.271.217 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:278] ReleaseDeviceRes] Ascend finalize end [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:20.271.258 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.271.282 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:20.271.307 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:268] DeleteGraphRunner] Delete GraphRunner success [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:20.272.824 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:239] DeleteGeSession] Delete Ge Session success [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.296.834 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x1f655490. [INFO] COMMON(187753,ffff00f2c0f0,python):2025-02-07-15:58:20.296.938 [mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc:174] callback_thread_func] Exit callback thread loop. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.300.953 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x1f8431a0. [WARNING] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.300.993 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x1f8431a0 is not exist. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.305.217 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x3219a300. [WARNING] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.305.253 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3219a300 is not exist. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.309.684 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x322c57a0. [WARNING] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.309.720 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x322c57a0 is not exist. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.314.039 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:141] UnRegCallback] Unregister callback thread, stream : 0x323f0ce0. [WARNING] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.314.075 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x323f0ce0 is not exist. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.314.141 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:278] ReleaseDeviceRes] Ascend finalize end [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:20.314.171 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.314.191 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:20.314.214 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:268] DeleteGraphRunner] Delete GraphRunner success [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:20.315.731 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:239] DeleteGeSession] Delete Ge Session success [INFO] DEVICE(187803,fffec27fc0f0,python):2025-02-07-15:58:20.337.155 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.337.864 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_dump channel is being destroyed. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.337.899 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_dump begins the destruction process. [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:20.565.540 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:265] DeleteGraphRunner] GraphRunner is not exist [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:20.565.629 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:229] DeleteGeSession] Ge Session is not exist [INFO] GE_ADPT(187834,ffffb35e0c10,python):2025-02-07-15:58:20.565.648 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:20.568.031 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:265] DeleteGraphRunner] GraphRunner is not exist [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:20.568.083 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:229] DeleteGeSession] Ge Session is not exist [INFO] GE_ADPT(187764,ffff97badc10,python):2025-02-07-15:58:20.568.111 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] DEVICE(187803,fffec37fe0f0,python):2025-02-07-15:58:20.625.029 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.625.594 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:135] CloseTsd] Call aclrtResetDevice, destroy and close tsd successful, ret[0] [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:20.625.632 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_0 [INFO] ME(187803,ffff93d7bc10,python):2025-02-07-15:58:20.625.658 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_5 [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.625.708 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2711] ClearResPart2] End clear device context. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.625.728 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2713] ClearResPart2] Start clear kernel runtime... [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.625.748 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:25] ClearRuntimeResource] Release device Ascend_5 [INFO] DEVICE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.625.766 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.625.795 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2715] ClearResPart2] End clear kernel runtime. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.625.812 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2717] ClearResPart2] Start clear CollectiveManager... [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:20.625.845 [mindspore/ccsrc/distributed/collective/collective_manager.cc:499] Finalize] Begin finalize collective manager. [INFO] DISTRIBUTED(187803,fffec2ffd0f0,python):2025-02-07-15:58:20.625.963 [mindspore/ccsrc/distributed/collective/collective_manager.cc:473] operator()] Start finalizing host communication lib. [INFO] DISTRIBUTED(187803,fffec2ffd0f0,python):2025-02-07-15:58:20.626.007 [mindspore/ccsrc/distributed/collective/collective_manager.cc:477] operator()] End finalizing host communication lib. [INFO] DISTRIBUTED(187803,fffec2ffd0f0,python):2025-02-07-15:58:20.626.027 [mindspore/ccsrc/distributed/collective/collective_manager.cc:482] operator()] Start finalizing device communication lib. [INFO] DISTRIBUTED(187803,fffec2ffd0f0,python):2025-02-07-15:58:20.626.048 [mindspore/ccsrc/distributed/collective/collective_manager.cc:486] operator()] End finalizing device communication lib. [INFO] DISTRIBUTED(187803,fffe877fe0f0,python):2025-02-07-15:58:20.626.102 [mindspore/ccsrc/distributed/collective/collective_manager.cc:910] RunInitCommTasks] Initialize communciator thread is stopped. [INFO] DISTRIBUTED(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.246 [mindspore/ccsrc/distributed/collective/collective_manager.cc:507] Finalize] End finalize collective manager. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.266 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2720] ClearResPart2] End clear CollectiveManager. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.285 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2722] ClearResPart2] Start clear AnalysisResultCacheMgr... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.307 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2724] ClearResPart2] End clear AnalysisResultCacheMgr. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.324 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2726] ClearResPart2] Start clear AnalysisContext... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.345 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2728] ClearResPart2] End clear AnalysisContext... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.362 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2730] ClearResPart2] Start clear AnalysisSchedule... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.504 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2732] ClearResPart2] End clear AnalysisSchedule... [INFO] DEBUG(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.538 [mindspore/ccsrc/debug/debugger/debugger.cc:137] Reset] Release Debugger resource. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.597 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2745] ClearResPart3] Start clear ClearObjectCache... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.618 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2747] ClearResPart3] End clear ClearObjectCache... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.635 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2749] ClearResPart3] Start clear Parser... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.658 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2751] ClearResPart3] End clear Parser... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.675 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2753] ClearResPart3] Start ClearTraceStack... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.694 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2755] ClearResPart3] End ClearTraceStack... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.711 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2757] ClearResPart3] Start clear InterpretNodeRecorder... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.729 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2759] ClearResPart3] End clear InterpretNodeRecorder... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.745 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2761] ClearResPart3] Start clear parallel::entire_costgraph... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.789 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2763] ClearResPart3] End clear parallel::entire_costgraph... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.626.806 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2765] ClearResPart3] Start clear ProtobufLibrary... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.627.140 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2767] ClearResPart3] End clear ProtobufLibrary... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.627.162 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2769] ClearResPart3] Start clear python_adapter... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.627.185 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2771] ClearResPart3] End clear python_adapter. [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.627.203 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2775] ClearSingleton] Start clear singleton... [INFO] PIPELINE(187803,ffff93d7bc10,python):2025-02-07-15:58:20.627.381 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2793] ClearSingleton] End clear singleton. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.634.004 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PRE_ACT(187753,ffff8292dc10,python):2025-02-07-15:58:20.634.083 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.634.109 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.634.132 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:117] CloseTsd] Start to close tsd, ref = 1 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:20.634.155 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel _npu_log begins the destruction process. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.634.387 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PRE_ACT(187742,ffffa187dc10,python):2025-02-07-15:58:20.634.462 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.634.490 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.634.514 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:117] CloseTsd] Start to close tsd, ref = 1 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.634.537 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel _npu_log begins the destruction process. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.634.834 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PRE_ACT(187775,ffffba4dbc10,python):2025-02-07-15:58:20.634.916 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.634.949 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.634.974 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:117] CloseTsd] Start to close tsd, ref = 1 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.635.000 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel _npu_log begins the destruction process. [INFO] DEVICE(187742,fffed69b50f0,python):2025-02-07-15:58:20.685.445 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.686.331 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_histogram_summary channel is being destroyed. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:20.686.362 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_histogram_summary begins the destruction process. [INFO] DEVICE(187775,fffeefa8c0f0,python):2025-02-07-15:58:20.945.125 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.945.803 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_histogram_summary channel is being destroyed. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:20.945.828 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_histogram_summary begins the destruction process. [INFO] DEVICE(187742,fffebd7fa0f0,python):2025-02-07-15:58:21.036.956 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:21.037.487 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_scalar_summary channel is being destroyed. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:21.037.512 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_scalar_summary begins the destruction process. [INFO] DEVICE(187742,fffebe7fc0f0,python):2025-02-07-15:58:21.072.954 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:21.073.409 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_image_summary channel is being destroyed. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:21.073.431 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_image_summary begins the destruction process. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:21.131.480 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PRE_ACT(187818,ffffbe0b2c10,python):2025-02-07-15:58:21.131.552 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:21.131.599 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:21.131.633 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:117] CloseTsd] Start to close tsd, ref = 1 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:21.131.665 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel _npu_log begins the destruction process. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:21.131.678 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PRE_ACT(187789,ffffaa419c10,python):2025-02-07-15:58:21.131.764 [mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc:1218] DumpDynamicMemPoolStateInfo] The dynamic memory pool total allocated mem:2048M, min addr :0, max addr: 0, peak used mem:0M, actual peak used mem:0M, in used mem:0M, total used by event mem:0M, total idle mem:2047M, total eager free mem:0M. Weight used size:0M, constant value used size:0M, kernel output used size:0M, other used size:0M. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:21.131.797 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_two_pointer_mem_adapter.cc:82] DeInitialize] DeInitialize Ascend Memory Adapter when it is not initialize [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:21.131.826 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:117] CloseTsd] Start to close tsd, ref = 1 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:21.131.857 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel _npu_log begins the destruction process. [INFO] DEVICE(187818,fffef35220f0,python):2025-02-07-15:58:21.361.037 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:21.361.716 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_histogram_summary channel is being destroyed. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:21.361.742 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_histogram_summary begins the destruction process. [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:21.362.163 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:265] DeleteGraphRunner] GraphRunner is not exist [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:21.362.264 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:229] DeleteGeSession] Ge Session is not exist [INFO] GE_ADPT(187803,ffff93d7bc10,python):2025-02-07-15:58:21.362.286 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] DEVICE(187753,fffeb7f070f0,python):2025-02-07-15:58:21.425.717 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:21.426.442 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_histogram_summary channel is being destroyed. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:21.426.471 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_histogram_summary begins the destruction process. [INFO] DEVICE(187742,fffed59b30f0,python):2025-02-07-15:58:21.521.150 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187742,fffebf7fe0f0,python):2025-02-07-15:58:21.521.275 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187742,fffed49b10f0,python):2025-02-07-15:58:21.521.337 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:21.521.772 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_summary channel is being destroyed. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:21.521.800 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_summary begins the destruction process. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:21.522.152 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_dump channel is being destroyed. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:21.522.180 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_dump begins the destruction process. [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:21.522.647 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:135] CloseTsd] Call aclrtResetDevice, destroy and close tsd successful, ret[0] [INFO] ME(187742,ffffa187dc10,python):2025-02-07-15:58:21.522.681 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_0 [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.522.761 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2711] ClearResPart2] End clear device context. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.522.779 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2713] ClearResPart2] Start clear kernel runtime... [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:21.522.795 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:25] ClearRuntimeResource] Release device Ascend_0 [INFO] DEVICE(187742,ffffa187dc10,python):2025-02-07-15:58:21.522.809 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.522.831 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2715] ClearResPart2] End clear kernel runtime. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.522.845 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2717] ClearResPart2] Start clear CollectiveManager... [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:21.522.869 [mindspore/ccsrc/distributed/collective/collective_manager.cc:499] Finalize] Begin finalize collective manager. [INFO] DISTRIBUTED(187742,fffed51b20f0,python):2025-02-07-15:58:21.522.993 [mindspore/ccsrc/distributed/collective/collective_manager.cc:473] operator()] Start finalizing host communication lib. [INFO] DISTRIBUTED(187742,fffed51b20f0,python):2025-02-07-15:58:21.523.027 [mindspore/ccsrc/distributed/collective/collective_manager.cc:477] operator()] End finalizing host communication lib. [INFO] DISTRIBUTED(187742,fffed51b20f0,python):2025-02-07-15:58:21.523.044 [mindspore/ccsrc/distributed/collective/collective_manager.cc:482] operator()] Start finalizing device communication lib. [INFO] DISTRIBUTED(187742,fffed51b20f0,python):2025-02-07-15:58:21.523.058 [mindspore/ccsrc/distributed/collective/collective_manager.cc:486] operator()] End finalizing device communication lib. [INFO] DISTRIBUTED(187742,fffe9dffb0f0,python):2025-02-07-15:58:21.523.102 [mindspore/ccsrc/distributed/collective/collective_manager.cc:910] RunInitCommTasks] Initialize communciator thread is stopped. [INFO] DISTRIBUTED(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.233 [mindspore/ccsrc/distributed/collective/collective_manager.cc:507] Finalize] End finalize collective manager. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.253 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2720] ClearResPart2] End clear CollectiveManager. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.267 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2722] ClearResPart2] Start clear AnalysisResultCacheMgr... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.283 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2724] ClearResPart2] End clear AnalysisResultCacheMgr. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.296 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2726] ClearResPart2] Start clear AnalysisContext... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.323 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2728] ClearResPart2] End clear AnalysisContext... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.337 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2730] ClearResPart2] Start clear AnalysisSchedule... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.490 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2732] ClearResPart2] End clear AnalysisSchedule... [INFO] DEBUG(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.519 [mindspore/ccsrc/debug/debugger/debugger.cc:137] Reset] Release Debugger resource. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.573 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2745] ClearResPart3] Start clear ClearObjectCache... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.588 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2747] ClearResPart3] End clear ClearObjectCache... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.601 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2749] ClearResPart3] Start clear Parser... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.617 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2751] ClearResPart3] End clear Parser... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.630 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2753] ClearResPart3] Start ClearTraceStack... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.645 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2755] ClearResPart3] End ClearTraceStack... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.658 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2757] ClearResPart3] Start clear InterpretNodeRecorder... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.671 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2759] ClearResPart3] End clear InterpretNodeRecorder... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.683 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2761] ClearResPart3] Start clear parallel::entire_costgraph... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.721 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2763] ClearResPart3] End clear parallel::entire_costgraph... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.523.735 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2765] ClearResPart3] Start clear ProtobufLibrary... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.524.034 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2767] ClearResPart3] End clear ProtobufLibrary... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.524.055 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2769] ClearResPart3] Start clear python_adapter... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.524.070 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2771] ClearResPart3] End clear python_adapter. [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.524.084 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2775] ClearSingleton] Start clear singleton... [INFO] PIPELINE(187742,ffffa187dc10,python):2025-02-07-15:58:21.524.236 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2793] ClearSingleton] End clear singleton. [INFO] DEVICE(187775,fffecf7fe0f0,python):2025-02-07-15:58:21.741.005 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187775,fffeeda880f0,python):2025-02-07-15:58:21.744.927 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187753,fffeb6f050f0,python):2025-02-07-15:58:21.777.030 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187775,fffece7fc0f0,python):2025-02-07-15:58:21.777.106 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187775,fffeeea8a0f0,python):2025-02-07-15:58:21.777.158 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187775,fffeeca860f0,python):2025-02-07-15:58:21.777.194 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.777.697 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_scalar_summary channel is being destroyed. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.777.725 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_scalar_summary begins the destruction process. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.778.065 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_image_summary channel is being destroyed. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.778.086 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_image_summary begins the destruction process. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.778.449 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_summary channel is being destroyed. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.778.470 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_summary begins the destruction process. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.778.798 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_dump channel is being destroyed. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.778.818 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_dump begins the destruction process. [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.199 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:135] CloseTsd] Call aclrtResetDevice, destroy and close tsd successful, ret[0] [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.225 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_0 [INFO] ME(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.244 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_3 [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.288 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2711] ClearResPart2] End clear device context. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.305 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2713] ClearResPart2] Start clear kernel runtime... [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.323 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:25] ClearRuntimeResource] Release device Ascend_3 [INFO] DEVICE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.337 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.361 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2715] ClearResPart2] End clear kernel runtime. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.374 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2717] ClearResPart2] Start clear CollectiveManager... [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.410 [mindspore/ccsrc/distributed/collective/collective_manager.cc:499] Finalize] Begin finalize collective manager. [INFO] DISTRIBUTED(187775,fffeee2890f0,python):2025-02-07-15:58:21.779.528 [mindspore/ccsrc/distributed/collective/collective_manager.cc:473] operator()] Start finalizing host communication lib. [INFO] DISTRIBUTED(187775,fffeee2890f0,python):2025-02-07-15:58:21.779.567 [mindspore/ccsrc/distributed/collective/collective_manager.cc:477] operator()] End finalizing host communication lib. [INFO] DISTRIBUTED(187775,fffeee2890f0,python):2025-02-07-15:58:21.779.584 [mindspore/ccsrc/distributed/collective/collective_manager.cc:482] operator()] Start finalizing device communication lib. [INFO] DISTRIBUTED(187775,fffeee2890f0,python):2025-02-07-15:58:21.779.601 [mindspore/ccsrc/distributed/collective/collective_manager.cc:486] operator()] End finalizing device communication lib. [INFO] DISTRIBUTED(187775,fffeaeffd0f0,python):2025-02-07-15:58:21.779.647 [mindspore/ccsrc/distributed/collective/collective_manager.cc:910] RunInitCommTasks] Initialize communciator thread is stopped. [INFO] DISTRIBUTED(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.767 [mindspore/ccsrc/distributed/collective/collective_manager.cc:507] Finalize] End finalize collective manager. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.785 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2720] ClearResPart2] End clear CollectiveManager. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.799 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2722] ClearResPart2] Start clear AnalysisResultCacheMgr... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.816 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2724] ClearResPart2] End clear AnalysisResultCacheMgr. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.829 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2726] ClearResPart2] Start clear AnalysisContext... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.845 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2728] ClearResPart2] End clear AnalysisContext... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.779.858 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2730] ClearResPart2] Start clear AnalysisSchedule... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.007 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2732] ClearResPart2] End clear AnalysisSchedule... [INFO] DEBUG(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.035 [mindspore/ccsrc/debug/debugger/debugger.cc:137] Reset] Release Debugger resource. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.090 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2745] ClearResPart3] Start clear ClearObjectCache... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.106 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2747] ClearResPart3] End clear ClearObjectCache... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.118 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2749] ClearResPart3] Start clear Parser... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.137 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2751] ClearResPart3] End clear Parser... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.149 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2753] ClearResPart3] Start ClearTraceStack... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.164 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2755] ClearResPart3] End ClearTraceStack... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.176 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2757] ClearResPart3] Start clear InterpretNodeRecorder... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.190 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2759] ClearResPart3] End clear InterpretNodeRecorder... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.211 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2761] ClearResPart3] Start clear parallel::entire_costgraph... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.243 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2763] ClearResPart3] End clear parallel::entire_costgraph... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.257 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2765] ClearResPart3] Start clear ProtobufLibrary... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.577 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2767] ClearResPart3] End clear ProtobufLibrary... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.595 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2769] ClearResPart3] Start clear python_adapter... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.612 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2771] ClearResPart3] End clear python_adapter. [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.627 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2775] ClearSingleton] Start clear singleton... [INFO] PIPELINE(187775,ffffba4dbc10,python):2025-02-07-15:58:21.780.858 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2793] ClearSingleton] End clear singleton. [INFO] DEVICE(187753,fffe977fe0f0,python):2025-02-07-15:58:21.804.971 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187789,fffedfa8c0f0,python):2025-02-07-15:58:21.841.040 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:21.841.724 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_histogram_summary channel is being destroyed. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:21.841.752 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_histogram_summary begins the destruction process. [INFO] DEVICE(187818,fffef151e0f0,python):2025-02-07-15:58:22.193.152 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187818,fffed2ffd0f0,python):2025-02-07-15:58:22.193.192 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187818,fffed3fff0f0,python):2025-02-07-15:58:22.193.233 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187818,fffed1ffb0f0,python):2025-02-07-15:58:22.193.246 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187818,fffef25200f0,python):2025-02-07-15:58:22.193.318 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.193.879 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_scalar_summary channel is being destroyed. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.193.907 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_scalar_summary begins the destruction process. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.194.293 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_image_summary channel is being destroyed. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.194.317 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_image_summary begins the destruction process. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.194.696 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_summary channel is being destroyed. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.194.721 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_summary begins the destruction process. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.195.040 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_dump channel is being destroyed. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.195.065 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_dump begins the destruction process. [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.195.442 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:135] CloseTsd] Call aclrtResetDevice, destroy and close tsd successful, ret[0] [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.195.472 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_0 [INFO] ME(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.195.495 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_6 [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.195.535 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2711] ClearResPart2] End clear device context. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.195.554 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2713] ClearResPart2] Start clear kernel runtime... [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.195.574 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:25] ClearRuntimeResource] Release device Ascend_6 [INFO] DEVICE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.195.592 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.195.616 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2715] ClearResPart2] End clear kernel runtime. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.195.634 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2717] ClearResPart2] Start clear CollectiveManager... [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.195.662 [mindspore/ccsrc/distributed/collective/collective_manager.cc:499] Finalize] Begin finalize collective manager. [INFO] DISTRIBUTED(187818,fffef1d1f0f0,python):2025-02-07-15:58:22.195.775 [mindspore/ccsrc/distributed/collective/collective_manager.cc:473] operator()] Start finalizing host communication lib. [INFO] DISTRIBUTED(187818,fffef1d1f0f0,python):2025-02-07-15:58:22.195.809 [mindspore/ccsrc/distributed/collective/collective_manager.cc:477] operator()] End finalizing host communication lib. [INFO] DISTRIBUTED(187818,fffef1d1f0f0,python):2025-02-07-15:58:22.195.828 [mindspore/ccsrc/distributed/collective/collective_manager.cc:482] operator()] Start finalizing device communication lib. [INFO] DISTRIBUTED(187818,fffef1d1f0f0,python):2025-02-07-15:58:22.195.848 [mindspore/ccsrc/distributed/collective/collective_manager.cc:486] operator()] End finalizing device communication lib. [INFO] DISTRIBUTED(187818,fffeba7fc0f0,python):2025-02-07-15:58:22.195.899 [mindspore/ccsrc/distributed/collective/collective_manager.cc:910] RunInitCommTasks] Initialize communciator thread is stopped. [INFO] DISTRIBUTED(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.019 [mindspore/ccsrc/distributed/collective/collective_manager.cc:507] Finalize] End finalize collective manager. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.041 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2720] ClearResPart2] End clear CollectiveManager. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.058 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2722] ClearResPart2] Start clear AnalysisResultCacheMgr... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.080 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2724] ClearResPart2] End clear AnalysisResultCacheMgr. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.097 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2726] ClearResPart2] Start clear AnalysisContext... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.118 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2728] ClearResPart2] End clear AnalysisContext... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.135 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2730] ClearResPart2] Start clear AnalysisSchedule... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.274 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2732] ClearResPart2] End clear AnalysisSchedule... [INFO] DEBUG(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.307 [mindspore/ccsrc/debug/debugger/debugger.cc:137] Reset] Release Debugger resource. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.355 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2745] ClearResPart3] Start clear ClearObjectCache... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.374 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2747] ClearResPart3] End clear ClearObjectCache... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.391 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2749] ClearResPart3] Start clear Parser... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.413 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2751] ClearResPart3] End clear Parser... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.430 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2753] ClearResPart3] Start ClearTraceStack... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.448 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2755] ClearResPart3] End ClearTraceStack... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.464 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2757] ClearResPart3] Start clear InterpretNodeRecorder... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.481 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2759] ClearResPart3] End clear InterpretNodeRecorder... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.498 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2761] ClearResPart3] Start clear parallel::entire_costgraph... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.535 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2763] ClearResPart3] End clear parallel::entire_costgraph... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.552 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2765] ClearResPart3] Start clear ProtobufLibrary... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.822 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2767] ClearResPart3] End clear ProtobufLibrary... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.845 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2769] ClearResPart3] Start clear python_adapter... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.867 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2771] ClearResPart3] End clear python_adapter. [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.885 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2775] ClearSingleton] Start clear singleton... [INFO] PIPELINE(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.196.995 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2793] ClearSingleton] End clear singleton. [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:22.242.846 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:265] DeleteGraphRunner] GraphRunner is not exist [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:22.242.934 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:229] DeleteGeSession] Ge Session is not exist [INFO] GE_ADPT(187742,ffffa187dc10,python):2025-02-07-15:58:22.242.954 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] DEVICE(187753,fffeb5f030f0,python):2025-02-07-15:58:22.257.029 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187753,fffeb4f010f0,python):2025-02-07-15:58:22.257.069 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187753,fffe967fc0f0,python):2025-02-07-15:58:22.257.107 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:22.257.676 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_scalar_summary channel is being destroyed. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:22.257.708 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_scalar_summary begins the destruction process. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:22.258.082 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_image_summary channel is being destroyed. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:22.258.104 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_image_summary begins the destruction process. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:22.258.533 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_summary channel is being destroyed. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:22.258.555 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_summary begins the destruction process. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:22.258.911 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_dump channel is being destroyed. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:22.258.935 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_dump begins the destruction process. [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.336 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:135] CloseTsd] Call aclrtResetDevice, destroy and close tsd successful, ret[0] [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.365 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_0 [INFO] ME(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.385 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_1 [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.431 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2711] ClearResPart2] End clear device context. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.451 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2713] ClearResPart2] Start clear kernel runtime... [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.467 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:25] ClearRuntimeResource] Release device Ascend_1 [INFO] DEVICE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.481 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.505 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2715] ClearResPart2] End clear kernel runtime. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.520 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2717] ClearResPart2] Start clear CollectiveManager... [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.543 [mindspore/ccsrc/distributed/collective/collective_manager.cc:499] Finalize] Begin finalize collective manager. [INFO] DISTRIBUTED(187753,fffeb67040f0,python):2025-02-07-15:58:22.259.665 [mindspore/ccsrc/distributed/collective/collective_manager.cc:473] operator()] Start finalizing host communication lib. [INFO] DISTRIBUTED(187753,fffeb67040f0,python):2025-02-07-15:58:22.259.696 [mindspore/ccsrc/distributed/collective/collective_manager.cc:477] operator()] End finalizing host communication lib. [INFO] DISTRIBUTED(187753,fffeb67040f0,python):2025-02-07-15:58:22.259.711 [mindspore/ccsrc/distributed/collective/collective_manager.cc:482] operator()] Start finalizing device communication lib. [INFO] DISTRIBUTED(187753,fffeb67040f0,python):2025-02-07-15:58:22.259.725 [mindspore/ccsrc/distributed/collective/collective_manager.cc:486] operator()] End finalizing device communication lib. [INFO] DISTRIBUTED(187753,fffe76ffd0f0,python):2025-02-07-15:58:22.259.763 [mindspore/ccsrc/distributed/collective/collective_manager.cc:910] RunInitCommTasks] Initialize communciator thread is stopped. [INFO] DISTRIBUTED(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.891 [mindspore/ccsrc/distributed/collective/collective_manager.cc:507] Finalize] End finalize collective manager. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.908 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2720] ClearResPart2] End clear CollectiveManager. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.922 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2722] ClearResPart2] Start clear AnalysisResultCacheMgr... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.938 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2724] ClearResPart2] End clear AnalysisResultCacheMgr. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.962 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2726] ClearResPart2] Start clear AnalysisContext... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.976 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2728] ClearResPart2] End clear AnalysisContext... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.259.990 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2730] ClearResPart2] Start clear AnalysisSchedule... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.133 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2732] ClearResPart2] End clear AnalysisSchedule... [INFO] DEBUG(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.164 [mindspore/ccsrc/debug/debugger/debugger.cc:137] Reset] Release Debugger resource. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.216 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2745] ClearResPart3] Start clear ClearObjectCache... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.231 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2747] ClearResPart3] End clear ClearObjectCache... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.244 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2749] ClearResPart3] Start clear Parser... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.261 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2751] ClearResPart3] End clear Parser... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.274 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2753] ClearResPart3] Start ClearTraceStack... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.288 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2755] ClearResPart3] End ClearTraceStack... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.301 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2757] ClearResPart3] Start clear InterpretNodeRecorder... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.316 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2759] ClearResPart3] End clear InterpretNodeRecorder... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.328 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2761] ClearResPart3] Start clear parallel::entire_costgraph... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.362 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2763] ClearResPart3] End clear parallel::entire_costgraph... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.375 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2765] ClearResPart3] Start clear ProtobufLibrary... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.759 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2767] ClearResPart3] End clear ProtobufLibrary... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.782 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2769] ClearResPart3] Start clear python_adapter... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.797 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2771] ClearResPart3] End clear python_adapter. [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.812 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2775] ClearSingleton] Start clear singleton... [INFO] PIPELINE(187753,ffff8292dc10,python):2025-02-07-15:58:22.260.970 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2793] ClearSingleton] End clear singleton. [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:22.518.239 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:265] DeleteGraphRunner] GraphRunner is not exist [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:22.518.354 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:229] DeleteGeSession] Ge Session is not exist [INFO] GE_ADPT(187775,ffffba4dbc10,python):2025-02-07-15:58:22.518.374 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] DEVICE(187789,fffebf7fe0f0,python):2025-02-07-15:58:22.669.035 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187789,fffebe7fc0f0,python):2025-02-07-15:58:22.669.084 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187789,fffedda880f0,python):2025-02-07-15:58:22.669.145 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:22.669.606 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_scalar_summary channel is being destroyed. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:22.669.647 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_scalar_summary begins the destruction process. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:22.670.010 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_image_summary channel is being destroyed. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:22.670.033 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_image_summary begins the destruction process. [INFO] DEVICE(187789,fffedea8a0f0,python):2025-02-07-15:58:22.672.938 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187789,fffedca860f0,python):2025-02-07-15:58:22.672.966 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:151] ~AclDatasetInfo] AcltdtDestroyDataset succeeded. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:22.673.481 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_summary channel is being destroyed. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:22.673.515 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_summary begins the destruction process. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:22.673.832 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.h:251] DestoryHandler] The thread of ms_tensor_dump channel is being destroyed. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:22.673.856 [mindspore/ccsrc/plugin/device/ascend/hal/device/mbuf_receive_manager.cc:325] ~MbufDataHandler] Channel ms_tensor_dump begins the destruction process. [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.289 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc:135] CloseTsd] Call aclrtResetDevice, destroy and close tsd successful, ret[0] [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.319 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_0 [INFO] ME(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.340 [mindspore/ccsrc/runtime/hardware/device_context_manager.cc:401] ClearDeviceContexts] Release device CPU_4 [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.415 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2711] ClearResPart2] End clear device context. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.434 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2713] ClearResPart2] Start clear kernel runtime... [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.454 [mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc:25] ClearRuntimeResource] Release device Ascend_4 [INFO] DEVICE(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.471 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc:249] ReleaseDeviceRes] Ascend finalize start [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.497 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2715] ClearResPart2] End clear kernel runtime. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.514 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2717] ClearResPart2] Start clear CollectiveManager... [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.546 [mindspore/ccsrc/distributed/collective/collective_manager.cc:499] Finalize] Begin finalize collective manager. [INFO] DISTRIBUTED(187789,fffede2890f0,python):2025-02-07-15:58:22.674.669 [mindspore/ccsrc/distributed/collective/collective_manager.cc:473] operator()] Start finalizing host communication lib. [INFO] DISTRIBUTED(187789,fffede2890f0,python):2025-02-07-15:58:22.674.705 [mindspore/ccsrc/distributed/collective/collective_manager.cc:477] operator()] End finalizing host communication lib. [INFO] DISTRIBUTED(187789,fffede2890f0,python):2025-02-07-15:58:22.674.734 [mindspore/ccsrc/distributed/collective/collective_manager.cc:482] operator()] Start finalizing device communication lib. [INFO] DISTRIBUTED(187789,fffede2890f0,python):2025-02-07-15:58:22.674.754 [mindspore/ccsrc/distributed/collective/collective_manager.cc:486] operator()] End finalizing device communication lib. [INFO] DISTRIBUTED(187789,fffe9effd0f0,python):2025-02-07-15:58:22.674.809 [mindspore/ccsrc/distributed/collective/collective_manager.cc:910] RunInitCommTasks] Initialize communciator thread is stopped. [INFO] DISTRIBUTED(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.945 [mindspore/ccsrc/distributed/collective/collective_manager.cc:507] Finalize] End finalize collective manager. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.965 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2720] ClearResPart2] End clear CollectiveManager. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.674.982 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2722] ClearResPart2] Start clear AnalysisResultCacheMgr... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.003 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2724] ClearResPart2] End clear AnalysisResultCacheMgr. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.020 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2726] ClearResPart2] Start clear AnalysisContext... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.041 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2728] ClearResPart2] End clear AnalysisContext... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.057 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2730] ClearResPart2] Start clear AnalysisSchedule... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.200 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2732] ClearResPart2] End clear AnalysisSchedule... [INFO] DEBUG(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.235 [mindspore/ccsrc/debug/debugger/debugger.cc:137] Reset] Release Debugger resource. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.338 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2745] ClearResPart3] Start clear ClearObjectCache... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.358 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2747] ClearResPart3] End clear ClearObjectCache... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.375 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2749] ClearResPart3] Start clear Parser... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.397 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2751] ClearResPart3] End clear Parser... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.414 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2753] ClearResPart3] Start ClearTraceStack... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.433 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2755] ClearResPart3] End ClearTraceStack... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.449 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2757] ClearResPart3] Start clear InterpretNodeRecorder... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.467 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2759] ClearResPart3] End clear InterpretNodeRecorder... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.482 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2761] ClearResPart3] Start clear parallel::entire_costgraph... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.502 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2763] ClearResPart3] End clear parallel::entire_costgraph... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.518 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2765] ClearResPart3] Start clear ProtobufLibrary... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.812 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2767] ClearResPart3] End clear ProtobufLibrary... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.834 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2769] ClearResPart3] Start clear python_adapter... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.856 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2771] ClearResPart3] End clear python_adapter. [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.675.874 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2775] ClearSingleton] Start clear singleton... [INFO] PIPELINE(187789,ffffaa419c10,python):2025-02-07-15:58:22.676.018 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2793] ClearSingleton] End clear singleton. [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.946.417 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:265] DeleteGraphRunner] GraphRunner is not exist [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.946.480 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:229] DeleteGeSession] Ge Session is not exist [INFO] GE_ADPT(187818,ffffbe0b2c10,python):2025-02-07-15:58:22.946.504 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:23.002.089 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:265] DeleteGraphRunner] GraphRunner is not exist [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:23.002.196 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:229] DeleteGeSession] Ge Session is not exist [INFO] GE_ADPT(187753,ffff8292dc10,python):2025-02-07-15:58:23.002.216 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:23.396.534 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:265] DeleteGraphRunner] GraphRunner is not exist [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:23.396.610 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:229] DeleteGeSession] Ge Session is not exist [INFO] GE_ADPT(187789,ffffaa419c10,python):2025-02-07-15:58:23.396.662 [mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc:183] ClearGraph] Remove all graphs in GraphManager [INFO] ME(187602:281473361210384,MainProcess):2025-02-07-15:58:27.826.43 [mindspore/parallel/cluster/process_entity/_api.py:322] All workers successfully exit! [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.084.173 [mindspore/ccsrc/pipeline/jit/ps/init.cc:604] operator()] Start register... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.084.251 [mindspore/ccsrc/pipeline/jit/ps/init.cc:607] operator()] Start mindspore.profiler... [INFO] ME(187602:281473361210384,MainProcess):2025-02-07-15:58:27.843.73 [mindspore/profiler/envprofiler.py:56] analyse start [INFO] ME(187602:281473361210384,MainProcess):2025-02-07-15:58:27.845.02 [mindspore/profiler/envprofiler.py:58] Profiler is not initialized, skip analyse. [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.084.583 [mindspore/ccsrc/pipeline/jit/ps/init.cc:614] operator()] Start EmbeddingCacheScheduler... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.084.712 [mindspore/ccsrc/pipeline/jit/ps/init.cc:621] operator()] Start releasing dataset handles... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.084.803 [mindspore/ccsrc/pipeline/jit/ps/init.cc:624] operator()] End release dataset handles. [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.084.825 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2797] ClearResAtexit] Pipeline clear all resource [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.085.006 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:303] RecordExitStatus] Status record: system exit. [INFO] DEBUG(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.085.055 [mindspore/ccsrc/common/debug/env_config_parser.cc:152] ParseFromFile] The 'env_config_path' in 'mindspore.context.set_context(env_config_path={path})' is empty. [INFO] ME(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.085.099 [mindspore/core/mindrt/src/actor/actormgr.cc:165] Finalize] mindrt Actors finish exiting. [INFO] ME(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.085.112 [mindspore/core/mindrt/src/actor/actormgr.cc:168] Finalize] mindrt Threads finish exiting. [INFO] ME(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.085.126 [mindspore/core/mindrt/src/actor/actormgr.cc:179] Finalize] mindrt IOMGRS finish exiting. [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.085.279 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2680] ClearResPart1] Start Finalize StreamSynchronizer... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.085.299 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2682] ClearResPart1] End Finalize StreamSynchronizer... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.086.905 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1024] ClearInfo] Clean graph resource! [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.086.938 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:1019] ClearRes] Clean executor resource! [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.086.970 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2699] ClearResPart2] Start clear PyNativeExecutor... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.142 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2701] ClearResPart2] End clear PyNativeExecutor. [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.158 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2703] ClearResPart2] Start clear ConfigManager... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.170 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2705] ClearResPart2] End clear ConfigManager. [INFO] COMMON(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.333 [mindspore/ccsrc/common/thread_pool.cc:41] ThreadPool] Set max_thread_num_ to 4 [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.357 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2709] ClearResPart2] Start clear device context... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.375 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2711] ClearResPart2] End clear device context. [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.386 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2713] ClearResPart2] Start clear kernel runtime... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.405 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2715] ClearResPart2] End clear kernel runtime. [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.416 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2717] ClearResPart2] Start clear CollectiveManager... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.451 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2720] ClearResPart2] End clear CollectiveManager. [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.463 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2722] ClearResPart2] Start clear AnalysisResultCacheMgr... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.478 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2724] ClearResPart2] End clear AnalysisResultCacheMgr. [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.501 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2726] ClearResPart2] Start clear AnalysisContext... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.516 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2728] ClearResPart2] End clear AnalysisContext... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.527 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2730] ClearResPart2] Start clear AnalysisSchedule... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.836 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2732] ClearResPart2] End clear AnalysisSchedule... [INFO] DEBUG(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.869 [mindspore/ccsrc/debug/debugger/debugger.cc:137] Reset] Release Debugger resource. [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.920 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2745] ClearResPart3] Start clear ClearObjectCache... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.933 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2747] ClearResPart3] End clear ClearObjectCache... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.944 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2749] ClearResPart3] Start clear Parser... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.961 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2751] ClearResPart3] End clear Parser... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.087.972 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2753] ClearResPart3] Start ClearTraceStack... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.088.036 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2755] ClearResPart3] End ClearTraceStack... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.088.048 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2757] ClearResPart3] Start clear InterpretNodeRecorder... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.088.060 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2759] ClearResPart3] End clear InterpretNodeRecorder... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.088.071 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2761] ClearResPart3] Start clear parallel::entire_costgraph... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.088.099 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2763] ClearResPart3] End clear parallel::entire_costgraph... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.088.110 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2765] ClearResPart3] Start clear ProtobufLibrary... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.088.444 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2767] ClearResPart3] End clear ProtobufLibrary... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.088.462 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2769] ClearResPart3] Start clear python_adapter... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.088.476 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2771] ClearResPart3] End clear python_adapter. [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.088.488 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2775] ClearSingleton] Start clear singleton... [INFO] PIPELINE(187602,ffff9fb56c10,python3.7):2025-02-07-15:58:27.088.720 [mindspore/ccsrc/pipeline/jit/ps/pipeline.cc:2793] ClearSingleton] End clear singleton. netstat -tunlp cmd is: msrun --worker_num=8 --local_worker_num=8 --master_port=10001 --join=True --log_dir=log_output python parallel_complex_input.py . ============================== 1 passed in 44.26s ============================== ff8c39f2e51611efac92c4447d93fe45/pass/test_remove_redundancy_test_get_strategy_redundancy.log0000644000175400017540000147734114751343157031743 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collected 1 item test_remove_redundancy.py [WARNING] ME(25955:281472830163984,MainProcess):2025-02-07-15:47:41.633.127 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:0, log file:worker_0.log. Environment variable [RANK_ID] is exported. [WARNING] ME(25955:281472830163984,MainProcess):2025-02-07-15:47:41.759.199 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:1, log file:worker_1.log. Environment variable [RANK_ID] is exported. [WARNING] ME(25955:281472830163984,MainProcess):2025-02-07-15:47:41.895.310 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:2, log file:worker_2.log. Environment variable [RANK_ID] is exported. [WARNING] ME(25955:281472830163984,MainProcess):2025-02-07-15:47:42.373.33 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:3, log file:worker_3.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 [WARNING] ME(25955:281472830163984,MainProcess):2025-02-07-15:47:42.186.271 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:4, log file:worker_4.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 [WARNING] ME(25955:281472830163984,MainProcess):2025-02-07-15:47:42.339.615 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:5, log file:worker_5.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 [WARNING] ME(25955:281472830163984,MainProcess):2025-02-07-15:47:42.493.103 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:6, log file:worker_6.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 [WARNING] ME(25955:281472830163984,MainProcess):2025-02-07-15:47:42.655.700 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:7, log file:worker_7.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 [WARNING] ME(25955:281472830163984,MainProcess):2025-02-07-15:47:42.808.944 [mindspore/parallel/cluster/process_entity/_api.py:223] Distributed job is spawned. Waiting all processes to exit... ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 [WARNING] DISTRIBUTED(26020,ffff3a51f0f0,python3.7):2025-02-07-15:47:47.883.880 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49124 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26020,ffffbacacc10,python3.7):2025-02-07-15:47:47.883.880 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49124, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26020,ffffbacacc10,python3.7):2025-02-07-15:47:47.884.044 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49126, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26020,ffff3b5210f0,python3.7):2025-02-07-15:47:47.884.075 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49126 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26020,ffffbacacc10,python3.7):2025-02-07-15:47:47.884.085 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(26040,ffffb911fc10,python3.7):2025-02-07-15:47:47.900.844 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49128, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26040,ffff389940f0,python3.7):2025-02-07-15:47:47.900.844 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49128 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26040,ffffb911fc10,python3.7):2025-02-07-15:47:47.900.911 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(26029,ffffbca4cc10,python3.7):2025-02-07-15:47:48.079.320 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49130, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26029,ffff37fff0f0,python3.7):2025-02-07-15:47:48.079.331 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49130 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26029,ffffbca4cc10,python3.7):2025-02-07-15:47:48.079.430 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(26068,ffffad937c10,python3.7):2025-02-07-15:47:48.346.560 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49136, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26068,ffff2d1b30f0,python3.7):2025-02-07-15:47:48.346.562 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49136 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26068,ffffad937c10,python3.7):2025-02-07-15:47:48.346.629 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(26020,ffffbacacc10,python3.7):2025-02-07-15:47:48.384.746 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(26040,ffffb911fc10,python3.7):2025-02-07-15:47:48.401.127 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49138, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26040,ffff399960f0,python3.7):2025-02-07-15:47:48.401.154 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49138 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26040,ffffb911fc10,python3.7):2025-02-07-15:47:48.401.163 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(26051,fffeff24a0f0,python3.7):2025-02-07-15:47:48.428.630 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49142 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26051,ffff7f9d5c10,python3.7):2025-02-07-15:47:48.428.621 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49142, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26051,ffff7f9d5c10,python3.7):2025-02-07-15:47:48.428.807 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49144, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26051,ffff0024c0f0,python3.7):2025-02-07-15:47:48.428.835 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49144 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26051,ffff7f9d5c10,python3.7):2025-02-07-15:47:48.428.856 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(26029,ffffbca4cc10,python3.7):2025-02-07-15:47:48.579.600 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49146, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26029,ffffbca4cc10,python3.7):2025-02-07-15:47:48.579.630 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(26029,ffff3d2d10f0,python3.7):2025-02-07-15:47:48.579.627 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49146 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26086,ffffa208fc10,python3.7):2025-02-07-15:47:48.655.615 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49148, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26086,ffff219140f0,python3.7):2025-02-07-15:47:48.655.631 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49148 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26086,ffffa208fc10,python3.7):2025-02-07-15:47:48.655.717 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(26100,ffff1b32c0f0,python3.7):2025-02-07-15:47:48.783.160 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49152 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26100,ffff9bab1c10,python3.7):2025-02-07-15:47:48.783.164 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49152, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26100,ffff9bab1c10,python3.7):2025-02-07-15:47:48.783.333 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49154, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26100,ffff1c32e0f0,python3.7):2025-02-07-15:47:48.783.359 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49154 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26100,ffff9bab1c10,python3.7):2025-02-07-15:47:48.783.373 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(26068,ffffad937c10,python3.7):2025-02-07-15:47:48.846.856 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49156, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26068,ffff2e1b50f0,python3.7):2025-02-07-15:47:48.846.879 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49156 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26068,ffffad937c10,python3.7):2025-02-07-15:47:48.846.887 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(26112,ffff01cf40f0,python3.7):2025-02-07-15:47:48.863.711 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49158 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26112,ffff8247fc10,python3.7):2025-02-07-15:47:48.863.710 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49158, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26112,ffff8247fc10,python3.7):2025-02-07-15:47:48.863.865 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49160, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26112,ffff02cf60f0,python3.7):2025-02-07-15:47:48.863.891 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49160 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26112,ffff8247fc10,python3.7):2025-02-07-15:47:48.863.905 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(26020,ffffbacacc10,python3.7):2025-02-07-15:47:48.884.849 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(26040,ffffb911fc10,python3.7):2025-02-07-15:47:48.901.570 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(26051,ffff7f9d5c10,python3.7):2025-02-07-15:47:48.929.238 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(26029,ffffbca4cc10,python3.7):2025-02-07-15:47:49.080.009 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(26086,ffffa208fc10,python3.7):2025-02-07-15:47:49.155.876 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49162, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(26086,ffffa208fc10,python3.7):2025-02-07-15:47:49.155.904 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(26086,ffff229160f0,python3.7):2025-02-07-15:47:49.155.906 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49162 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(26100,ffff9bab1c10,python3.7):2025-02-07-15:47:49.283.721 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(26068,ffffad937c10,python3.7):2025-02-07-15:47:49.347.330 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(26112,ffff8247fc10,python3.7):2025-02-07-15:47:49.364.233 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(26020,ffffbacacc10,python3.7):2025-02-07-15:47:49.384.957 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(26040,ffffb911fc10,python3.7):2025-02-07-15:47:49.401.663 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(26051,ffff7f9d5c10,python3.7):2025-02-07-15:47:49.429.331 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(26029,ffffbca4cc10,python3.7):2025-02-07-15:47:49.580.095 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(26086,ffffa208fc10,python3.7):2025-02-07-15:47:49.656.339 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(26100,ffff9bab1c10,python3.7):2025-02-07-15:47:49.783.811 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(26068,ffffad937c10,python3.7):2025-02-07-15:47:49.847.435 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(26112,ffff8247fc10,python3.7):2025-02-07-15:47:49.864.321 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(26020,ffffbacacc10,python3.7):2025-02-07-15:47:49.885.053 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(4/1200). [WARNING] DISTRIBUTED(26040,ffffb911fc10,python3.7):2025-02-07-15:47:49.901.747 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(26051,ffff7f9d5c10,python3.7):2025-02-07-15:47:49.929.413 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(26029,ffffbca4cc10,python3.7):2025-02-07-15:47:50.080.176 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(26086,ffffa208fc10,python3.7):2025-02-07-15:47:50.156.453 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(26086,ffffa208fc10,python3.7):2025-02-07-15:47:50.156.485 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 5 rank id: 5 [WARNING] DISTRIBUTED(26100,ffff9bab1c10,python3.7):2025-02-07-15:47:50.283.924 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(26100,ffff9bab1c10,python3.7):2025-02-07-15:47:50.283.959 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 6 rank id: 6 [WARNING] DISTRIBUTED(26068,ffffad937c10,python3.7):2025-02-07-15:47:50.347.546 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(26068,ffffad937c10,python3.7):2025-02-07-15:47:50.347.576 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 4 rank id: 4 [WARNING] DISTRIBUTED(26112,ffff8247fc10,python3.7):2025-02-07-15:47:50.364.420 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(26112,ffff8247fc10,python3.7):2025-02-07-15:47:50.364.449 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 7 rank id: 7 [WARNING] DISTRIBUTED(26020,ffffbacacc10,python3.7):2025-02-07-15:47:50.385.165 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(26020,ffffbacacc10,python3.7):2025-02-07-15:47:50.385.206 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 0 rank id: 0 [WARNING] DISTRIBUTED(26040,ffffb911fc10,python3.7):2025-02-07-15:47:50.401.840 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(26040,ffffb911fc10,python3.7):2025-02-07-15:47:50.401.864 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 2 rank id: 2 [WARNING] DISTRIBUTED(26051,ffff7f9d5c10,python3.7):2025-02-07-15:47:50.429.524 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(26051,ffff7f9d5c10,python3.7):2025-02-07-15:47:50.429.561 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 3 rank id: 3 [WARNING] DISTRIBUTED(26029,ffffbca4cc10,python3.7):2025-02-07-15:47:50.580.283 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(26029,ffffbca4cc10,python3.7):2025-02-07-15:47:50.580.314 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 1 rank id: 1 [WARNING] DISTRIBUTED(26086,ffffa208fc10,python3.7):2025-02-07-15:47:57.005.097 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(26086,ffffa208fc10,python3.7):2025-02-07-15:47:57.006.730 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(26086,fffeb5ffb0f0,python3.7):2025-02-07-15:47:57.010.660 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 distribute network. collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DEVICE(26086,fffeb5ffb0f0,python3.7):2025-02-07-15:47:57.512.185 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 2 [WARNING] DEVICE(26086,fffeb5ffb0f0,python3.7):2025-02-07-15:47:58.014.128 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 397/400, sleep 2 [WARNING] DISTRIBUTED(26100,ffff9bab1c10,python3.7):2025-02-07-15:47:58.430.887 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(26100,ffff9bab1c10,python3.7):2025-02-07-15:47:58.432.547 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(26100,fffe8f7fe0f0,python3.7):2025-02-07-15:47:58.437.526 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 distribute network. [WARNING] DEVICE(26086,fffeb5ffb0f0,python3.7):2025-02-07-15:47:58.515.732 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 396/400, sleep 2 collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DEVICE(26100,fffe8f7fe0f0,python3.7):2025-02-07-15:47:58.939.240 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 [WARNING] DEVICE(26086,fffeb5ffb0f0,python3.7):2025-02-07-15:47:59.017.281 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 395/400, sleep 2 [WARNING] DEVICE(26100,fffe8f7fe0f0,python3.7):2025-02-07-15:47:59.440.933 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 397/400, sleep 1 [WARNING] DISTRIBUTED(26068,ffffad937c10,python3.7):2025-02-07-15:47:59.481.306 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(26068,ffffad937c10,python3.7):2025-02-07-15:47:59.483.252 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(26068,fffeb17fa0f0,python3.7):2025-02-07-15:47:59.489.582 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 [WARNING] DEVICE(26086,fffeb5ffb0f0,python3.7):2025-02-07-15:47:59.521.011 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 394/400, sleep 2 distribute network. collected 1 item remove_redundancy.py [WARNING] DISTRIBUTED(26040,ffffb911fc10,python3.7):2025-02-07-15:47:59.719.745 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(26040,ffffb911fc10,python3.7):2025-02-07-15:47:59.721.438 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group distribute network shard. [WARNING] DEVICE(26040,fffecd7fa0f0,python3.7):2025-02-07-15:47:59.727.345 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 distribute network. distribute network create dataset. collected 1 item distribute network train. remove_redundancy.py distribute network shard. [WARNING] DEVICE(26100,fffe8f7fe0f0,python3.7):2025-02-07-15:47:59.942.606 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 396/400, sleep 1 distribute network create dataset. [WARNING] DEVICE(26068,fffeb17fa0f0,python3.7):2025-02-07-15:47:59.991.352 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 2 [WARNING] DEVICE(26086,fffeb5ffb0f0,python3.7):2025-02-07-15:48:00.022.804 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 393/400, sleep 1 distribute network train. [WARNING] DEVICE(26040,fffecd7fa0f0,python3.7):2025-02-07-15:48:00.228.870 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 2 [WARNING] DEVICE(26100,fffe8f7fe0f0,python3.7):2025-02-07-15:48:00.444.403 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 395/400, sleep 2 [WARNING] DEVICE(26068,fffeb17fa0f0,python3.7):2025-02-07-15:48:00.493.136 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 397/400, sleep 2 [WARNING] DEVICE(26086,fffeb5ffb0f0,python3.7):2025-02-07-15:48:00.523.752 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 392/400, sleep 1 [WARNING] DEVICE(26040,fffecd7fa0f0,python3.7):2025-02-07-15:48:00.730.497 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 397/400, sleep 2 [WARNING] DISTRIBUTED(26020,ffffbacacc10,python3.7):2025-02-07-15:48:00.773.920 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(26020,ffffbacacc10,python3.7):2025-02-07-15:48:00.776.192 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(26020,fffeb6ffd0f0,python3.7):2025-02-07-15:48:00.804.031 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26020,fffeb4ff90f0,python3.7):2025-02-07-15:48:00.806.809 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 distribute network. [WARNING] DISTRIBUTED(26100,fffe8f7fe0f0,python3.7):2025-02-07-15:48:00.949.107 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26100,fffe4f7fe0f0,python3.7):2025-02-07-15:48:00.954.507 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 collected 1 item [WARNING] DISTRIBUTED(26068,fffeb17fa0f0,python3.7):2025-02-07-15:48:00.996.125 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26068,fffdde7fc0f0,python3.7):2025-02-07-15:48:01.000.456 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 remove_redundancy.py [WARNING] DISTRIBUTED(26086,fffeb5ffb0f0,python3.7):2025-02-07-15:48:01.026.384 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26086,fffd60ff90f0,python3.7):2025-02-07-15:48:01.028.326 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 distribute network shard. distribute network create dataset. distribute network train. [WARNING] DISTRIBUTED(26040,fffecd7fa0f0,python3.7):2025-02-07-15:48:01.233.459 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26040,fffdea7fc0f0,python3.7):2025-02-07-15:48:01.239.017 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] PARALLEL(26086,ffffa208fc10,python3.7):2025-02-07-15:48:01.554.465 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] DISTRIBUTED(26051,ffff7f9d5c10,python3.7):2025-02-07-15:48:01.567.150 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(26051,ffff7f9d5c10,python3.7):2025-02-07-15:48:01.568.864 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(26051,fffe737fe0f0,python3.7):2025-02-07-15:48:01.573.228 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26051,fffe72ffd0f0,python3.7):2025-02-07-15:48:01.574.939 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 distribute network. collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DISTRIBUTED(26112,ffff8247fc10,python3.7):2025-02-07-15:48:02.290.924 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(26112,ffff8247fc10,python3.7):2025-02-07-15:48:02.291.160 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(26112,fffe7dffb0f0,python3.7):2025-02-07-15:48:02.292.004 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26112,fffe7d7fa0f0,python3.7):2025-02-07-15:48:02.292.376 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 distribute network. collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DISTRIBUTED(26029,ffffbca4cc10,python3.7):2025-02-07-15:48:02.344.688 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(26029,ffffbca4cc10,python3.7):2025-02-07-15:48:02.344.950 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(26029,fffec0ff90f0,python3.7):2025-02-07-15:48:02.345.663 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26029,fffe9ffff0f0,python3.7):2025-02-07-15:48:02.345.981 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] PARALLEL(26086,ffffa208fc10,python3.7):2025-02-07-15:48:02.347.989 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. distribute network. collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DEVICE(26020,fffeb4ff90f0,python3.7):2025-02-07-15:48:02.507.616 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(26020,fffeb6ffd0f0,python3.7):2025-02-07-15:48:02.507.925 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26040,fffdea7fc0f0,python3.7):2025-02-07-15:48:02.573.731 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(26040,fffecd7fa0f0,python3.7):2025-02-07-15:48:02.574.042 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26068,fffdde7fc0f0,python3.7):2025-02-07-15:48:02.635.496 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(26068,fffeb17fa0f0,python3.7):2025-02-07-15:48:02.635.825 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26051,fffe72ffd0f0,python3.7):2025-02-07-15:48:02.645.098 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(26051,fffe737fe0f0,python3.7):2025-02-07-15:48:02.645.425 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26100,fffe4f7fe0f0,python3.7):2025-02-07-15:48:02.654.275 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(26100,fffe8f7fe0f0,python3.7):2025-02-07-15:48:02.655.880 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] PARALLEL(26100,ffff9bab1c10,python3.7):2025-02-07-15:48:02.663.029 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] DEVICE(26112,fffe7d7fa0f0,python3.7):2025-02-07-15:48:02.675.598 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(26112,fffe7dffb0f0,python3.7):2025-02-07-15:48:02.675.943 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26029,fffe9ffff0f0,python3.7):2025-02-07-15:48:02.732.090 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(26029,fffec0ff90f0,python3.7):2025-02-07-15:48:02.733.542 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(26086,fffd60ff90f0,python3.7):2025-02-07-15:48:02.758.068 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(26086,fffeb5ffb0f0,python3.7):2025-02-07-15:48:02.758.697 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] PARALLEL(26040,ffffb911fc10,python3.7):2025-02-07-15:48:03.233.431 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(26068,ffffad937c10,python3.7):2025-02-07-15:48:03.247.622 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(26100,ffff9bab1c10,python3.7):2025-02-07-15:48:03.248.067 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(26020,ffffbacacc10,python3.7):2025-02-07-15:48:03.387.019 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(26051,ffff7f9d5c10,python3.7):2025-02-07-15:48:03.418.861 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(26112,ffff8247fc10,python3.7):2025-02-07-15:48:03.564.484 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(26029,ffffbca4cc10,python3.7):2025-02-07-15:48:03.616.192 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(26040,ffffb911fc10,python3.7):2025-02-07-15:48:03.837.738 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(26068,ffffad937c10,python3.7):2025-02-07-15:48:03.904.980 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(26020,ffffbacacc10,python3.7):2025-02-07-15:48:03.995.176 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(26051,ffff7f9d5c10,python3.7):2025-02-07-15:48:04.020.505 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(26112,ffff8247fc10,python3.7):2025-02-07-15:48:04.162.711 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(26029,ffffbca4cc10,python3.7):2025-02-07-15:48:04.217.448 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 15.298, [21] [bootstrap]: 0.0349907 [type_inference]: 1.2489 [auto_monad]: 0.00390121 [graph_reusing]: 2.74703e-05 [inline]: 0.0541728, [2] [rewriter_before_opt_a]: 0.0018221 [a1a2]: 0.0519723, [2] [Cycle 1]: 0.0312787, [11] [expand_dump_flag]: 4.96102e-05 [switch_simplify]: 0.001446 [loop_unroll]: 0.00066723 [a_1]: 0.0236783 [recompute_prepare]: 0.0001632 [updatestate_depend_eliminate]: 0.00038612 [updatestate_assign_eliminate]: 0.00025196 [updatestate_loads_eliminate]: 0.00022306 [parameter_eliminate]: 7.70995e-06 [a_2]: 0.00343838 [parallel_inline_pass]: 0.00010462 [Cycle 2]: 0.00558399, [11] [expand_dump_flag]: 2.94996e-06 [switch_simplify]: 9.502e-05 [loop_unroll]: 9.37702e-05 [a_1]: 0.00317225 [recompute_prepare]: 0.0001043 [updatestate_depend_eliminate]: 0.00020446 [updatestate_assign_eliminate]: 6.541e-05 [updatestate_loads_eliminate]: 6.52303e-05 [parameter_eliminate]: 5.41005e-06 [a_2]: 0.00157368 [parallel_inline_pass]: 0.0001035 [parallel-infer-symbol]: 0.00091413 [pre_auto_parallel]: 0.00021853 [insert-virtual-dataset]: 0.00409238 [parallel-infer-symbol-second]: 3.17022e-06 [dataset_repeat_opt]: 0.00022782 [pipeline_split]: 0.0003931 [optimize]: 0.794221, [52] [py_interpret_to_execute]: 0.00014898 [rewriter_before_opt_a]: 0.00028291 [opt_a]: 0.778416, [3] [Cycle 1]: 0.690981, [46] [expand_dump_flag]: 2.75997e-06 [switch_simplify]: 0.00011223 [loop_unroll]: 9.91398e-05 [a_1]: 0.00334727 [recompute_prepare]: 0.00010596 [updatestate_depend_eliminate]: 0.00010765 [updatestate_assign_eliminate]: 6.43097e-05 [updatestate_loads_eliminate]: 6.83698e-05 [parameter_eliminate]: 4.61005e-06 [a_2]: 0.00161745 [accelerated_algorithm]: 0.00065059 [shard]: 2.88989e-06 [meta_shard_fg_expand]: 5.05298e-05 [shard_inline]: 0.00011098 [auto_parallel]: 8.51299e-05 [parallel]: 0.0342282 [flash_sp]: 0.00061523 [merge_comm]: 0.00013612 [allreduce_fusion]: 7.52104e-05 [matmul_add_comm_reduction]: 9.96101e-05 [allreduce_slice_to_reducescatter]: 5.40167e-07 [virtual_shard_identity]: 0.00013238 [virtual_dataset]: 0.00017438 [get_grad_eliminate_]: 0.00014628 [virtual_output]: 0.00011441 [merge_forward]: 8.11997e-05 [cell_reuse_recompute_pass]: 4.13973e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021614 [before_grad]: 0.00020529 [inplace_validation]: 0.0001307 [parallel_renormalize]: 0.0307136 [update_top_fg]: 1.22981e-06 [cast_eliminate]: 0.00015316 [meta_fg_expand]: 0.372376 [inplace_validation_after_expand]: 0.00161339 [flash_sp_send_recv_attached]: 0.00117772 [receive_attached]: 8.685e-05 [after_resolve]: 0.00199323 [a_after_grad]: 0.00390711 [special_op_eliminate]: 0.00184501 [renormalize]: 0.202917 [add_forward_monad_depend]: 0.00037427 [auto_monad_grad]: 0.00022699 [auto_monad_eliminator]: 0.0018445 [cse]: 0.00439609 [a_3]: 0.0238205 [Cycle 2]: 0.0751369, [46] [expand_dump_flag]: 5.88601e-05 [switch_simplify]: 0.0018396 [loop_unroll]: 0.0014979 [a_1]: 0.0335032 [recompute_prepare]: 0.00020172 [updatestate_depend_eliminate]: 0.00025427 [updatestate_assign_eliminate]: 0.00010452 [updatestate_loads_eliminate]: 0.00019136 [parameter_eliminate]: 7.34022e-06 [a_2]: 0.00444547 [accelerated_algorithm]: 0.00016965 [shard]: 4.71994e-06 [meta_shard_fg_expand]: 0.00010226 [shard_inline]: 0.00014213 [auto_parallel]: 0.00012445 [parallel]: 1.79601e-05 [flash_sp]: 0.00013229 [merge_comm]: 0.00012714 [allreduce_fusion]: 9.52901e-05 [matmul_add_comm_reduction]: 0.00012048 [allreduce_slice_to_reducescatter]: 7.39936e-07 [virtual_shard_identity]: 0.00014612 [virtual_dataset]: 0.00014089 [get_grad_eliminate_]: 0.00013349 [virtual_output]: 0.00014195 [merge_forward]: 9.31597e-05 [cell_reuse_recompute_pass]: 4.57e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025977 [before_grad]: 0.00024215 [inplace_validation]: 8.62703e-05 [parallel_renormalize]: 8.00937e-08 [update_top_fg]: 9.60194e-07 [cast_eliminate]: 0.00016534 [meta_fg_expand]: 0.00029784 [inplace_validation_after_expand]: 0.00017992 [flash_sp_send_recv_attached]: 3.41004e-06 [receive_attached]: 2.08989e-06 [after_resolve]: 0.00016476 [a_after_grad]: 0.00023248 [special_op_eliminate]: 0.00013822 [renormalize]: 0.0201979 [add_forward_monad_depend]: 1.036e-05 [auto_monad_grad]: 3.58978e-06 [auto_monad_eliminator]: 0.00030067 [cse]: 0.00746447 [a_3]: 0.00097809 [Cycle 3]: 0.012263, [46] [expand_dump_flag]: 4.51971e-06 [switch_simplify]: 0.00014034 [loop_unroll]: 0.00013168 [a_1]: 0.00438144 [recompute_prepare]: 0.00014232 [updatestate_depend_eliminate]: 0.00016195 [updatestate_assign_eliminate]: 9.68296e-05 [updatestate_loads_eliminate]: 9.68901e-05 [parameter_eliminate]: 6.57002e-06 [a_2]: 0.00209486 [accelerated_algorithm]: 0.00016336 [shard]: 3.15998e-06 [meta_shard_fg_expand]: 6.217e-05 [shard_inline]: 0.00013744 [auto_parallel]: 0.00012526 [parallel]: 1.65799e-05 [flash_sp]: 3.49991e-06 [merge_comm]: 0.00012569 [allreduce_fusion]: 9.90597e-05 [matmul_add_comm_reduction]: 0.00012609 [allreduce_slice_to_reducescatter]: 5.09899e-07 [virtual_shard_identity]: 0.00014315 [virtual_dataset]: 0.00013481 [get_grad_eliminate_]: 0.00012954 [virtual_output]: 0.0001315 [merge_forward]: 9.595e-05 [cell_reuse_recompute_pass]: 6.79027e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025504 [before_grad]: 0.00023361 [inplace_validation]: 9.12398e-05 [parallel_renormalize]: 8.98726e-08 [update_top_fg]: 1.09989e-06 [cast_eliminate]: 0.00015696 [meta_fg_expand]: 0.00010894 [inplace_validation_after_expand]: 0.00013091 [flash_sp_send_recv_attached]: 3.81982e-06 [receive_attached]: 2.35019e-06 [after_resolve]: 0.00015524 [a_after_grad]: 0.00022636 [special_op_eliminate]: 0.00013265 [renormalize]: 9.96515e-08 [add_forward_monad_depend]: 4.97978e-06 [auto_monad_grad]: 3.9502e-06 [auto_monad_eliminator]: 0.00019804 [cse]: 0.00044119 [a_3]: 0.00095855 [py_interpret_to_execute_after_opt_a]: 0.00015649 [slice_cell_reuse_recomputed_activation]: 3.03006e-06 [rewriter_after_opt_a]: 0.00113564 [convert_after_rewriter]: 0.00011718 [order_py_execute_after_rewriter]: 8.605e-05 [opt_b]: 0.00399483, [1] [Cycle 1]: 0.00398265, [7] [b_1]: 0.00306037 [b_2]: 0.00014218 [updatestate_depend_eliminate]: 0.00010344 [updatestate_assign_eliminate]: 8.82703e-05 [updatestate_loads_eliminate]: 9.29302e-05 [renormalize]: 8.50298e-07 [cse]: 0.00042418 [optimize_parallel_all_gather_comm]: 0.00014329 [overlap_param_gather]: 1.60001e-06 [cconv]: 8.92999e-05 [loop_unroll]: 0.00103248 [opt_after_cconv]: 0.00165546, [1] [Cycle 1]: 0.00164423, [7] [c_1]: 0.00079671 [parameter_eliminate]: 4.77023e-06 [updatestate_depend_eliminate]: 0.00017035 [updatestate_assign_eliminate]: 9.32203e-05 [updatestate_loads_eliminate]: 9.35299e-05 [cse]: 0.00041435 [renormalize]: 1.05985e-06 [remove_dup_value]: 0.00064942 [tuple_transform]: 0.00096255, [1] [Cycle 1]: 0.00095287, [2] [d_1]: 0.00093082 [renormalize]: 8.2003e-07 [partial_unused_args_eliminate]: 5.03985e-06 [add_cache_embedding]: 0.00016565 [add_recomputation]: 0.00078078 [cse_after_recomputation]: 0.00032486, [1] [Cycle 1]: 0.00031571, [1] [cse]: 0.00029948 [environ_conv]: 0.00011002 [swap_dp_allreduce_reducescatter]: 0.00014555 [bias_add_comm_swap]: 4.33018e-06 [label_micro_interleaved_index]: 2.48989e-06 [label_fine_grained_interleaved_index]: 0.00053733 [merge_cast_opt]: 1.60001e-06 [slice_recompute_activation]: 0.00015267 [micro_interleaved_order_control]: 2.60025e-06 [assign_add_opt]: 0.00041413 [ForceFp32Comm]: 1.34995e-06 [remove_cast_before_assign_add]: 0.00010836 [full_micro_interleaved_order_control]: 2.63005e-06 [reorder_send_recv_between_fp_bp]: 2.01026e-06 [comm_op_add_attrs]: 0.00016059 [add_comm_op_reuse_tag]: 0.00016356 [interleave_split_concat_branches]: 1.37975e-06 [interleave_parallel_branches]: 9.20147e-07 [overlap_opt_shard_in_pipeline]: 4.61098e-05 [overlap_opt_shard_grad_in_pipeline]: 3.85009e-06 [control_data_broadcast_order]: 1.22003e-06 [grouped_pairwise_exchange_alltoall]: 1.375e-05 [offloading_packed_experts]: 2.82004e-06 [overlap_recompute_and_grad_model_parallel]: 2.82004e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.09785e-07 [overlap_recompute_allgather_and_fa_grad]: 0.00010186 [overlap_grad_ring_attention]: 0.00015154 [overlap_grad_flash_sp]: 0.00012165 [begin_end_overlap_inline]: 8.89879e-07 [split_matmul_comm_elemetwise]: 2.17976e-06 [split_layernorm_comm]: 3.00026e-06 [handle_group_info]: 6.63009e-06 [symbol_engine_optimizer]: 0.00083501, [1] [Cycle 1]: 0.0008268, [6] [build]: 6.07399e-05 [elim_shapecalc]: 0.00015191 [elim_not_effective]: 0.0002249 [opt_reshape]: 0.00013168 [fold_const_symbol]: 0.00021316 [renormalize]: 4.80097e-07 [pipeline_parallel_scheduler]: 4.18955e-06 [auto_monad_reorder]: 0.00035467 [get_jit_bprop_graph]: 5.60191e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00150538 [distribtued_split]: 0.00045583 [validate]: 0.00031276 [task_emit]: 13.1516 [execute]: 1.35098e-05 Sums bootstrap : 0.034991s : 0.23% type_inference : 1.248905s : 8.18% auto_monad : 0.003901s : 0.03% graph_reusing : 0.000027s : 0.00% inline.rewriter_before_opt_a : 0.001822s : 0.01% inline.a1a2.expand_dump_flag : 0.000053s : 0.00% inline.a1a2.switch_simplify : 0.001541s : 0.01% inline.a1a2.loop_unroll : 0.000761s : 0.00% inline.a1a2.a_1 : 0.026851s : 0.18% inline.a1a2.recompute_prepare : 0.000267s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000591s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000317s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000288s : 0.00% inline.a1a2.parameter_eliminate : 0.000013s : 0.00% inline.a1a2.a_2 : 0.005012s : 0.03% inline.a1a2.parallel_inline_pass : 0.000208s : 0.00% parallel-infer-symbol : 0.000914s : 0.01% pre_auto_parallel : 0.000219s : 0.00% insert-virtual-dataset : 0.004092s : 0.03% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000228s : 0.00% pipeline_split : 0.000393s : 0.00% optimize.py_interpret_to_execute : 0.000149s : 0.00% optimize.rewriter_before_opt_a : 0.000283s : 0.00% optimize.opt_a.expand_dump_flag : 0.000066s : 0.00% optimize.opt_a.switch_simplify : 0.002092s : 0.01% optimize.opt_a.loop_unroll : 0.001729s : 0.01% optimize.opt_a.a_1 : 0.041232s : 0.27% optimize.opt_a.recompute_prepare : 0.000450s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000524s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000266s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000357s : 0.00% optimize.opt_a.parameter_eliminate : 0.000019s : 0.00% optimize.opt_a.a_2 : 0.008158s : 0.05% optimize.opt_a.accelerated_algorithm : 0.000984s : 0.01% optimize.opt_a.shard : 0.000011s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000215s : 0.00% optimize.opt_a.shard_inline : 0.000391s : 0.00% optimize.opt_a.auto_parallel : 0.000335s : 0.00% optimize.opt_a.parallel : 0.034263s : 0.22% optimize.opt_a.flash_sp : 0.000751s : 0.00% optimize.opt_a.merge_comm : 0.000389s : 0.00% optimize.opt_a.allreduce_fusion : 0.000270s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000346s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000422s : 0.00% optimize.opt_a.virtual_dataset : 0.000450s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000409s : 0.00% optimize.opt_a.virtual_output : 0.000388s : 0.00% optimize.opt_a.merge_forward : 0.000270s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000016s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000731s : 0.00% optimize.opt_a.before_grad : 0.000681s : 0.00% optimize.opt_a.inplace_validation : 0.000308s : 0.00% optimize.opt_a.parallel_renormalize : 0.030714s : 0.20% optimize.opt_a.update_top_fg : 0.000003s : 0.00% optimize.opt_a.cast_eliminate : 0.000475s : 0.00% optimize.opt_a.meta_fg_expand : 0.372783s : 2.44% optimize.opt_a.inplace_validation_after_expand : 0.001924s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001185s : 0.01% optimize.opt_a.receive_attached : 0.000091s : 0.00% optimize.opt_a.after_resolve : 0.002313s : 0.02% optimize.opt_a.a_after_grad : 0.004366s : 0.03% optimize.opt_a.special_op_eliminate : 0.002116s : 0.01% optimize.opt_a.renormalize : 0.223115s : 1.46% optimize.opt_a.add_forward_monad_depend : 0.000390s : 0.00% optimize.opt_a.auto_monad_grad : 0.000235s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002343s : 0.02% optimize.opt_a.cse : 0.012302s : 0.08% optimize.opt_a.a_3 : 0.025757s : 0.17% optimize.py_interpret_to_execute_after_opt_a : 0.000156s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001136s : 0.01% optimize.convert_after_rewriter : 0.000117s : 0.00% optimize.order_py_execute_after_rewriter : 0.000086s : 0.00% optimize.opt_b.b_1 : 0.003060s : 0.02% optimize.opt_b.b_2 : 0.000142s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000103s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000088s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000093s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000424s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000143s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000089s : 0.00% optimize.loop_unroll : 0.001032s : 0.01% optimize.opt_after_cconv.c_1 : 0.000797s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000170s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000093s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000094s : 0.00% optimize.opt_after_cconv.cse : 0.000414s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000649s : 0.00% optimize.tuple_transform.d_1 : 0.000931s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000005s : 0.00% optimize.add_cache_embedding : 0.000166s : 0.00% optimize.add_recomputation : 0.000781s : 0.01% optimize.cse_after_recomputation.cse : 0.000299s : 0.00% optimize.environ_conv : 0.000110s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000146s : 0.00% optimize.bias_add_comm_swap : 0.000004s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000537s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000153s : 0.00% optimize.micro_interleaved_order_control : 0.000003s : 0.00% optimize.assign_add_opt : 0.000414s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000108s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000161s : 0.00% optimize.add_comm_op_reuse_tag : 0.000164s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000046s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000014s : 0.00% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000003s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000102s : 0.00% optimize.overlap_grad_ring_attention : 0.000152s : 0.00% optimize.overlap_grad_flash_sp : 0.000122s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000003s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000061s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000152s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000225s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000132s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000213s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000355s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001505s : 0.01% distribtued_split : 0.000456s : 0.00% validate : 0.000313s : 0.00% task_emit : 13.151571s : 86.09% execute : 0.000014s : 0.00% Time group info: ------[substitution.] 0.056953 4298 0.04% : 0.000022s : 5: substitution.ad_related_special_op_eliminate 0.05% : 0.000031s : 9: substitution.addn_check_dump 0.13% : 0.000073s : 7: substitution.addn_zero_filter 0.04% : 0.000021s : 7: substitution.adjust_all_reduce_mul_add 0.77% : 0.000436s : 71: substitution.arithmetic_simplify 0.16% : 0.000090s : 10: substitution.cast_eliminate 0.11% : 0.000061s : 47: substitution.depend_value_elim 0.05% : 0.000031s : 107: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 1.09% : 0.000622s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000022s : 12: substitution.environ_get_depend_swap 0.06% : 0.000032s : 27: substitution.environ_get_eliminate 0.08% : 0.000045s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000021s : 23: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.03% : 0.000015s : 10: substitution.float_tuple_getitem_switch 0.05% : 0.000031s : 107: substitution.fold_const_symbol 63.49% : 0.036157s : 257: substitution.getattr_setattr_resolve 0.15% : 0.000085s : 126: substitution.graph_param_transform 0.02% : 0.000010s : 8: substitution.incorporate_call 0.01% : 0.000006s : 8: substitution.incorporate_call_switch 23.45% : 0.013358s : 331: substitution.inline 1.22% : 0.000696s : 112: substitution.inline_without_move 0.22% : 0.000126s : 309: substitution.j_node_and_user_rematch 0.70% : 0.000399s : 40: substitution.less_batch_normalization 0.09% : 0.000050s : 90: substitution.load_eliminater 0.13% : 0.000074s : 10: substitution.merge_addn 0.21% : 0.000118s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.58% : 0.000327s : 1: substitution.partial_defer_inline 0.12% : 0.000067s : 23: substitution.partial_eliminate 0.03% : 0.000018s : 26: substitution.reduce_all_const_elim 0.08% : 0.000043s : 15: substitution.reduce_eliminate 0.28% : 0.000160s : 309: substitution.remove_not_recompute_node 1.86% : 0.001059s : 508: substitution.replace_applicator 0.20% : 0.000115s : 251: substitution.replace_old_param 0.09% : 0.000051s : 11: substitution.reshape_eliminate 0.03% : 0.000015s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000012s : 4: substitution.specialize_transform 0.04% : 0.000025s : 12: substitution.split_environ_get_set_with_tuple_value 0.17% : 0.000095s : 34: substitution.switch_simplify 0.07% : 0.000038s : 11: substitution.tile_eliminate 0.47% : 0.000266s : 101: substitution.tuple_list_convert_item_index_to_positive 0.24% : 0.000136s : 107: substitution.tuple_list_get_item_const_eliminator 0.37% : 0.000213s : 107: substitution.tuple_list_get_item_depend_reorder 1.50% : 0.000856s : 308: substitution.tuple_list_get_item_eliminator 0.33% : 0.000188s : 107: substitution.tuple_list_get_set_item_eliminator 0.36% : 0.000205s : 210: substitution.updatestate_pure_node_eliminater 0.71% : 0.000404s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000014s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 1.248402 2 97.27% : 1.214344s : 1: type_inference.infer 2.73% : 0.034058s : 1: type_inference.specialize ------[replace.] 0.010766 775 0.42% : 0.000046s : 5: replace.ad_related_special_op_eliminate 0.06% : 0.000006s : 1: replace.arithmetic_simplify 0.47% : 0.000051s : 7: replace.depend_value_elim 0.42% : 0.000045s : 3: replace.environ_get_set_eliminate 28.07% : 0.003022s : 183: replace.getattr_setattr_resolve 29.38% : 0.003163s : 310: replace.inline 0.25% : 0.000027s : 1: replace.merge_addn 1.09% : 0.000118s : 7: replace.partial_eliminate 4.42% : 0.000475s : 25: replace.replace_applicator 4.75% : 0.000512s : 34: replace.switch_simplify 0.48% : 0.000052s : 6: replace.tuple_list_get_item_depend_reorder 29.83% : 0.003212s : 191: replace.tuple_list_get_item_eliminator 0.15% : 0.000017s : 1: replace.updatestate_useless_node_eliminater 0.20% : 0.000021s : 1: replace.virtual_dataset_eliminate ------[match.] 0.047526 775 0.04% : 0.000019s : 5: match.ad_related_special_op_eliminate 0.04% : 0.000018s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.05% : 0.000025s : 3: match.environ_get_set_eliminate 70.56% : 0.033533s : 183: match.getattr_setattr_resolve 27.60% : 0.013119s : 310: match.inline 0.08% : 0.000039s : 1: match.merge_addn 0.10% : 0.000046s : 7: match.partial_eliminate 0.25% : 0.000120s : 25: match.replace_applicator 0.16% : 0.000074s : 34: match.switch_simplify 0.07% : 0.000032s : 6: match.tuple_list_get_item_depend_reorder 0.99% : 0.000469s : 191: match.tuple_list_get_item_eliminator 0.03% : 0.000015s : 1: match.updatestate_useless_node_eliminater 0.03% : 0.000012s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020521131318 0.82% : 0.000168s : 1198: predicate.accumulaten_eliminater 0.27% : 0.000056s : 254: predicate.ad_related_special_op_eliminate 0.56% : 0.000116s : 835: predicate.addn_check_dump 0.76% : 0.000157s : 1198: predicate.addn_zero_filter 0.75% : 0.000154s : 1198: predicate.adjust_all_reduce_mul_add 1.88% : 0.000386s : 2034: predicate.arithmetic_simplify 1.25% : 0.000256s : 1586: predicate.cast_eliminate 2.95% : 0.000606s : 3484: predicate.check_bprop_eliminate 0.56% : 0.000115s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.18% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.22% : 0.000251s : 1399: predicate.convert_tensor_eliminate 0.57% : 0.000117s : 838: predicate.depend_value_elim 0.84% : 0.000172s : 1202: predicate.dict_get_item_const_eliminator 0.88% : 0.000180s : 1202: predicate.dict_get_item_eliminator 0.83% : 0.000170s : 1202: predicate.dict_set_item_eliminator 0.05% : 0.000010s : 126: predicate.elim_not_effective 0.11% : 0.000023s : 126: predicate.elim_shapecalc_of_broadcastargs 0.85% : 0.000175s : 1334: predicate.environ_add_const_eliminate 0.86% : 0.000177s : 1337: predicate.environ_get_add_eliminate 0.87% : 0.000178s : 1334: predicate.environ_get_depend_swap 1.48% : 0.000304s : 2172: predicate.environ_get_eliminate 0.86% : 0.000176s : 1337: predicate.environ_get_set_eliminate 1.30% : 0.000267s : 1717: predicate.exchange_switch_depend_value 1.46% : 0.000299s : 1717: predicate.float_depend_g_call 0.56% : 0.000116s : 835: predicate.float_environ_get_switch 0.65% : 0.000134s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.29% : 0.000060s : 395: predicate.get_grad_eliminate 2.33% : 0.000477s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.56% : 0.000114s : 835: predicate.incorporate_call 0.55% : 0.000113s : 835: predicate.incorporate_call_switch 4.01% : 0.000824s : 4602: predicate.inline 2.29% : 0.000470s : 2203: predicate.inline_without_move 0.14% : 0.000029s : 395: predicate.j_node_and_user_rematch 0.95% : 0.000196s : 388: predicate.less_batch_normalization 1.22% : 0.000250s : 1660: predicate.list_to_tuple_eliminator_ 1.89% : 0.000388s : 2874: predicate.load_eliminater 0.19% : 0.000039s : 135: predicate.loop_unroll_after_grad 2.30% : 0.000472s : 2640: predicate.loop_unroll_before_grad 0.96% : 0.000196s : 1478: predicate.make_slice_get_slice_eliminator 0.58% : 0.000118s : 837: predicate.merge_addn 2.86% : 0.000587s : 3380: predicate.micro_step_allgather_replace 2.88% : 0.000591s : 3380: predicate.mini_step_allgather_replace 0.81% : 0.000166s : 1199: predicate.minmaximum_grad 0.18% : 0.000036s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 2.10% : 0.000430s : 1717: predicate.partial_defer_inline 1.10% : 0.000227s : 1541: predicate.partial_eliminate 0.79% : 0.000161s : 1198: predicate.print_const_string_wrapper 0.56% : 0.000115s : 824: predicate.reduce_all_const_elim 1.00% : 0.000205s : 1199: predicate.reduce_eliminate 0.14% : 0.000028s : 395: predicate.remove_not_recompute_node 1.95% : 0.000401s : 4829: predicate.replace_applicator 0.80% : 0.000164s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.80% : 0.000165s : 1199: predicate.reshape_eliminate 3.20% : 0.000656s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000023s : 135: predicate.row_tensor_eliminate 3.08% : 0.000633s : 3484: predicate.same_eliminate 0.24% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.30% : 0.000062s : 395: predicate.shard_identity_eliminate 2.08% : 0.000426s : 2338: predicate.special_op_eliminate 0.64% : 0.000130s : 837: predicate.specialize_transform 3.17% : 0.000651s : 3380: predicate.split_environ_get_set_with_tuple_value 1.61% : 0.000330s : 2203: predicate.stack_unstack_eliminate 1.91% : 0.000391s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.25% : 0.000257s : 1717: predicate.switch_defer_inline 4.18% : 0.000857s : 5201: predicate.switch_layer_defer_inline 4.45% : 0.000913s : 5262: predicate.switch_simplify 0.78% : 0.000161s : 1199: predicate.tile_eliminate 0.77% : 0.000158s : 1199: predicate.transpose_eliminate 1.09% : 0.000224s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.07% : 0.000220s : 1469: predicate.tuple_list_get_item_const_eliminator 0.95% : 0.000196s : 1469: predicate.tuple_list_get_item_depend_reorder 1.95% : 0.000401s : 2495: predicate.tuple_list_get_item_eliminator 1.04% : 0.000213s : 1469: predicate.tuple_list_get_set_item_eliminator 1.68% : 0.000345s : 2304: predicate.tuple_list_set_item_eliminator 1.10% : 0.000227s : 1660: predicate.tuple_to_list_eliminator_ 1.97% : 0.000405s : 2874: predicate.updatestate_pure_node_eliminater 2.50% : 0.000513s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000021s : 135: predicate.value_based_eliminate 0.29% : 0.000060s : 397: predicate.virtual_dataset_eliminate 0.28% : 0.000058s : 395: predicate.virtual_output_eliminate 0.11% : 0.000023s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.088161 747 63.64% : 0.056106s : 338: func_graph_cloner_run.FuncGraphClonerGraph 1.89% : 0.001663s : 22: func_graph_cloner_run.FuncGraphClonerNode 34.47% : 0.030393s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 16.575299 346 0.00% : 0.000006s : 1: ForceFp32Comm 0.31% : 0.051980s : 1: a1a2 0.00% : 0.000176s : 1: add_cache_embedding 0.00% : 0.000172s : 1: add_comm_op_reuse_tag 0.00% : 0.000797s : 1: add_recomputation 0.00% : 0.000426s : 1: assign_add_opt 0.02% : 0.003940s : 1: auto_monad 0.00% : 0.000371s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000008s : 1: bias_add_comm_swap 0.21% : 0.035072s : 1: bootstrap 0.00% : 0.000097s : 1: cconv 0.00% : 0.000169s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.00% : 0.000125s : 1: convert_after_rewriter 0.00% : 0.000330s : 1: cse_after_recomputation 0.00% : 0.000238s : 1: dataset_repeat_opt 0.00% : 0.000473s : 1: distribtued_split 0.01% : 0.001524s : 1: eliminate_special_op_node 0.00% : 0.000119s : 1: environ_conv 0.00% : 0.000023s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000039s : 1: graph_reusing 0.00% : 0.000017s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000010s : 1: handle_group_info 0.33% : 0.054192s : 1: inline 0.02% : 0.004117s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000547s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.001044s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.21% : 0.034560s : 61: opt.transform.a1a2 0.00% : 0.000184s : 1: opt.transform.loop_unroll_optimizer 0.56% : 0.092845s : 148: opt.transform.opt_a 0.00% : 0.000794s : 1: opt.transform.opt_after_cconv 0.02% : 0.003172s : 27: opt.transform.opt_b 0.25% : 0.042180s : 16: opt.transform.opt_resolve 0.01% : 0.000927s : 1: opt.transform.opt_trans_graph 0.01% : 0.000843s : 6: opt.transform.special_op_eliminate 0.00% : 0.000715s : 4: opt.transform.symbol_engine_opt 4.70% : 0.778424s : 1: opt_a 0.01% : 0.001663s : 1: opt_after_cconv 0.02% : 0.004000s : 1: opt_b 4.79% : 0.794237s : 1: optimize 0.00% : 0.000153s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000092s : 1: order_py_execute_after_rewriter 0.00% : 0.000126s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000158s : 1: overlap_grad_ring_attention 0.00% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000052s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000107s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000928s : 1: parallel-infer-symbol 0.00% : 0.000010s : 1: parallel-infer-symbol-second 0.00% : 0.000009s : 1: partial_unused_args_eliminate 0.00% : 0.000011s : 1: pipeline_parallel_scheduler 0.00% : 0.000403s : 1: pipeline_split 0.00% : 0.000229s : 1: pre_auto_parallel 0.00% : 0.000157s : 1: py_interpret_to_execute 0.00% : 0.000168s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000115s : 1: remove_cast_before_assign_add 0.00% : 0.000666s : 1: remove_dup_value 1.10% : 0.181937s : 3: renormalize.infer 0.43% : 0.071834s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000007s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001149s : 1: rewriter_after_opt_a 0.01% : 0.002139s : 2: rewriter_before_opt_a 0.00% : 0.000008s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000160s : 1: slice_recompute_activation 0.00% : 0.000006s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000156s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000840s : 1: symbol_engine_optimizer 79.34% : 13.151621s : 1: task_emit 0.01% : 0.000967s : 1: tuple_transform 7.54% : 1.248964s : 1: type_inference 0.01% : 0.001486s : 1: validate TotalTime = 14.5354, [21] [bootstrap]: 0.0214023 [type_inference]: 0.984168 [auto_monad]: 0.00199649 [graph_reusing]: 2.64198e-05 [inline]: 0.0427568, [2] [rewriter_before_opt_a]: 0.00153907 [a1a2]: 0.0411414, [2] [Cycle 1]: 0.0279703, [11] [expand_dump_flag]: 3.23001e-05 [switch_simplify]: 0.00104997 [loop_unroll]: 0.00068338 [a_1]: 0.0218274 [recompute_prepare]: 0.00016358 [updatestate_depend_eliminate]: 0.0003514 [updatestate_assign_eliminate]: 0.00011444 [updatestate_loads_eliminate]: 0.00019822 [parameter_eliminate]: 5.39981e-06 [a_2]: 0.00325943 [parallel_inline_pass]: 0.00010133 [Cycle 2]: 0.0052445, [11] [expand_dump_flag]: 1.22981e-06 [switch_simplify]: 9.437e-05 [loop_unroll]: 9.40501e-05 [a_1]: 0.00311378 [recompute_prepare]: 9.89898e-05 [updatestate_depend_eliminate]: 7.01402e-05 [updatestate_assign_eliminate]: 5.93099e-05 [updatestate_loads_eliminate]: 6.09001e-05 [parameter_eliminate]: 2.2701e-06 [a_2]: 0.00148062 [parallel_inline_pass]: 0.00010097 [parallel-infer-symbol]: 0.00016643 [pre_auto_parallel]: 8.75597e-05 [insert-virtual-dataset]: 0.00131748 [parallel-infer-symbol-second]: 2.08989e-06 [dataset_repeat_opt]: 7.668e-05 [pipeline_split]: 8.83299e-05 [optimize]: 0.586075, [52] [py_interpret_to_execute]: 0.00012534 [rewriter_before_opt_a]: 0.00027853 [opt_a]: 0.57157, [3] [Cycle 1]: 0.492505, [46] [expand_dump_flag]: 1.57999e-06 [switch_simplify]: 0.00010907 [loop_unroll]: 9.83803e-05 [a_1]: 0.00329699 [recompute_prepare]: 0.00010553 [updatestate_depend_eliminate]: 9.675e-05 [updatestate_assign_eliminate]: 6.217e-05 [updatestate_loads_eliminate]: 7.12797e-05 [parameter_eliminate]: 2.48011e-06 [a_2]: 0.00153727 [accelerated_algorithm]: 0.00023218 [shard]: 1.97021e-06 [meta_shard_fg_expand]: 4.91003e-05 [shard_inline]: 0.00010668 [auto_parallel]: 7.131e-05 [parallel]: 0.0163282 [flash_sp]: 6.76103e-05 [merge_comm]: 0.0001465 [allreduce_fusion]: 7.50599e-05 [matmul_add_comm_reduction]: 9.73102e-05 [allreduce_slice_to_reducescatter]: 6.89644e-07 [virtual_shard_identity]: 0.00012407 [virtual_dataset]: 0.00016299 [get_grad_eliminate_]: 0.00011626 [virtual_output]: 0.00011455 [merge_forward]: 7.67899e-05 [cell_reuse_recompute_pass]: 2.2999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020931 [before_grad]: 0.00019582 [inplace_validation]: 0.00011716 [parallel_renormalize]: 0.0210051 [update_top_fg]: 7.59959e-07 [cast_eliminate]: 0.00017664 [meta_fg_expand]: 0.261433 [inplace_validation_after_expand]: 0.00154346 [flash_sp_send_recv_attached]: 0.00123056 [receive_attached]: 6.86101e-05 [after_resolve]: 0.00200782 [a_after_grad]: 0.00392828 [special_op_eliminate]: 0.00188654 [renormalize]: 0.143713 [add_forward_monad_depend]: 0.00035245 [auto_monad_grad]: 0.00020782 [auto_monad_eliminator]: 0.00184125 [cse]: 0.00413477 [a_3]: 0.0247817 [Cycle 2]: 0.0673141, [46] [expand_dump_flag]: 5.04297e-05 [switch_simplify]: 0.00182062 [loop_unroll]: 0.0015586 [a_1]: 0.0304731 [recompute_prepare]: 0.00017338 [updatestate_depend_eliminate]: 0.00022147 [updatestate_assign_eliminate]: 0.00010303 [updatestate_loads_eliminate]: 0.0001643 [parameter_eliminate]: 3.01981e-06 [a_2]: 0.00427633 [accelerated_algorithm]: 0.00016142 [shard]: 1.76998e-06 [meta_shard_fg_expand]: 6.99698e-05 [shard_inline]: 0.00014102 [auto_parallel]: 0.00011084 [parallel]: 1.05398e-05 [flash_sp]: 0.00011804 [merge_comm]: 0.00010781 [allreduce_fusion]: 9.23602e-05 [matmul_add_comm_reduction]: 0.00010886 [allreduce_slice_to_reducescatter]: 3.39933e-07 [virtual_shard_identity]: 0.0001401 [virtual_dataset]: 0.0001362 [get_grad_eliminate_]: 0.00013245 [virtual_output]: 0.00013416 [merge_forward]: 8.73101e-05 [cell_reuse_recompute_pass]: 1.97999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025139 [before_grad]: 0.00024472 [inplace_validation]: 8.424e-05 [parallel_renormalize]: 7.96281e-08 [update_top_fg]: 7.30157e-07 [cast_eliminate]: 0.00015089 [meta_fg_expand]: 0.00026062 [inplace_validation_after_expand]: 0.00017905 [flash_sp_send_recv_attached]: 1.97999e-06 [receive_attached]: 1.39978e-06 [after_resolve]: 0.00020932 [a_after_grad]: 0.00022688 [special_op_eliminate]: 0.00013563 [renormalize]: 0.0170348 [add_forward_monad_depend]: 4.92018e-06 [auto_monad_grad]: 2.02004e-06 [auto_monad_eliminator]: 0.00029023 [cse]: 0.00644831 [a_3]: 0.0009531 [Cycle 3]: 0.011732, [46] [expand_dump_flag]: 2.30037e-06 [switch_simplify]: 0.00013376 [loop_unroll]: 0.00012991 [a_1]: 0.00424053 [recompute_prepare]: 0.00013942 [updatestate_depend_eliminate]: 0.0001524 [updatestate_assign_eliminate]: 9.24701e-05 [updatestate_loads_eliminate]: 0.0001005 [parameter_eliminate]: 2.96021e-06 [a_2]: 0.00206046 [accelerated_algorithm]: 0.00015604 [shard]: 1.72993e-06 [meta_shard_fg_expand]: 5.01797e-05 [shard_inline]: 0.00013445 [auto_parallel]: 0.00011227 [parallel]: 1.02897e-05 [flash_sp]: 1.94972e-06 [merge_comm]: 0.00010543 [allreduce_fusion]: 9.50401e-05 [matmul_add_comm_reduction]: 0.00011847 [allreduce_slice_to_reducescatter]: 3.50177e-07 [virtual_shard_identity]: 0.00013765 [virtual_dataset]: 0.00013305 [get_grad_eliminate_]: 0.00012902 [virtual_output]: 0.00013109 [merge_forward]: 9.258e-05 [cell_reuse_recompute_pass]: 2.62028e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024732 [before_grad]: 0.00023079 [inplace_validation]: 8.80999e-05 [parallel_renormalize]: 1.00117e-07 [update_top_fg]: 7.39936e-07 [cast_eliminate]: 0.00014753 [meta_fg_expand]: 0.00011418 [inplace_validation_after_expand]: 0.00011401 [flash_sp_send_recv_attached]: 1.76998e-06 [receive_attached]: 1.01002e-06 [after_resolve]: 0.0001477 [a_after_grad]: 0.00021997 [special_op_eliminate]: 0.00013113 [renormalize]: 8.98726e-08 [add_forward_monad_depend]: 1.8198e-06 [auto_monad_grad]: 1.51014e-06 [auto_monad_eliminator]: 0.00016806 [cse]: 0.00042178 [a_3]: 0.0009311 [py_interpret_to_execute_after_opt_a]: 0.00013683 [slice_cell_reuse_recomputed_activation]: 2.31992e-06 [rewriter_after_opt_a]: 0.0009969 [convert_after_rewriter]: 0.00010965 [order_py_execute_after_rewriter]: 8.12202e-05 [opt_b]: 0.00399423, [1] [Cycle 1]: 0.00398508, [7] [b_1]: 0.00308349 [b_2]: 0.00013832 [updatestate_depend_eliminate]: 0.00010284 [updatestate_assign_eliminate]: 9.41199e-05 [updatestate_loads_eliminate]: 0.00010675 [renormalize]: 4.00003e-07 [cse]: 0.00039746 [optimize_parallel_all_gather_comm]: 0.00014717 [overlap_param_gather]: 1.13994e-06 [cconv]: 6.44899e-05 [loop_unroll]: 0.00091564 [opt_after_cconv]: 0.0016243, [1] [Cycle 1]: 0.00161745, [7] [c_1]: 0.00084869 [parameter_eliminate]: 2.31015e-06 [updatestate_depend_eliminate]: 0.00013036 [updatestate_assign_eliminate]: 9.10601e-05 [updatestate_loads_eliminate]: 0.00010096 [cse]: 0.00038882 [renormalize]: 5.69969e-07 [remove_dup_value]: 0.00057973 [tuple_transform]: 0.00094414, [1] [Cycle 1]: 0.00093762, [2] [d_1]: 0.00092164 [renormalize]: 4.20026e-07 [partial_unused_args_eliminate]: 2.08011e-06 [add_cache_embedding]: 0.00014924 [add_recomputation]: 0.00069371 [cse_after_recomputation]: 0.00029917, [1] [Cycle 1]: 0.00029168, [1] [cse]: 0.00027968 [environ_conv]: 9.60398e-05 [swap_dp_allreduce_reducescatter]: 0.00012711 [bias_add_comm_swap]: 2.44984e-06 [label_micro_interleaved_index]: 1.56043e-06 [label_fine_grained_interleaved_index]: 0.00051806 [merge_cast_opt]: 1.26008e-06 [slice_recompute_activation]: 0.00015058 [micro_interleaved_order_control]: 1.36998e-06 [assign_add_opt]: 0.00037736 [ForceFp32Comm]: 1.0198e-06 [remove_cast_before_assign_add]: 0.00010273 [full_micro_interleaved_order_control]: 2.05031e-06 [reorder_send_recv_between_fp_bp]: 1.05007e-06 [comm_op_add_attrs]: 0.00014196 [add_comm_op_reuse_tag]: 0.00014814 [interleave_split_concat_branches]: 7.00355e-07 [interleave_parallel_branches]: 7.5018e-07 [overlap_opt_shard_in_pipeline]: 4.35803e-05 [overlap_opt_shard_grad_in_pipeline]: 2.07964e-06 [control_data_broadcast_order]: 7.69738e-07 [grouped_pairwise_exchange_alltoall]: 9.66992e-06 [offloading_packed_experts]: 1.45007e-06 [overlap_recompute_and_grad_model_parallel]: 1.34017e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.69969e-07 [overlap_recompute_allgather_and_fa_grad]: 9.27602e-05 [overlap_grad_ring_attention]: 0.00014193 [overlap_grad_flash_sp]: 0.00011829 [begin_end_overlap_inline]: 6.79865e-07 [split_matmul_comm_elemetwise]: 1.34017e-06 [split_layernorm_comm]: 1.91992e-06 [handle_group_info]: 4.99981e-06 [symbol_engine_optimizer]: 0.0008078, [1] [Cycle 1]: 0.00080163, [6] [build]: 5.03999e-05 [elim_shapecalc]: 0.00014914 [elim_not_effective]: 0.00022271 [opt_reshape]: 0.00013013 [fold_const_symbol]: 0.00021065 [renormalize]: 3.7998e-07 [pipeline_parallel_scheduler]: 3.15998e-06 [auto_monad_reorder]: 0.00031691 [get_jit_bprop_graph]: 3.90224e-07 [rewriter_after_jit_bprop_graph]: 3.00352e-07 [eliminate_special_op_node]: 0.0014626 [distribtued_split]: 0.00037518 [validate]: 0.00028389 [task_emit]: 12.8933 [execute]: 1.05901e-05 Sums bootstrap : 0.021402s : 0.15% type_inference : 0.984168s : 6.78% auto_monad : 0.001996s : 0.01% graph_reusing : 0.000026s : 0.00% inline.rewriter_before_opt_a : 0.001539s : 0.01% inline.a1a2.expand_dump_flag : 0.000034s : 0.00% inline.a1a2.switch_simplify : 0.001144s : 0.01% inline.a1a2.loop_unroll : 0.000777s : 0.01% inline.a1a2.a_1 : 0.024941s : 0.17% inline.a1a2.recompute_prepare : 0.000263s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000422s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000174s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000259s : 0.00% inline.a1a2.parameter_eliminate : 0.000008s : 0.00% inline.a1a2.a_2 : 0.004740s : 0.03% inline.a1a2.parallel_inline_pass : 0.000202s : 0.00% parallel-infer-symbol : 0.000166s : 0.00% pre_auto_parallel : 0.000088s : 0.00% insert-virtual-dataset : 0.001317s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000077s : 0.00% pipeline_split : 0.000088s : 0.00% optimize.py_interpret_to_execute : 0.000125s : 0.00% optimize.rewriter_before_opt_a : 0.000279s : 0.00% optimize.opt_a.expand_dump_flag : 0.000054s : 0.00% optimize.opt_a.switch_simplify : 0.002063s : 0.01% optimize.opt_a.loop_unroll : 0.001787s : 0.01% optimize.opt_a.a_1 : 0.038011s : 0.26% optimize.opt_a.recompute_prepare : 0.000418s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000471s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000258s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000336s : 0.00% optimize.opt_a.parameter_eliminate : 0.000008s : 0.00% optimize.opt_a.a_2 : 0.007874s : 0.05% optimize.opt_a.accelerated_algorithm : 0.000550s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000169s : 0.00% optimize.opt_a.shard_inline : 0.000382s : 0.00% optimize.opt_a.auto_parallel : 0.000294s : 0.00% optimize.opt_a.parallel : 0.016349s : 0.11% optimize.opt_a.flash_sp : 0.000188s : 0.00% optimize.opt_a.merge_comm : 0.000360s : 0.00% optimize.opt_a.allreduce_fusion : 0.000262s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000325s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000402s : 0.00% optimize.opt_a.virtual_dataset : 0.000432s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000378s : 0.00% optimize.opt_a.virtual_output : 0.000380s : 0.00% optimize.opt_a.merge_forward : 0.000257s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000007s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000708s : 0.00% optimize.opt_a.before_grad : 0.000671s : 0.00% optimize.opt_a.inplace_validation : 0.000289s : 0.00% optimize.opt_a.parallel_renormalize : 0.021005s : 0.14% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000475s : 0.00% optimize.opt_a.meta_fg_expand : 0.261807s : 1.80% optimize.opt_a.inplace_validation_after_expand : 0.001837s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001234s : 0.01% optimize.opt_a.receive_attached : 0.000071s : 0.00% optimize.opt_a.after_resolve : 0.002365s : 0.02% optimize.opt_a.a_after_grad : 0.004375s : 0.03% optimize.opt_a.special_op_eliminate : 0.002153s : 0.01% optimize.opt_a.renormalize : 0.160748s : 1.11% optimize.opt_a.add_forward_monad_depend : 0.000359s : 0.00% optimize.opt_a.auto_monad_grad : 0.000211s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002300s : 0.02% optimize.opt_a.cse : 0.011005s : 0.08% optimize.opt_a.a_3 : 0.026666s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000137s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000997s : 0.01% optimize.convert_after_rewriter : 0.000110s : 0.00% optimize.order_py_execute_after_rewriter : 0.000081s : 0.00% optimize.opt_b.b_1 : 0.003083s : 0.02% optimize.opt_b.b_2 : 0.000138s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000103s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000094s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000107s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000397s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000147s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000064s : 0.00% optimize.loop_unroll : 0.000916s : 0.01% optimize.opt_after_cconv.c_1 : 0.000849s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000130s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000091s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000101s : 0.00% optimize.opt_after_cconv.cse : 0.000389s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000580s : 0.00% optimize.tuple_transform.d_1 : 0.000922s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000149s : 0.00% optimize.add_recomputation : 0.000694s : 0.00% optimize.cse_after_recomputation.cse : 0.000280s : 0.00% optimize.environ_conv : 0.000096s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000127s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000518s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000151s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000377s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000103s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000142s : 0.00% optimize.add_comm_op_reuse_tag : 0.000148s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000044s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000093s : 0.00% optimize.overlap_grad_ring_attention : 0.000142s : 0.00% optimize.overlap_grad_flash_sp : 0.000118s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.00% optimize.symbol_engine_optimizer.build : 0.000050s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000149s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000223s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000130s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000211s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000317s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001463s : 0.01% distribtued_split : 0.000375s : 0.00% validate : 0.000284s : 0.00% task_emit : 12.893270s : 88.77% execute : 0.000011s : 0.00% Time group info: ------[substitution.] 0.048173 4298 0.04% : 0.000020s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000020s : 9: substitution.addn_check_dump 0.10% : 0.000049s : 7: substitution.addn_zero_filter 0.03% : 0.000014s : 7: substitution.adjust_all_reduce_mul_add 0.59% : 0.000283s : 71: substitution.arithmetic_simplify 0.10% : 0.000048s : 10: substitution.cast_eliminate 0.11% : 0.000053s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000023s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000017s : 12: substitution.environ_get_depend_swap 0.06% : 0.000027s : 27: substitution.environ_get_eliminate 0.07% : 0.000032s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000017s : 23: substitution.float_depend_g_call 0.02% : 0.000009s : 12: substitution.float_environ_get_switch 0.02% : 0.000010s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000030s : 107: substitution.fold_const_symbol 65.21% : 0.031416s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000084s : 126: substitution.graph_param_transform 0.02% : 0.000007s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.18% : 0.011165s : 331: substitution.inline 1.40% : 0.000674s : 112: substitution.inline_without_move 0.26% : 0.000124s : 309: substitution.j_node_and_user_rematch 0.26% : 0.000128s : 40: substitution.less_batch_normalization 0.09% : 0.000045s : 90: substitution.load_eliminater 0.10% : 0.000047s : 10: substitution.merge_addn 0.23% : 0.000109s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.07% : 0.000033s : 1: substitution.partial_defer_inline 0.13% : 0.000062s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.06% : 0.000030s : 15: substitution.reduce_eliminate 0.34% : 0.000162s : 309: substitution.remove_not_recompute_node 2.16% : 0.001039s : 508: substitution.replace_applicator 0.22% : 0.000107s : 251: substitution.replace_old_param 0.08% : 0.000037s : 11: substitution.reshape_eliminate 0.02% : 0.000012s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000010s : 4: substitution.specialize_transform 0.03% : 0.000016s : 12: substitution.split_environ_get_set_with_tuple_value 0.17% : 0.000081s : 34: substitution.switch_simplify 0.06% : 0.000027s : 11: substitution.tile_eliminate 0.51% : 0.000247s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000129s : 107: substitution.tuple_list_get_item_const_eliminator 0.42% : 0.000205s : 107: substitution.tuple_list_get_item_depend_reorder 1.62% : 0.000779s : 308: substitution.tuple_list_get_item_eliminator 0.36% : 0.000175s : 107: substitution.tuple_list_get_set_item_eliminator 0.39% : 0.000187s : 210: substitution.updatestate_pure_node_eliminater 0.67% : 0.000325s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.983718 2 97.48% : 0.958902s : 1: type_inference.infer 2.52% : 0.024816s : 1: type_inference.specialize ------[replace.] 0.009307 775 0.42% : 0.000039s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000007s : 1: replace.arithmetic_simplify 0.51% : 0.000047s : 7: replace.depend_value_elim 0.45% : 0.000042s : 3: replace.environ_get_set_eliminate 29.38% : 0.002735s : 183: replace.getattr_setattr_resolve 30.03% : 0.002795s : 310: replace.inline 0.21% : 0.000020s : 1: replace.merge_addn 1.15% : 0.000107s : 7: replace.partial_eliminate 4.01% : 0.000373s : 25: replace.replace_applicator 3.84% : 0.000358s : 34: replace.switch_simplify 0.54% : 0.000050s : 6: replace.tuple_list_get_item_depend_reorder 29.04% : 0.002702s : 191: replace.tuple_list_get_item_eliminator 0.17% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.19% : 0.000017s : 1: replace.virtual_dataset_eliminate ------[match.] 0.040936 775 0.04% : 0.000016s : 5: match.ad_related_special_op_eliminate 0.02% : 0.000010s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000016s : 3: match.environ_get_set_eliminate 71.48% : 0.029260s : 183: match.getattr_setattr_resolve 26.73% : 0.010941s : 310: match.inline 0.05% : 0.000020s : 1: match.merge_addn 0.09% : 0.000036s : 7: match.partial_eliminate 0.24% : 0.000096s : 25: match.replace_applicator 0.15% : 0.000060s : 34: match.switch_simplify 0.07% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 1.05% : 0.000429s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000009s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.021046131318 1.11% : 0.000234s : 1198: predicate.accumulaten_eliminater 0.62% : 0.000131s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000115s : 835: predicate.addn_check_dump 0.75% : 0.000159s : 1198: predicate.addn_zero_filter 0.75% : 0.000157s : 1198: predicate.adjust_all_reduce_mul_add 1.70% : 0.000358s : 2034: predicate.arithmetic_simplify 1.10% : 0.000231s : 1586: predicate.cast_eliminate 3.32% : 0.000698s : 3484: predicate.check_bprop_eliminate 0.55% : 0.000115s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000035s : 242: predicate.convert_tensor_all_eliminate 1.09% : 0.000228s : 1399: predicate.convert_tensor_eliminate 0.57% : 0.000120s : 838: predicate.depend_value_elim 0.81% : 0.000171s : 1202: predicate.dict_get_item_const_eliminator 0.85% : 0.000178s : 1202: predicate.dict_get_item_eliminator 0.83% : 0.000175s : 1202: predicate.dict_set_item_eliminator 0.04% : 0.000009s : 126: predicate.elim_not_effective 0.10% : 0.000021s : 126: predicate.elim_shapecalc_of_broadcastargs 0.83% : 0.000174s : 1334: predicate.environ_add_const_eliminate 0.85% : 0.000178s : 1337: predicate.environ_get_add_eliminate 0.83% : 0.000174s : 1334: predicate.environ_get_depend_swap 1.41% : 0.000296s : 2172: predicate.environ_get_eliminate 0.83% : 0.000174s : 1337: predicate.environ_get_set_eliminate 1.10% : 0.000231s : 1717: predicate.exchange_switch_depend_value 1.34% : 0.000283s : 1717: predicate.float_depend_g_call 0.54% : 0.000114s : 835: predicate.float_environ_get_switch 0.63% : 0.000133s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000059s : 395: predicate.get_grad_eliminate 2.30% : 0.000483s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.54% : 0.000114s : 835: predicate.incorporate_call 0.53% : 0.000111s : 835: predicate.incorporate_call_switch 3.81% : 0.000801s : 4602: predicate.inline 2.34% : 0.000493s : 2203: predicate.inline_without_move 0.14% : 0.000028s : 395: predicate.j_node_and_user_rematch 0.31% : 0.000065s : 388: predicate.less_batch_normalization 1.08% : 0.000228s : 1660: predicate.list_to_tuple_eliminator_ 1.83% : 0.000384s : 2874: predicate.load_eliminater 0.19% : 0.000039s : 135: predicate.loop_unroll_after_grad 2.51% : 0.000528s : 2640: predicate.loop_unroll_before_grad 0.99% : 0.000208s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000117s : 837: predicate.merge_addn 3.14% : 0.000660s : 3380: predicate.micro_step_allgather_replace 3.15% : 0.000663s : 3380: predicate.mini_step_allgather_replace 0.76% : 0.000159s : 1199: predicate.minmaximum_grad 0.17% : 0.000037s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 2.00% : 0.000420s : 1717: predicate.partial_defer_inline 1.07% : 0.000226s : 1541: predicate.partial_eliminate 0.78% : 0.000163s : 1198: predicate.print_const_string_wrapper 0.55% : 0.000116s : 824: predicate.reduce_all_const_elim 0.94% : 0.000199s : 1199: predicate.reduce_eliminate 0.13% : 0.000028s : 395: predicate.remove_not_recompute_node 1.92% : 0.000405s : 4829: predicate.replace_applicator 0.78% : 0.000164s : 2203: predicate.replace_old_param 0.05% : 0.000011s : 135: predicate.reset_defer_inline 0.81% : 0.000170s : 1199: predicate.reshape_eliminate 3.18% : 0.000669s : 3380: predicate.row_tensor_add_zeros_like 0.10% : 0.000022s : 135: predicate.row_tensor_eliminate 3.35% : 0.000706s : 3484: predicate.same_eliminate 0.23% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.29% : 0.000060s : 395: predicate.shard_identity_eliminate 2.14% : 0.000451s : 2338: predicate.special_op_eliminate 0.63% : 0.000132s : 837: predicate.specialize_transform 3.47% : 0.000731s : 3380: predicate.split_environ_get_set_with_tuple_value 1.63% : 0.000343s : 2203: predicate.stack_unstack_eliminate 1.78% : 0.000376s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.24% : 0.000261s : 1717: predicate.switch_defer_inline 4.42% : 0.000930s : 5201: predicate.switch_layer_defer_inline 4.27% : 0.000899s : 5262: predicate.switch_simplify 0.78% : 0.000164s : 1199: predicate.tile_eliminate 0.76% : 0.000161s : 1199: predicate.transpose_eliminate 1.07% : 0.000226s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.04% : 0.000219s : 1469: predicate.tuple_list_get_item_const_eliminator 0.95% : 0.000201s : 1469: predicate.tuple_list_get_item_depend_reorder 1.86% : 0.000392s : 2495: predicate.tuple_list_get_item_eliminator 1.02% : 0.000215s : 1469: predicate.tuple_list_get_set_item_eliminator 1.63% : 0.000343s : 2304: predicate.tuple_list_set_item_eliminator 1.63% : 0.000342s : 1660: predicate.tuple_to_list_eliminator_ 1.88% : 0.000397s : 2874: predicate.updatestate_pure_node_eliminater 2.53% : 0.000533s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000021s : 135: predicate.value_based_eliminate 0.28% : 0.000059s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000057s : 395: predicate.virtual_output_eliminate 0.12% : 0.000024s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.060962 747 70.75% : 0.043129s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.23% : 0.001357s : 22: func_graph_cloner_run.FuncGraphClonerNode 27.03% : 0.016477s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.511292 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.27% : 0.041146s : 1: a1a2 0.00% : 0.000156s : 1: add_cache_embedding 0.00% : 0.000154s : 1: add_comm_op_reuse_tag 0.00% : 0.000705s : 1: add_recomputation 0.00% : 0.000386s : 1: assign_add_opt 0.01% : 0.002019s : 1: auto_monad 0.00% : 0.000329s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.14% : 0.021453s : 1: bootstrap 0.00% : 0.000071s : 1: cconv 0.00% : 0.000149s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000117s : 1: convert_after_rewriter 0.00% : 0.000304s : 1: cse_after_recomputation 0.00% : 0.000085s : 1: dataset_repeat_opt 0.00% : 0.000390s : 1: distribtued_split 0.01% : 0.001477s : 1: eliminate_special_op_node 0.00% : 0.000105s : 1: environ_conv 0.00% : 0.000020s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000035s : 1: graph_reusing 0.00% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000008s : 1: handle_group_info 0.28% : 0.042769s : 1: inline 0.01% : 0.001338s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000527s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000926s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.031997s : 61: opt.transform.a1a2 0.00% : 0.000178s : 1: opt.transform.loop_unroll_optimizer 0.58% : 0.089849s : 148: opt.transform.opt_a 0.01% : 0.000846s : 1: opt.transform.opt_after_cconv 0.02% : 0.003190s : 27: opt.transform.opt_b 0.24% : 0.037062s : 16: opt.transform.opt_resolve 0.01% : 0.000919s : 1: opt.transform.opt_trans_graph 0.01% : 0.000901s : 6: opt.transform.special_op_eliminate 0.00% : 0.000706s : 4: opt.transform.symbol_engine_opt 3.68% : 0.571576s : 1: opt_a 0.01% : 0.001631s : 1: opt_after_cconv 0.03% : 0.003999s : 1: opt_b 3.78% : 0.586086s : 1: optimize 0.00% : 0.000156s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000087s : 1: order_py_execute_after_rewriter 0.00% : 0.000123s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000148s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000048s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000098s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000177s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000098s : 1: pipeline_split 0.00% : 0.000096s : 1: pre_auto_parallel 0.00% : 0.000133s : 1: py_interpret_to_execute 0.00% : 0.000144s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000108s : 1: remove_cast_before_assign_add 0.00% : 0.000591s : 1: remove_dup_value 0.84% : 0.129572s : 3: renormalize.infer 0.34% : 0.052143s : 3: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001008s : 1: rewriter_after_opt_a 0.01% : 0.001839s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000157s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.00% : 0.000133s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000812s : 1: symbol_engine_optimizer 83.12% : 12.893353s : 1: task_emit 0.01% : 0.000949s : 1: tuple_transform 6.35% : 0.984204s : 1: type_inference 0.01% : 0.001367s : 1: validate TotalTime = 14.3749, [21] [bootstrap]: 0.00132981 [type_inference]: 0.776054 [auto_monad]: 0.00192255 [graph_reusing]: 2.68202e-05 [inline]: 0.0440498, [2] [rewriter_before_opt_a]: 0.00146989 [a1a2]: 0.0425329, [2] [Cycle 1]: 0.0289683, [11] [expand_dump_flag]: 3.534e-05 [switch_simplify]: 0.00119428 [loop_unroll]: 0.00068767 [a_1]: 0.0223409 [recompute_prepare]: 0.00016944 [updatestate_depend_eliminate]: 0.00037713 [updatestate_assign_eliminate]: 9.21004e-05 [updatestate_loads_eliminate]: 0.00027239 [parameter_eliminate]: 8.91974e-06 [a_2]: 0.00348789 [parallel_inline_pass]: 0.00010483 [Cycle 2]: 0.00563625, [11] [expand_dump_flag]: 2.50014e-06 [switch_simplify]: 9.56701e-05 [loop_unroll]: 0.00014282 [a_1]: 0.00319595 [recompute_prepare]: 0.00010629 [updatestate_depend_eliminate]: 0.00021782 [updatestate_assign_eliminate]: 6.59698e-05 [updatestate_loads_eliminate]: 6.76704e-05 [parameter_eliminate]: 6.04987e-06 [a_2]: 0.00153641 [parallel_inline_pass]: 0.00010243 [parallel-infer-symbol]: 0.00021187 [pre_auto_parallel]: 0.00010109 [insert-virtual-dataset]: 0.00136023 [parallel-infer-symbol-second]: 2.40002e-06 [dataset_repeat_opt]: 0.00011848 [pipeline_split]: 0.00010826 [optimize]: 0.605325, [52] [py_interpret_to_execute]: 0.0001552 [rewriter_before_opt_a]: 0.0002843 [opt_a]: 0.590727, [3] [Cycle 1]: 0.509484, [46] [expand_dump_flag]: 2.85963e-06 [switch_simplify]: 0.00017362 [loop_unroll]: 0.00010123 [a_1]: 0.00341266 [recompute_prepare]: 0.00010863 [updatestate_depend_eliminate]: 0.00010678 [updatestate_assign_eliminate]: 6.659e-05 [updatestate_loads_eliminate]: 7.56802e-05 [parameter_eliminate]: 5.60004e-06 [a_2]: 0.00165945 [accelerated_algorithm]: 0.00035403 [shard]: 2.67988e-06 [meta_shard_fg_expand]: 4.76399e-05 [shard_inline]: 0.00011498 [auto_parallel]: 8.94801e-05 [parallel]: 0.0157007 [flash_sp]: 7.41496e-05 [merge_comm]: 0.00013175 [allreduce_fusion]: 7.51801e-05 [matmul_add_comm_reduction]: 0.00010291 [allreduce_slice_to_reducescatter]: 7.5018e-07 [virtual_shard_identity]: 0.00014361 [virtual_dataset]: 0.00019086 [get_grad_eliminate_]: 0.00011968 [virtual_output]: 0.00011559 [merge_forward]: 7.65501e-05 [cell_reuse_recompute_pass]: 4.99981e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021511 [before_grad]: 0.00020124 [inplace_validation]: 0.00012635 [parallel_renormalize]: 0.0230304 [update_top_fg]: 1.57021e-06 [cast_eliminate]: 0.00015941 [meta_fg_expand]: 0.266554 [inplace_validation_after_expand]: 0.00154739 [flash_sp_send_recv_attached]: 0.00123068 [receive_attached]: 9.07099e-05 [after_resolve]: 0.00199868 [a_after_grad]: 0.00392502 [special_op_eliminate]: 0.00187812 [renormalize]: 0.153253 [add_forward_monad_depend]: 0.00036653 [auto_monad_grad]: 0.00021305 [auto_monad_eliminator]: 0.00183582 [cse]: 0.00428878 [a_3]: 0.0248373 [Cycle 2]: 0.0693014, [46] [expand_dump_flag]: 5.143e-05 [switch_simplify]: 0.00186073 [loop_unroll]: 0.00157613 [a_1]: 0.0311044 [recompute_prepare]: 0.00017424 [updatestate_depend_eliminate]: 0.0002269 [updatestate_assign_eliminate]: 0.00010409 [updatestate_loads_eliminate]: 0.00016577 [parameter_eliminate]: 4.17978e-06 [a_2]: 0.0043689 [accelerated_algorithm]: 0.00016594 [shard]: 2.39024e-06 [meta_shard_fg_expand]: 7.06101e-05 [shard_inline]: 0.0001437 [auto_parallel]: 0.00012273 [parallel]: 1.44504e-05 [flash_sp]: 0.00012178 [merge_comm]: 0.00011698 [allreduce_fusion]: 9.342e-05 [matmul_add_comm_reduction]: 0.00011057 [allreduce_slice_to_reducescatter]: 6.79865e-07 [virtual_shard_identity]: 0.00014636 [virtual_dataset]: 0.00014134 [get_grad_eliminate_]: 0.0001366 [virtual_output]: 0.00013954 [merge_forward]: 9.09897e-05 [cell_reuse_recompute_pass]: 2.50991e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024942 [before_grad]: 0.00024195 [inplace_validation]: 8.55601e-05 [parallel_renormalize]: 1.00117e-07 [update_top_fg]: 9.89996e-07 [cast_eliminate]: 0.0001587 [meta_fg_expand]: 0.00033381 [inplace_validation_after_expand]: 0.00018516 [flash_sp_send_recv_attached]: 2.12993e-06 [receive_attached]: 1.70991e-06 [after_resolve]: 0.00016331 [a_after_grad]: 0.00023514 [special_op_eliminate]: 0.00014097 [renormalize]: 0.017978 [add_forward_monad_depend]: 5.26011e-06 [auto_monad_grad]: 2.35019e-06 [auto_monad_eliminator]: 0.00029171 [cse]: 0.00658891 [a_3]: 0.00096178 [Cycle 3]: 0.0119167, [46] [expand_dump_flag]: 2.23005e-06 [switch_simplify]: 0.00013822 [loop_unroll]: 0.00013361 [a_1]: 0.0043135 [recompute_prepare]: 0.00013658 [updatestate_depend_eliminate]: 0.00014815 [updatestate_assign_eliminate]: 9.597e-05 [updatestate_loads_eliminate]: 9.19797e-05 [parameter_eliminate]: 3.10037e-06 [a_2]: 0.00211214 [accelerated_algorithm]: 0.00016131 [shard]: 1.67964e-06 [meta_shard_fg_expand]: 5.11399e-05 [shard_inline]: 0.00013688 [auto_parallel]: 0.00011183 [parallel]: 1.01002e-05 [flash_sp]: 2.12993e-06 [merge_comm]: 0.00010645 [allreduce_fusion]: 9.54801e-05 [matmul_add_comm_reduction]: 0.00011808 [allreduce_slice_to_reducescatter]: 4.70318e-07 [virtual_shard_identity]: 0.00014379 [virtual_dataset]: 0.00013748 [get_grad_eliminate_]: 0.0001332 [virtual_output]: 0.00013398 [merge_forward]: 9.33902e-05 [cell_reuse_recompute_pass]: 2.73017e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024439 [before_grad]: 0.00024522 [inplace_validation]: 9.43197e-05 [parallel_renormalize]: 8.00937e-08 [update_top_fg]: 3.89758e-07 [cast_eliminate]: 0.00015348 [meta_fg_expand]: 0.00011447 [inplace_validation_after_expand]: 0.00011594 [flash_sp_send_recv_attached]: 1.84961e-06 [receive_attached]: 1.81003e-06 [after_resolve]: 0.0001499 [a_after_grad]: 0.00022405 [special_op_eliminate]: 0.00013656 [renormalize]: 7.03149e-08 [add_forward_monad_depend]: 2.44984e-06 [auto_monad_grad]: 1.47987e-06 [auto_monad_eliminator]: 0.00017095 [cse]: 0.00040835 [a_3]: 0.00093961 [py_interpret_to_execute_after_opt_a]: 0.00013998 [slice_cell_reuse_recomputed_activation]: 2.31992e-06 [rewriter_after_opt_a]: 0.00096278 [convert_after_rewriter]: 0.00011647 [order_py_execute_after_rewriter]: 9.58298e-05 [opt_b]: 0.0040009, [1] [Cycle 1]: 0.00399214, [7] [b_1]: 0.00306797 [b_2]: 0.00014059 [updatestate_depend_eliminate]: 9.61497e-05 [updatestate_assign_eliminate]: 8.765e-05 [updatestate_loads_eliminate]: 8.95602e-05 [renormalize]: 4.99655e-07 [cse]: 0.00045447 [optimize_parallel_all_gather_comm]: 0.00014444 [overlap_param_gather]: 1.12969e-06 [cconv]: 7.15498e-05 [loop_unroll]: 0.00092378 [opt_after_cconv]: 0.00156875, [1] [Cycle 1]: 0.00156135, [7] [c_1]: 0.00078929 [parameter_eliminate]: 2.90014e-06 [updatestate_depend_eliminate]: 0.00012834 [updatestate_assign_eliminate]: 9.33302e-05 [updatestate_loads_eliminate]: 9.26699e-05 [cse]: 0.00039747 [renormalize]: 4.4005e-07 [remove_dup_value]: 0.00059674 [tuple_transform]: 0.00101045, [1] [Cycle 1]: 0.00100256, [2] [d_1]: 0.00098191 [renormalize]: 4.00003e-07 [partial_unused_args_eliminate]: 3.43006e-06 [add_cache_embedding]: 0.00015175 [add_recomputation]: 0.0007076 [cse_after_recomputation]: 0.00031104, [1] [Cycle 1]: 0.00030282, [1] [cse]: 0.00028853 [environ_conv]: 8.79499e-05 [swap_dp_allreduce_reducescatter]: 0.00012839 [bias_add_comm_swap]: 3.2098e-06 [label_micro_interleaved_index]: 2.08011e-06 [label_fine_grained_interleaved_index]: 0.00051888 [merge_cast_opt]: 1.79e-06 [slice_recompute_activation]: 0.00014933 [micro_interleaved_order_control]: 1.98977e-06 [assign_add_opt]: 0.00039571 [ForceFp32Comm]: 1.28988e-06 [remove_cast_before_assign_add]: 0.00010446 [full_micro_interleaved_order_control]: 2.33995e-06 [reorder_send_recv_between_fp_bp]: 1.81003e-06 [comm_op_add_attrs]: 0.00014651 [add_comm_op_reuse_tag]: 0.00015119 [interleave_split_concat_branches]: 1.37975e-06 [interleave_parallel_branches]: 8.49832e-07 [overlap_opt_shard_in_pipeline]: 1.09198e-05 [overlap_opt_shard_grad_in_pipeline]: 3.57023e-06 [control_data_broadcast_order]: 1.13994e-06 [grouped_pairwise_exchange_alltoall]: 1.05402e-05 [offloading_packed_experts]: 2.59001e-06 [overlap_recompute_and_grad_model_parallel]: 2.34041e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.09785e-07 [overlap_recompute_allgather_and_fa_grad]: 9.18801e-05 [overlap_grad_ring_attention]: 0.00014779 [overlap_grad_flash_sp]: 0.00011963 [begin_end_overlap_inline]: 9.79751e-07 [split_matmul_comm_elemetwise]: 2.04006e-06 [split_layernorm_comm]: 2.10013e-06 [handle_group_info]: 6.56024e-06 [symbol_engine_optimizer]: 0.0008354, [1] [Cycle 1]: 0.00082892, [6] [build]: 6.33001e-05 [elim_shapecalc]: 0.00015216 [elim_not_effective]: 0.00022453 [opt_reshape]: 0.00013399 [fold_const_symbol]: 0.00021457 [renormalize]: 4.80097e-07 [pipeline_parallel_scheduler]: 4.0899e-06 [auto_monad_reorder]: 0.00031892 [get_jit_bprop_graph]: 7.10133e-07 [rewriter_after_jit_bprop_graph]: 6.10016e-07 [eliminate_special_op_node]: 0.00136671 [distribtued_split]: 0.00036923 [validate]: 0.00028286 [task_emit]: 12.9405 [execute]: 1.20201e-05 Sums bootstrap : 0.001330s : 0.01% type_inference : 0.776054s : 5.40% auto_monad : 0.001923s : 0.01% graph_reusing : 0.000027s : 0.00% inline.rewriter_before_opt_a : 0.001470s : 0.01% inline.a1a2.expand_dump_flag : 0.000038s : 0.00% inline.a1a2.switch_simplify : 0.001290s : 0.01% inline.a1a2.loop_unroll : 0.000830s : 0.01% inline.a1a2.a_1 : 0.025537s : 0.18% inline.a1a2.recompute_prepare : 0.000276s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000595s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000158s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000340s : 0.00% inline.a1a2.parameter_eliminate : 0.000015s : 0.00% inline.a1a2.a_2 : 0.005024s : 0.03% inline.a1a2.parallel_inline_pass : 0.000207s : 0.00% parallel-infer-symbol : 0.000212s : 0.00% pre_auto_parallel : 0.000101s : 0.00% insert-virtual-dataset : 0.001360s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000118s : 0.00% pipeline_split : 0.000108s : 0.00% optimize.py_interpret_to_execute : 0.000155s : 0.00% optimize.rewriter_before_opt_a : 0.000284s : 0.00% optimize.opt_a.expand_dump_flag : 0.000057s : 0.00% optimize.opt_a.switch_simplify : 0.002173s : 0.02% optimize.opt_a.loop_unroll : 0.001811s : 0.01% optimize.opt_a.a_1 : 0.038831s : 0.27% optimize.opt_a.recompute_prepare : 0.000419s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000482s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000267s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000333s : 0.00% optimize.opt_a.parameter_eliminate : 0.000013s : 0.00% optimize.opt_a.a_2 : 0.008140s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000681s : 0.00% optimize.opt_a.shard : 0.000007s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000169s : 0.00% optimize.opt_a.shard_inline : 0.000396s : 0.00% optimize.opt_a.auto_parallel : 0.000324s : 0.00% optimize.opt_a.parallel : 0.015725s : 0.11% optimize.opt_a.flash_sp : 0.000198s : 0.00% optimize.opt_a.merge_comm : 0.000355s : 0.00% optimize.opt_a.allreduce_fusion : 0.000264s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000332s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000434s : 0.00% optimize.opt_a.virtual_dataset : 0.000470s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000389s : 0.00% optimize.opt_a.virtual_output : 0.000389s : 0.00% optimize.opt_a.merge_forward : 0.000261s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000010s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000709s : 0.00% optimize.opt_a.before_grad : 0.000688s : 0.00% optimize.opt_a.inplace_validation : 0.000306s : 0.00% optimize.opt_a.parallel_renormalize : 0.023031s : 0.16% optimize.opt_a.update_top_fg : 0.000003s : 0.00% optimize.opt_a.cast_eliminate : 0.000472s : 0.00% optimize.opt_a.meta_fg_expand : 0.267003s : 1.86% optimize.opt_a.inplace_validation_after_expand : 0.001848s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001235s : 0.01% optimize.opt_a.receive_attached : 0.000094s : 0.00% optimize.opt_a.after_resolve : 0.002312s : 0.02% optimize.opt_a.a_after_grad : 0.004384s : 0.03% optimize.opt_a.special_op_eliminate : 0.002156s : 0.02% optimize.opt_a.renormalize : 0.171231s : 1.19% optimize.opt_a.add_forward_monad_depend : 0.000374s : 0.00% optimize.opt_a.auto_monad_grad : 0.000217s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002298s : 0.02% optimize.opt_a.cse : 0.011286s : 0.08% optimize.opt_a.a_3 : 0.026739s : 0.19% optimize.py_interpret_to_execute_after_opt_a : 0.000140s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000963s : 0.01% optimize.convert_after_rewriter : 0.000116s : 0.00% optimize.order_py_execute_after_rewriter : 0.000096s : 0.00% optimize.opt_b.b_1 : 0.003068s : 0.02% optimize.opt_b.b_2 : 0.000141s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000096s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000088s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000090s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000454s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000144s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000072s : 0.00% optimize.loop_unroll : 0.000924s : 0.01% optimize.opt_after_cconv.c_1 : 0.000789s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000128s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000093s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000093s : 0.00% optimize.opt_after_cconv.cse : 0.000397s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000597s : 0.00% optimize.tuple_transform.d_1 : 0.000982s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000152s : 0.00% optimize.add_recomputation : 0.000708s : 0.00% optimize.cse_after_recomputation.cse : 0.000289s : 0.00% optimize.environ_conv : 0.000088s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000128s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000519s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000149s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000396s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000104s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000147s : 0.00% optimize.add_comm_op_reuse_tag : 0.000151s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000011s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.00% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000092s : 0.00% optimize.overlap_grad_ring_attention : 0.000148s : 0.00% optimize.overlap_grad_flash_sp : 0.000120s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000063s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000152s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000225s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000134s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000215s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000319s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.001367s : 0.01% distribtued_split : 0.000369s : 0.00% validate : 0.000283s : 0.00% task_emit : 12.940509s : 90.10% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.049279 4298 0.04% : 0.000021s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000022s : 9: substitution.addn_check_dump 0.11% : 0.000053s : 7: substitution.addn_zero_filter 0.03% : 0.000016s : 7: substitution.adjust_all_reduce_mul_add 0.71% : 0.000351s : 71: substitution.arithmetic_simplify 0.11% : 0.000055s : 10: substitution.cast_eliminate 0.12% : 0.000057s : 47: substitution.depend_value_elim 0.06% : 0.000031s : 107: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.08% : 0.000040s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000020s : 12: substitution.environ_get_depend_swap 0.07% : 0.000033s : 27: substitution.environ_get_eliminate 0.07% : 0.000035s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000019s : 23: substitution.float_depend_g_call 0.02% : 0.000012s : 12: substitution.float_environ_get_switch 0.03% : 0.000016s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000031s : 107: substitution.fold_const_symbol 63.81% : 0.031447s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000085s : 126: substitution.graph_param_transform 0.02% : 0.000010s : 8: substitution.incorporate_call 0.01% : 0.000006s : 8: substitution.incorporate_call_switch 23.92% : 0.011787s : 331: substitution.inline 1.42% : 0.000701s : 112: substitution.inline_without_move 0.26% : 0.000126s : 309: substitution.j_node_and_user_rematch 0.44% : 0.000215s : 40: substitution.less_batch_normalization 0.09% : 0.000046s : 90: substitution.load_eliminater 0.11% : 0.000053s : 10: substitution.merge_addn 0.24% : 0.000119s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.03% : 0.000016s : 1: substitution.partial_defer_inline 0.13% : 0.000064s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.08% : 0.000038s : 15: substitution.reduce_eliminate 0.32% : 0.000157s : 309: substitution.remove_not_recompute_node 2.02% : 0.000996s : 508: substitution.replace_applicator 0.23% : 0.000113s : 251: substitution.replace_old_param 0.08% : 0.000038s : 11: substitution.reshape_eliminate 0.04% : 0.000018s : 6: substitution.set_cell_output_no_recompute 0.03% : 0.000013s : 4: substitution.specialize_transform 0.04% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.17% : 0.000085s : 34: substitution.switch_simplify 0.06% : 0.000029s : 11: substitution.tile_eliminate 0.69% : 0.000342s : 101: substitution.tuple_list_convert_item_index_to_positive 0.28% : 0.000137s : 107: substitution.tuple_list_get_item_const_eliminator 0.45% : 0.000220s : 107: substitution.tuple_list_get_item_depend_reorder 1.59% : 0.000782s : 308: substitution.tuple_list_get_item_eliminator 0.39% : 0.000190s : 107: substitution.tuple_list_get_set_item_eliminator 0.43% : 0.000210s : 210: substitution.updatestate_pure_node_eliminater 0.75% : 0.000370s : 265: substitution.updatestate_useless_node_eliminater 0.03% : 0.000017s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.775580 2 96.62% : 0.749360s : 1: type_inference.infer 3.38% : 0.026220s : 1: type_inference.specialize ------[replace.] 0.010063 775 0.42% : 0.000042s : 5: replace.ad_related_special_op_eliminate 0.06% : 0.000006s : 1: replace.arithmetic_simplify 0.60% : 0.000060s : 7: replace.depend_value_elim 0.38% : 0.000039s : 3: replace.environ_get_set_eliminate 31.70% : 0.003190s : 183: replace.getattr_setattr_resolve 29.42% : 0.002960s : 310: replace.inline 0.23% : 0.000023s : 1: replace.merge_addn 1.16% : 0.000117s : 7: replace.partial_eliminate 3.81% : 0.000384s : 25: replace.replace_applicator 4.63% : 0.000466s : 34: replace.switch_simplify 0.51% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 26.65% : 0.002682s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.28% : 0.000028s : 1: replace.virtual_dataset_eliminate ------[match.] 0.041620 775 0.04% : 0.000017s : 5: match.ad_related_special_op_eliminate 0.04% : 0.000015s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000017s : 3: match.environ_get_set_eliminate 70.39% : 0.029295s : 183: match.getattr_setattr_resolve 27.81% : 0.011573s : 310: match.inline 0.06% : 0.000025s : 1: match.merge_addn 0.11% : 0.000044s : 7: match.partial_eliminate 0.23% : 0.000098s : 25: match.replace_applicator 0.16% : 0.000066s : 34: match.switch_simplify 0.08% : 0.000033s : 6: match.tuple_list_get_item_depend_reorder 0.99% : 0.000410s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.04% : 0.000015s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020962131318 0.76% : 0.000158s : 1198: predicate.accumulaten_eliminater 0.27% : 0.000056s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000115s : 835: predicate.addn_check_dump 0.79% : 0.000165s : 1198: predicate.addn_zero_filter 0.74% : 0.000155s : 1198: predicate.adjust_all_reduce_mul_add 1.82% : 0.000382s : 2034: predicate.arithmetic_simplify 1.12% : 0.000235s : 1586: predicate.cast_eliminate 3.18% : 0.000667s : 3484: predicate.check_bprop_eliminate 0.55% : 0.000115s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.16% : 0.000244s : 1399: predicate.convert_tensor_eliminate 0.56% : 0.000117s : 838: predicate.depend_value_elim 0.86% : 0.000180s : 1202: predicate.dict_get_item_const_eliminator 0.88% : 0.000184s : 1202: predicate.dict_get_item_eliminator 0.84% : 0.000175s : 1202: predicate.dict_set_item_eliminator 0.05% : 0.000010s : 126: predicate.elim_not_effective 0.11% : 0.000023s : 126: predicate.elim_shapecalc_of_broadcastargs 0.83% : 0.000174s : 1334: predicate.environ_add_const_eliminate 0.84% : 0.000176s : 1337: predicate.environ_get_add_eliminate 0.83% : 0.000173s : 1334: predicate.environ_get_depend_swap 1.43% : 0.000299s : 2172: predicate.environ_get_eliminate 1.08% : 0.000226s : 1337: predicate.environ_get_set_eliminate 1.12% : 0.000235s : 1717: predicate.exchange_switch_depend_value 1.40% : 0.000293s : 1717: predicate.float_depend_g_call 0.55% : 0.000115s : 835: predicate.float_environ_get_switch 0.64% : 0.000134s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000059s : 395: predicate.get_grad_eliminate 2.38% : 0.000499s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000115s : 835: predicate.incorporate_call 0.53% : 0.000112s : 835: predicate.incorporate_call_switch 3.88% : 0.000814s : 4602: predicate.inline 2.24% : 0.000469s : 2203: predicate.inline_without_move 0.14% : 0.000029s : 395: predicate.j_node_and_user_rematch 0.33% : 0.000069s : 388: predicate.less_batch_normalization 1.23% : 0.000257s : 1660: predicate.list_to_tuple_eliminator_ 1.85% : 0.000388s : 2874: predicate.load_eliminater 0.19% : 0.000040s : 135: predicate.loop_unroll_after_grad 2.39% : 0.000501s : 2640: predicate.loop_unroll_before_grad 0.95% : 0.000199s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000118s : 837: predicate.merge_addn 3.07% : 0.000644s : 3380: predicate.micro_step_allgather_replace 3.08% : 0.000646s : 3380: predicate.mini_step_allgather_replace 0.78% : 0.000164s : 1199: predicate.minmaximum_grad 0.17% : 0.000036s : 242: predicate.mutable_eliminate 0.09% : 0.000020s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 2.00% : 0.000420s : 1717: predicate.partial_defer_inline 1.10% : 0.000230s : 1541: predicate.partial_eliminate 0.81% : 0.000170s : 1198: predicate.print_const_string_wrapper 0.55% : 0.000115s : 824: predicate.reduce_all_const_elim 0.94% : 0.000198s : 1199: predicate.reduce_eliminate 0.14% : 0.000028s : 395: predicate.remove_not_recompute_node 1.93% : 0.000404s : 4829: predicate.replace_applicator 0.81% : 0.000169s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.76% : 0.000160s : 1199: predicate.reshape_eliminate 3.29% : 0.000689s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000022s : 135: predicate.row_tensor_eliminate 3.28% : 0.000688s : 3484: predicate.same_eliminate 0.24% : 0.000050s : 633: predicate.set_cell_output_no_recompute 0.29% : 0.000061s : 395: predicate.shard_identity_eliminate 2.06% : 0.000432s : 2338: predicate.special_op_eliminate 0.64% : 0.000134s : 837: predicate.specialize_transform 3.44% : 0.000722s : 3380: predicate.split_environ_get_set_with_tuple_value 1.59% : 0.000334s : 2203: predicate.stack_unstack_eliminate 1.80% : 0.000378s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.42% : 0.000297s : 1717: predicate.switch_defer_inline 4.54% : 0.000952s : 5201: predicate.switch_layer_defer_inline 4.59% : 0.000963s : 5262: predicate.switch_simplify 0.80% : 0.000168s : 1199: predicate.tile_eliminate 0.77% : 0.000162s : 1199: predicate.transpose_eliminate 1.06% : 0.000223s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.10% : 0.000230s : 1469: predicate.tuple_list_get_item_const_eliminator 0.96% : 0.000201s : 1469: predicate.tuple_list_get_item_depend_reorder 1.89% : 0.000397s : 2495: predicate.tuple_list_get_item_eliminator 1.02% : 0.000213s : 1469: predicate.tuple_list_get_set_item_eliminator 1.66% : 0.000349s : 2304: predicate.tuple_list_set_item_eliminator 1.09% : 0.000228s : 1660: predicate.tuple_to_list_eliminator_ 1.86% : 0.000391s : 2874: predicate.updatestate_pure_node_eliminater 2.50% : 0.000524s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000020s : 135: predicate.value_based_eliminate 0.28% : 0.000060s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000058s : 395: predicate.virtual_output_eliminate 0.10% : 0.000022s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.063394 747 70.06% : 0.044414s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.38% : 0.001510s : 22: func_graph_cloner_run.FuncGraphClonerNode 27.56% : 0.017471s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.386998 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.28% : 0.042539s : 1: a1a2 0.00% : 0.000159s : 1: add_cache_embedding 0.00% : 0.000158s : 1: add_comm_op_reuse_tag 0.00% : 0.000721s : 1: add_recomputation 0.00% : 0.000406s : 1: assign_add_opt 0.01% : 0.001947s : 1: auto_monad 0.00% : 0.000333s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.001383s : 1: bootstrap 0.00% : 0.000078s : 1: cconv 0.00% : 0.000153s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000125s : 1: convert_after_rewriter 0.00% : 0.000316s : 1: cse_after_recomputation 0.00% : 0.000128s : 1: dataset_repeat_opt 0.00% : 0.000383s : 1: distribtued_split 0.01% : 0.001382s : 1: eliminate_special_op_node 0.00% : 0.000097s : 1: environ_conv 0.00% : 0.000021s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000037s : 1: graph_reusing 0.00% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000009s : 1: handle_group_info 0.29% : 0.044065s : 1: inline 0.01% : 0.001385s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000529s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000935s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.033080s : 61: opt.transform.a1a2 0.00% : 0.000176s : 1: opt.transform.loop_unroll_optimizer 0.59% : 0.091333s : 148: opt.transform.opt_a 0.01% : 0.000787s : 1: opt.transform.opt_after_cconv 0.02% : 0.003180s : 27: opt.transform.opt_b 0.24% : 0.037612s : 16: opt.transform.opt_resolve 0.01% : 0.000979s : 1: opt.transform.opt_trans_graph 0.01% : 0.000836s : 6: opt.transform.special_op_eliminate 0.00% : 0.000719s : 4: opt.transform.symbol_engine_opt 3.84% : 0.590733s : 1: opt_a 0.01% : 0.001575s : 1: opt_after_cconv 0.03% : 0.004006s : 1: opt_b 3.93% : 0.605337s : 1: optimize 0.00% : 0.000152s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000102s : 1: order_py_execute_after_rewriter 0.00% : 0.000124s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000153s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000015s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000097s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000225s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000119s : 1: pipeline_split 0.00% : 0.000110s : 1: pre_auto_parallel 0.00% : 0.000163s : 1: py_interpret_to_execute 0.00% : 0.000147s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000110s : 1: remove_cast_before_assign_add 0.00% : 0.000610s : 1: remove_dup_value 0.91% : 0.139901s : 3: renormalize.infer 0.35% : 0.054307s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000973s : 1: rewriter_after_opt_a 0.01% : 0.001776s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000155s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000135s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000839s : 1: symbol_engine_optimizer 84.10% : 12.940554s : 1: task_emit 0.01% : 0.001015s : 1: tuple_transform 5.04% : 0.776096s : 1: type_inference 0.01% : 0.001330s : 1: validate TotalTime = 14.4235, [21] [bootstrap]: 0.00147318 [type_inference]: 0.758934 [auto_monad]: 0.00200424 [graph_reusing]: 2.45799e-05 [inline]: 0.0453529, [2] [rewriter_before_opt_a]: 0.00151233 [a1a2]: 0.0437621, [2] [Cycle 1]: 0.0300754, [11] [expand_dump_flag]: 5.21801e-05 [switch_simplify]: 0.00132787 [loop_unroll]: 0.00066461 [a_1]: 0.023427 [recompute_prepare]: 0.00018152 [updatestate_depend_eliminate]: 0.0003937 [updatestate_assign_eliminate]: 0.00011596 [updatestate_loads_eliminate]: 0.00022533 [parameter_eliminate]: 1.034e-05 [a_2]: 0.00331099 [parallel_inline_pass]: 0.00010991 [Cycle 2]: 0.00560047, [11] [expand_dump_flag]: 3.40026e-06 [switch_simplify]: 0.00010439 [loop_unroll]: 0.00010243 [a_1]: 0.00336169 [recompute_prepare]: 0.00010197 [updatestate_depend_eliminate]: 7.66902e-05 [updatestate_assign_eliminate]: 6.08503e-05 [updatestate_loads_eliminate]: 6.74301e-05 [parameter_eliminate]: 4.59002e-06 [a_2]: 0.00152525 [parallel_inline_pass]: 0.00010343 [parallel-infer-symbol]: 0.00018528 [pre_auto_parallel]: 9.62801e-05 [insert-virtual-dataset]: 0.00150899 [parallel-infer-symbol-second]: 2.78e-06 [dataset_repeat_opt]: 0.00012485 [pipeline_split]: 0.00012055 [optimize]: 0.609147, [52] [py_interpret_to_execute]: 0.00016457 [rewriter_before_opt_a]: 0.00028585 [opt_a]: 0.594164, [3] [Cycle 1]: 0.509878, [46] [expand_dump_flag]: 2.6701e-06 [switch_simplify]: 0.00011312 [loop_unroll]: 0.00010091 [a_1]: 0.00337713 [recompute_prepare]: 0.00010533 [updatestate_depend_eliminate]: 0.00010503 [updatestate_assign_eliminate]: 6.31902e-05 [updatestate_loads_eliminate]: 6.98101e-05 [parameter_eliminate]: 5.74999e-06 [a_2]: 0.00157506 [accelerated_algorithm]: 0.00027672 [shard]: 2.72039e-06 [meta_shard_fg_expand]: 4.772e-05 [shard_inline]: 0.00011039 [auto_parallel]: 8.706e-05 [parallel]: 0.0170667 [flash_sp]: 7.38897e-05 [merge_comm]: 0.0001301 [allreduce_fusion]: 7.35801e-05 [matmul_add_comm_reduction]: 0.00010046 [allreduce_slice_to_reducescatter]: 5.99772e-07 [virtual_shard_identity]: 0.00013815 [virtual_dataset]: 0.00017969 [get_grad_eliminate_]: 0.00011781 [virtual_output]: 0.00011675 [merge_forward]: 7.69901e-05 [cell_reuse_recompute_pass]: 4.80982e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021402 [before_grad]: 0.00020788 [inplace_validation]: 0.00013683 [parallel_renormalize]: 0.0238034 [update_top_fg]: 1.72993e-06 [cast_eliminate]: 0.00015002 [meta_fg_expand]: 0.266991 [inplace_validation_after_expand]: 0.00153001 [flash_sp_send_recv_attached]: 0.00121595 [receive_attached]: 9.96101e-05 [after_resolve]: 0.00199934 [a_after_grad]: 0.00398008 [special_op_eliminate]: 0.00184916 [renormalize]: 0.152302 [add_forward_monad_depend]: 0.00037097 [auto_monad_grad]: 0.00022128 [auto_monad_eliminator]: 0.00179351 [cse]: 0.00417192 [a_3]: 0.0240919 [Cycle 2]: 0.0720747, [46] [expand_dump_flag]: 5.14002e-05 [switch_simplify]: 0.0018695 [loop_unroll]: 0.00149392 [a_1]: 0.0324137 [recompute_prepare]: 0.00018219 [updatestate_depend_eliminate]: 0.00023301 [updatestate_assign_eliminate]: 0.0001047 [updatestate_loads_eliminate]: 0.00017641 [parameter_eliminate]: 4.73997e-06 [a_2]: 0.00439834 [accelerated_algorithm]: 0.00016653 [shard]: 2.6403e-06 [meta_shard_fg_expand]: 8.965e-05 [shard_inline]: 0.00014133 [auto_parallel]: 0.00011571 [parallel]: 1.375e-05 [flash_sp]: 0.00012772 [merge_comm]: 0.00011298 [allreduce_fusion]: 9.51602e-05 [matmul_add_comm_reduction]: 0.00011588 [allreduce_slice_to_reducescatter]: 7.59959e-07 [virtual_shard_identity]: 0.00014503 [virtual_dataset]: 0.00013835 [get_grad_eliminate_]: 0.00013526 [virtual_output]: 0.00013606 [merge_forward]: 9.17898e-05 [cell_reuse_recompute_pass]: 2.42004e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024732 [before_grad]: 0.00024219 [inplace_validation]: 8.51201e-05 [parallel_renormalize]: 8.00937e-08 [update_top_fg]: 1.11992e-06 [cast_eliminate]: 0.0001562 [meta_fg_expand]: 0.00028514 [inplace_validation_after_expand]: 0.00017761 [flash_sp_send_recv_attached]: 2.19001e-06 [receive_attached]: 1.74996e-06 [after_resolve]: 0.00016121 [a_after_grad]: 0.00027955 [special_op_eliminate]: 0.00013959 [renormalize]: 0.018701 [add_forward_monad_depend]: 6.0997e-06 [auto_monad_grad]: 3.07988e-06 [auto_monad_eliminator]: 0.0002938 [cse]: 0.00726321 [a_3]: 0.00099574 [Cycle 3]: 0.0121821, [46] [expand_dump_flag]: 2.62028e-06 [switch_simplify]: 0.00013613 [loop_unroll]: 0.00013559 [a_1]: 0.00440059 [recompute_prepare]: 0.00014029 [updatestate_depend_eliminate]: 0.00016168 [updatestate_assign_eliminate]: 9.77102e-05 [updatestate_loads_eliminate]: 9.555e-05 [parameter_eliminate]: 5.51017e-06 [a_2]: 0.00210082 [accelerated_algorithm]: 0.00016919 [shard]: 2.2999e-06 [meta_shard_fg_expand]: 5.846e-05 [shard_inline]: 0.00014178 [auto_parallel]: 0.00012052 [parallel]: 1.42301e-05 [flash_sp]: 3.2899e-06 [merge_comm]: 0.00011087 [allreduce_fusion]: 9.79602e-05 [matmul_add_comm_reduction]: 0.00012411 [allreduce_slice_to_reducescatter]: 7.59959e-07 [virtual_shard_identity]: 0.0001978 [virtual_dataset]: 0.00014005 [get_grad_eliminate_]: 0.00013127 [virtual_output]: 0.00013271 [merge_forward]: 0.00010023 [cell_reuse_recompute_pass]: 4.10015e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.0002479 [before_grad]: 0.00023562 [inplace_validation]: 9.51402e-05 [parallel_renormalize]: 7.96281e-08 [update_top_fg]: 8.40053e-07 [cast_eliminate]: 0.00015107 [meta_fg_expand]: 0.00011153 [inplace_validation_after_expand]: 0.00012452 [flash_sp_send_recv_attached]: 1.9297e-06 [receive_attached]: 1.74996e-06 [after_resolve]: 0.00015096 [a_after_grad]: 0.00022325 [special_op_eliminate]: 0.00013194 [renormalize]: 1.09896e-07 [add_forward_monad_depend]: 3.15998e-06 [auto_monad_grad]: 2.73017e-06 [auto_monad_eliminator]: 0.00018707 [cse]: 0.00041922 [a_3]: 0.00095307 [py_interpret_to_execute_after_opt_a]: 0.00014519 [slice_cell_reuse_recomputed_activation]: 2.65986e-06 [rewriter_after_opt_a]: 0.00103664 [convert_after_rewriter]: 0.00011757 [order_py_execute_after_rewriter]: 8.43802e-05 [opt_b]: 0.00400778, [1] [Cycle 1]: 0.00399877, [7] [b_1]: 0.00310815 [b_2]: 0.00014096 [updatestate_depend_eliminate]: 0.0001023 [updatestate_assign_eliminate]: 8.99197e-05 [updatestate_loads_eliminate]: 9.28701e-05 [renormalize]: 4.69852e-07 [cse]: 0.00040692 [optimize_parallel_all_gather_comm]: 0.00014519 [overlap_param_gather]: 1.40024e-06 [cconv]: 7.11102e-05 [loop_unroll]: 0.00096412 [opt_after_cconv]: 0.00167975, [1] [Cycle 1]: 0.0016725, [7] [c_1]: 0.00081338 [parameter_eliminate]: 3.52971e-06 [updatestate_depend_eliminate]: 0.00013536 [updatestate_assign_eliminate]: 9.73502e-05 [updatestate_loads_eliminate]: 9.47099e-05 [cse]: 0.00046754 [renormalize]: 5.89993e-07 [remove_dup_value]: 0.0006304 [tuple_transform]: 0.00096504, [1] [Cycle 1]: 0.00095788, [2] [d_1]: 0.0009389 [renormalize]: 2.99886e-07 [partial_unused_args_eliminate]: 3.62005e-06 [add_cache_embedding]: 0.00015591 [add_recomputation]: 0.00074196 [cse_after_recomputation]: 0.00031601, [1] [Cycle 1]: 0.0003072, [1] [cse]: 0.00029338 [environ_conv]: 9.355e-05 [swap_dp_allreduce_reducescatter]: 0.00013436 [bias_add_comm_swap]: 2.82004e-06 [label_micro_interleaved_index]: 1.85007e-06 [label_fine_grained_interleaved_index]: 0.00052905 [merge_cast_opt]: 1.62004e-06 [slice_recompute_activation]: 0.00015018 [micro_interleaved_order_control]: 2.2403e-06 [assign_add_opt]: 0.0003981 [ForceFp32Comm]: 1.68011e-06 [remove_cast_before_assign_add]: 0.00010888 [full_micro_interleaved_order_control]: 2.54996e-06 [reorder_send_recv_between_fp_bp]: 1.77976e-06 [comm_op_add_attrs]: 0.00015456 [add_comm_op_reuse_tag]: 0.00015718 [interleave_split_concat_branches]: 1.03982e-06 [interleave_parallel_branches]: 9.09902e-07 [overlap_opt_shard_in_pipeline]: 3.382e-05 [overlap_opt_shard_grad_in_pipeline]: 3.24007e-06 [control_data_broadcast_order]: 1.29035e-06 [grouped_pairwise_exchange_alltoall]: 1.16103e-05 [offloading_packed_experts]: 2.21003e-06 [overlap_recompute_and_grad_model_parallel]: 2.09967e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.09989e-06 [overlap_recompute_allgather_and_fa_grad]: 9.517e-05 [overlap_grad_ring_attention]: 0.00014676 [overlap_grad_flash_sp]: 0.00011989 [begin_end_overlap_inline]: 7.70204e-07 [split_matmul_comm_elemetwise]: 2.08989e-06 [split_layernorm_comm]: 2.08011e-06 [handle_group_info]: 7.09016e-06 [symbol_engine_optimizer]: 0.00082066, [1] [Cycle 1]: 0.00081333, [6] [build]: 5.61504e-05 [elim_shapecalc]: 0.00014657 [elim_not_effective]: 0.00022361 [opt_reshape]: 0.00013378 [fold_const_symbol]: 0.000213 [renormalize]: 3.7998e-07 [pipeline_parallel_scheduler]: 3.76999e-06 [auto_monad_reorder]: 0.00033505 [get_jit_bprop_graph]: 6.00237e-07 [rewriter_after_jit_bprop_graph]: 4.69852e-07 [eliminate_special_op_node]: 0.00151889 [distribtued_split]: 0.00040367 [validate]: 0.00028882 [task_emit]: 13.0005 [execute]: 1.31503e-05 Sums bootstrap : 0.001473s : 0.01% type_inference : 0.758934s : 5.27% auto_monad : 0.002004s : 0.01% graph_reusing : 0.000025s : 0.00% inline.rewriter_before_opt_a : 0.001512s : 0.01% inline.a1a2.expand_dump_flag : 0.000056s : 0.00% inline.a1a2.switch_simplify : 0.001432s : 0.01% inline.a1a2.loop_unroll : 0.000767s : 0.01% inline.a1a2.a_1 : 0.026789s : 0.19% inline.a1a2.recompute_prepare : 0.000283s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000470s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000177s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000293s : 0.00% inline.a1a2.parameter_eliminate : 0.000015s : 0.00% inline.a1a2.a_2 : 0.004836s : 0.03% inline.a1a2.parallel_inline_pass : 0.000213s : 0.00% parallel-infer-symbol : 0.000185s : 0.00% pre_auto_parallel : 0.000096s : 0.00% insert-virtual-dataset : 0.001509s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000125s : 0.00% pipeline_split : 0.000121s : 0.00% optimize.py_interpret_to_execute : 0.000165s : 0.00% optimize.rewriter_before_opt_a : 0.000286s : 0.00% optimize.opt_a.expand_dump_flag : 0.000057s : 0.00% optimize.opt_a.switch_simplify : 0.002119s : 0.01% optimize.opt_a.loop_unroll : 0.001730s : 0.01% optimize.opt_a.a_1 : 0.040191s : 0.28% optimize.opt_a.recompute_prepare : 0.000428s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000500s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000266s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000342s : 0.00% optimize.opt_a.parameter_eliminate : 0.000016s : 0.00% optimize.opt_a.a_2 : 0.008074s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000612s : 0.00% optimize.opt_a.shard : 0.000008s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000196s : 0.00% optimize.opt_a.shard_inline : 0.000394s : 0.00% optimize.opt_a.auto_parallel : 0.000323s : 0.00% optimize.opt_a.parallel : 0.017095s : 0.12% optimize.opt_a.flash_sp : 0.000205s : 0.00% optimize.opt_a.merge_comm : 0.000354s : 0.00% optimize.opt_a.allreduce_fusion : 0.000267s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000340s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000481s : 0.00% optimize.opt_a.virtual_dataset : 0.000458s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000384s : 0.00% optimize.opt_a.virtual_output : 0.000386s : 0.00% optimize.opt_a.merge_forward : 0.000269s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000011s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000709s : 0.00% optimize.opt_a.before_grad : 0.000686s : 0.00% optimize.opt_a.inplace_validation : 0.000317s : 0.00% optimize.opt_a.parallel_renormalize : 0.023804s : 0.17% optimize.opt_a.update_top_fg : 0.000004s : 0.00% optimize.opt_a.cast_eliminate : 0.000457s : 0.00% optimize.opt_a.meta_fg_expand : 0.267387s : 1.86% optimize.opt_a.inplace_validation_after_expand : 0.001832s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001220s : 0.01% optimize.opt_a.receive_attached : 0.000103s : 0.00% optimize.opt_a.after_resolve : 0.002312s : 0.02% optimize.opt_a.a_after_grad : 0.004483s : 0.03% optimize.opt_a.special_op_eliminate : 0.002121s : 0.01% optimize.opt_a.renormalize : 0.171003s : 1.19% optimize.opt_a.add_forward_monad_depend : 0.000380s : 0.00% optimize.opt_a.auto_monad_grad : 0.000227s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002274s : 0.02% optimize.opt_a.cse : 0.011854s : 0.08% optimize.opt_a.a_3 : 0.026041s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000145s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001037s : 0.01% optimize.convert_after_rewriter : 0.000118s : 0.00% optimize.order_py_execute_after_rewriter : 0.000084s : 0.00% optimize.opt_b.b_1 : 0.003108s : 0.02% optimize.opt_b.b_2 : 0.000141s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000102s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000090s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000093s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000407s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000145s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000071s : 0.00% optimize.loop_unroll : 0.000964s : 0.01% optimize.opt_after_cconv.c_1 : 0.000813s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000004s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000135s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000097s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000095s : 0.00% optimize.opt_after_cconv.cse : 0.000468s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000630s : 0.00% optimize.tuple_transform.d_1 : 0.000939s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000004s : 0.00% optimize.add_cache_embedding : 0.000156s : 0.00% optimize.add_recomputation : 0.000742s : 0.01% optimize.cse_after_recomputation.cse : 0.000293s : 0.00% optimize.environ_conv : 0.000094s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000134s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000529s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000150s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000398s : 0.00% optimize.ForceFp32Comm : 0.000002s : 0.00% optimize.remove_cast_before_assign_add : 0.000109s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000155s : 0.00% optimize.add_comm_op_reuse_tag : 0.000157s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000034s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000012s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000095s : 0.00% optimize.overlap_grad_ring_attention : 0.000147s : 0.00% optimize.overlap_grad_flash_sp : 0.000120s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000056s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000147s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000224s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000134s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000213s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000335s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001519s : 0.01% distribtued_split : 0.000404s : 0.00% validate : 0.000289s : 0.00% task_emit : 13.000541s : 90.21% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.050466 4298 0.04% : 0.000023s : 5: substitution.ad_related_special_op_eliminate 0.05% : 0.000023s : 9: substitution.addn_check_dump 0.11% : 0.000057s : 7: substitution.addn_zero_filter 0.03% : 0.000017s : 7: substitution.adjust_all_reduce_mul_add 0.72% : 0.000363s : 71: substitution.arithmetic_simplify 0.12% : 0.000059s : 10: substitution.cast_eliminate 0.12% : 0.000062s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.09% : 0.000047s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000021s : 12: substitution.environ_get_depend_swap 0.07% : 0.000035s : 27: substitution.environ_get_eliminate 0.08% : 0.000038s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000022s : 23: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.03% : 0.000014s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000030s : 107: substitution.fold_const_symbol 62.38% : 0.031479s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000086s : 126: substitution.graph_param_transform 0.02% : 0.000011s : 8: substitution.incorporate_call 0.01% : 0.000006s : 8: substitution.incorporate_call_switch 25.66% : 0.012948s : 331: substitution.inline 1.36% : 0.000686s : 112: substitution.inline_without_move 0.25% : 0.000125s : 309: substitution.j_node_and_user_rematch 0.29% : 0.000146s : 40: substitution.less_batch_normalization 0.10% : 0.000051s : 90: substitution.load_eliminater 0.11% : 0.000055s : 10: substitution.merge_addn 0.23% : 0.000115s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.10% : 0.000049s : 1: substitution.partial_defer_inline 0.13% : 0.000068s : 23: substitution.partial_eliminate 0.03% : 0.000017s : 26: substitution.reduce_all_const_elim 0.08% : 0.000038s : 15: substitution.reduce_eliminate 0.31% : 0.000157s : 309: substitution.remove_not_recompute_node 1.99% : 0.001004s : 508: substitution.replace_applicator 0.22% : 0.000110s : 251: substitution.replace_old_param 0.09% : 0.000044s : 11: substitution.reshape_eliminate 0.03% : 0.000016s : 6: substitution.set_cell_output_no_recompute 0.03% : 0.000014s : 4: substitution.specialize_transform 0.03% : 0.000018s : 12: substitution.split_environ_get_set_with_tuple_value 0.18% : 0.000090s : 34: substitution.switch_simplify 0.06% : 0.000030s : 11: substitution.tile_eliminate 0.53% : 0.000266s : 101: substitution.tuple_list_convert_item_index_to_positive 0.28% : 0.000140s : 107: substitution.tuple_list_get_item_const_eliminator 0.43% : 0.000219s : 107: substitution.tuple_list_get_item_depend_reorder 1.67% : 0.000844s : 308: substitution.tuple_list_get_item_eliminator 0.37% : 0.000186s : 107: substitution.tuple_list_get_set_item_eliminator 0.45% : 0.000225s : 210: substitution.updatestate_pure_node_eliminater 0.70% : 0.000352s : 265: substitution.updatestate_useless_node_eliminater 0.03% : 0.000015s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.758460 2 96.37% : 0.730907s : 1: type_inference.infer 3.63% : 0.027553s : 1: type_inference.specialize ------[replace.] 0.010747 775 0.41% : 0.000044s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000008s : 1: replace.arithmetic_simplify 0.53% : 0.000057s : 7: replace.depend_value_elim 0.43% : 0.000046s : 3: replace.environ_get_set_eliminate 29.14% : 0.003131s : 183: replace.getattr_setattr_resolve 30.14% : 0.003239s : 310: replace.inline 0.22% : 0.000023s : 1: replace.merge_addn 1.24% : 0.000133s : 7: replace.partial_eliminate 3.83% : 0.000412s : 25: replace.replace_applicator 4.76% : 0.000511s : 34: replace.switch_simplify 0.53% : 0.000057s : 6: replace.tuple_list_get_item_depend_reorder 28.31% : 0.003043s : 191: replace.tuple_list_get_item_eliminator 0.15% : 0.000017s : 1: replace.updatestate_useless_node_eliminater 0.25% : 0.000026s : 1: replace.virtual_dataset_eliminate ------[match.] 0.042863 775 0.04% : 0.000019s : 5: match.ad_related_special_op_eliminate 0.04% : 0.000016s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000018s : 3: match.environ_get_set_eliminate 68.44% : 0.029336s : 183: match.getattr_setattr_resolve 29.66% : 0.012713s : 310: match.inline 0.06% : 0.000026s : 1: match.merge_addn 0.11% : 0.000047s : 7: match.partial_eliminate 0.25% : 0.000106s : 25: match.replace_applicator 0.16% : 0.000068s : 34: match.switch_simplify 0.08% : 0.000034s : 6: match.tuple_list_get_item_depend_reorder 1.06% : 0.000455s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.03% : 0.000014s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020678131318 0.77% : 0.000160s : 1198: predicate.accumulaten_eliminater 0.28% : 0.000058s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000113s : 835: predicate.addn_check_dump 1.00% : 0.000207s : 1198: predicate.addn_zero_filter 0.76% : 0.000157s : 1198: predicate.adjust_all_reduce_mul_add 1.78% : 0.000367s : 2034: predicate.arithmetic_simplify 1.14% : 0.000235s : 1586: predicate.cast_eliminate 3.02% : 0.000624s : 3484: predicate.check_bprop_eliminate 0.55% : 0.000115s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.61% : 0.000127s : 242: predicate.convert_tensor_all_eliminate 1.23% : 0.000255s : 1399: predicate.convert_tensor_eliminate 0.57% : 0.000118s : 838: predicate.depend_value_elim 0.83% : 0.000171s : 1202: predicate.dict_get_item_const_eliminator 0.89% : 0.000184s : 1202: predicate.dict_get_item_eliminator 0.85% : 0.000175s : 1202: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 126: predicate.elim_not_effective 0.11% : 0.000022s : 126: predicate.elim_shapecalc_of_broadcastargs 0.86% : 0.000179s : 1334: predicate.environ_add_const_eliminate 0.87% : 0.000180s : 1337: predicate.environ_get_add_eliminate 0.85% : 0.000175s : 1334: predicate.environ_get_depend_swap 1.44% : 0.000298s : 2172: predicate.environ_get_eliminate 0.85% : 0.000176s : 1337: predicate.environ_get_set_eliminate 1.12% : 0.000232s : 1717: predicate.exchange_switch_depend_value 1.47% : 0.000305s : 1717: predicate.float_depend_g_call 0.55% : 0.000114s : 835: predicate.float_environ_get_switch 0.64% : 0.000133s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000058s : 395: predicate.get_grad_eliminate 2.47% : 0.000510s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000114s : 835: predicate.incorporate_call 0.54% : 0.000111s : 835: predicate.incorporate_call_switch 3.97% : 0.000821s : 4602: predicate.inline 2.27% : 0.000468s : 2203: predicate.inline_without_move 0.14% : 0.000029s : 395: predicate.j_node_and_user_rematch 0.34% : 0.000070s : 388: predicate.less_batch_normalization 1.12% : 0.000231s : 1660: predicate.list_to_tuple_eliminator_ 1.92% : 0.000396s : 2874: predicate.load_eliminater 0.19% : 0.000040s : 135: predicate.loop_unroll_after_grad 2.25% : 0.000464s : 2640: predicate.loop_unroll_before_grad 0.98% : 0.000203s : 1478: predicate.make_slice_get_slice_eliminator 0.57% : 0.000117s : 837: predicate.merge_addn 2.90% : 0.000599s : 3380: predicate.micro_step_allgather_replace 3.12% : 0.000646s : 3380: predicate.mini_step_allgather_replace 0.77% : 0.000158s : 1199: predicate.minmaximum_grad 0.18% : 0.000037s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 2.05% : 0.000423s : 1717: predicate.partial_defer_inline 1.13% : 0.000234s : 1541: predicate.partial_eliminate 0.79% : 0.000163s : 1198: predicate.print_const_string_wrapper 0.57% : 0.000117s : 824: predicate.reduce_all_const_elim 1.01% : 0.000209s : 1199: predicate.reduce_eliminate 0.14% : 0.000028s : 395: predicate.remove_not_recompute_node 1.95% : 0.000404s : 4829: predicate.replace_applicator 0.80% : 0.000164s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.79% : 0.000163s : 1199: predicate.reshape_eliminate 2.97% : 0.000613s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000022s : 135: predicate.row_tensor_eliminate 3.14% : 0.000650s : 3484: predicate.same_eliminate 0.24% : 0.000050s : 633: predicate.set_cell_output_no_recompute 0.30% : 0.000061s : 395: predicate.shard_identity_eliminate 2.07% : 0.000428s : 2338: predicate.special_op_eliminate 0.63% : 0.000131s : 837: predicate.specialize_transform 3.25% : 0.000672s : 3380: predicate.split_environ_get_set_with_tuple_value 1.60% : 0.000331s : 2203: predicate.stack_unstack_eliminate 1.97% : 0.000407s : 2874: predicate.stopgrad_eliminater 0.10% : 0.000020s : 135: predicate.switch_call_monad_eliminater 1.25% : 0.000257s : 1717: predicate.switch_defer_inline 4.22% : 0.000873s : 5201: predicate.switch_layer_defer_inline 4.52% : 0.000934s : 5262: predicate.switch_simplify 0.76% : 0.000158s : 1199: predicate.tile_eliminate 0.77% : 0.000159s : 1199: predicate.transpose_eliminate 1.12% : 0.000231s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.06% : 0.000218s : 1469: predicate.tuple_list_get_item_const_eliminator 1.01% : 0.000209s : 1469: predicate.tuple_list_get_item_depend_reorder 2.16% : 0.000447s : 2495: predicate.tuple_list_get_item_eliminator 1.04% : 0.000215s : 1469: predicate.tuple_list_get_set_item_eliminator 1.65% : 0.000340s : 2304: predicate.tuple_list_set_item_eliminator 1.08% : 0.000224s : 1660: predicate.tuple_to_list_eliminator_ 1.91% : 0.000395s : 2874: predicate.updatestate_pure_node_eliminater 2.53% : 0.000523s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000021s : 135: predicate.value_based_eliminate 0.29% : 0.000060s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000057s : 395: predicate.virtual_output_eliminate 0.10% : 0.000022s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.063397 747 69.01% : 0.043753s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.50% : 0.001585s : 22: func_graph_cloner_run.FuncGraphClonerNode 28.49% : 0.018059s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.443089 346 0.00% : 0.000006s : 1: ForceFp32Comm 0.28% : 0.043769s : 1: a1a2 0.00% : 0.000163s : 1: add_cache_embedding 0.00% : 0.000164s : 1: add_comm_op_reuse_tag 0.00% : 0.000754s : 1: add_recomputation 0.00% : 0.000407s : 1: assign_add_opt 0.01% : 0.002030s : 1: auto_monad 0.00% : 0.000348s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.001523s : 1: bootstrap 0.00% : 0.000078s : 1: cconv 0.00% : 0.000162s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000125s : 1: convert_after_rewriter 0.00% : 0.000321s : 1: cse_after_recomputation 0.00% : 0.000135s : 1: dataset_repeat_opt 0.00% : 0.000418s : 1: distribtued_split 0.01% : 0.001534s : 1: eliminate_special_op_node 0.00% : 0.000102s : 1: environ_conv 0.00% : 0.000025s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000034s : 1: graph_reusing 0.00% : 0.000015s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000010s : 1: handle_group_info 0.29% : 0.045367s : 1: inline 0.01% : 0.001533s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000537s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000975s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000007s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.22% : 0.034238s : 61: opt.transform.a1a2 0.00% : 0.000181s : 1: opt.transform.loop_unroll_optimizer 0.59% : 0.091801s : 148: opt.transform.opt_a 0.01% : 0.000811s : 1: opt.transform.opt_after_cconv 0.02% : 0.003216s : 27: opt.transform.opt_b 0.24% : 0.037741s : 16: opt.transform.opt_resolve 0.01% : 0.000935s : 1: opt.transform.opt_trans_graph 0.01% : 0.000956s : 6: opt.transform.special_op_eliminate 0.00% : 0.000710s : 4: opt.transform.symbol_engine_opt 3.85% : 0.594170s : 1: opt_a 0.01% : 0.001686s : 1: opt_after_cconv 0.03% : 0.004012s : 1: opt_b 3.94% : 0.609159s : 1: optimize 0.00% : 0.000153s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000090s : 1: order_py_execute_after_rewriter 0.00% : 0.000124s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000152s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000038s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000101s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000197s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000130s : 1: pipeline_split 0.00% : 0.000106s : 1: pre_auto_parallel 0.00% : 0.000174s : 1: py_interpret_to_execute 0.00% : 0.000153s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000114s : 1: remove_cast_before_assign_add 0.00% : 0.000642s : 1: remove_dup_value 0.90% : 0.139171s : 3: renormalize.infer 0.36% : 0.055586s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001048s : 1: rewriter_after_opt_a 0.01% : 0.001826s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000156s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000141s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000825s : 1: symbol_engine_optimizer 84.18% : 13.000583s : 1: task_emit 0.01% : 0.000970s : 1: tuple_transform 4.91% : 0.758976s : 1: type_inference 0.01% : 0.001349s : 1: validate TotalTime = 14.5042, [21] [bootstrap]: 0.00149894 [type_inference]: 0.763786 [auto_monad]: 0.00203569 [graph_reusing]: 2.62e-05 [inline]: 0.0442318, [2] [rewriter_before_opt_a]: 0.00160489 [a1a2]: 0.0425551, [2] [Cycle 1]: 0.0287423, [11] [expand_dump_flag]: 3.59202e-05 [switch_simplify]: 0.00109306 [loop_unroll]: 0.00067759 [a_1]: 0.0223132 [recompute_prepare]: 0.00016398 [updatestate_depend_eliminate]: 0.00036383 [updatestate_assign_eliminate]: 0.00011197 [updatestate_loads_eliminate]: 0.0002086 [parameter_eliminate]: 5.9302e-06 [a_2]: 0.00345562 [parallel_inline_pass]: 0.00010244 [Cycle 2]: 0.00551884, [11] [expand_dump_flag]: 2.35019e-06 [switch_simplify]: 9.29101e-05 [loop_unroll]: 9.23402e-05 [a_1]: 0.00320535 [recompute_prepare]: 0.00010098 [updatestate_depend_eliminate]: 0.00021649 [updatestate_assign_eliminate]: 6.61202e-05 [updatestate_loads_eliminate]: 6.36904e-05 [parameter_eliminate]: 3.47011e-06 [a_2]: 0.0014919 [parallel_inline_pass]: 0.00010116 [parallel-infer-symbol]: 0.00018204 [pre_auto_parallel]: 0.00010674 [insert-virtual-dataset]: 0.00141023 [parallel-infer-symbol-second]: 2.7502e-06 [dataset_repeat_opt]: 0.00012819 [pipeline_split]: 9.852e-05 [optimize]: 0.602641, [52] [py_interpret_to_execute]: 0.0001278 [rewriter_before_opt_a]: 0.00028409 [opt_a]: 0.588015, [3] [Cycle 1]: 0.508414, [46] [expand_dump_flag]: 2.04006e-06 [switch_simplify]: 0.00011116 [loop_unroll]: 9.835e-05 [a_1]: 0.00335231 [recompute_prepare]: 0.0001052 [updatestate_depend_eliminate]: 0.00010405 [updatestate_assign_eliminate]: 6.28498e-05 [updatestate_loads_eliminate]: 6.69402e-05 [parameter_eliminate]: 3.53018e-06 [a_2]: 0.00162061 [accelerated_algorithm]: 0.00031871 [shard]: 2.77022e-06 [meta_shard_fg_expand]: 4.88502e-05 [shard_inline]: 0.0001092 [auto_parallel]: 8.29804e-05 [parallel]: 0.0167216 [flash_sp]: 5.79399e-05 [merge_comm]: 0.0001292 [allreduce_fusion]: 7.54199e-05 [matmul_add_comm_reduction]: 0.00010208 [allreduce_slice_to_reducescatter]: 7.59959e-07 [virtual_shard_identity]: 0.00012733 [virtual_dataset]: 0.00016988 [get_grad_eliminate_]: 0.00011631 [virtual_output]: 0.00011292 [merge_forward]: 8.06102e-05 [cell_reuse_recompute_pass]: 3.18e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020916 [before_grad]: 0.00020676 [inplace_validation]: 0.00013107 [parallel_renormalize]: 0.0223759 [update_top_fg]: 8.00006e-07 [cast_eliminate]: 0.00015285 [meta_fg_expand]: 0.270812 [inplace_validation_after_expand]: 0.00153736 [flash_sp_send_recv_attached]: 0.00116836 [receive_attached]: 9.58098e-05 [after_resolve]: 0.00200106 [a_after_grad]: 0.00387728 [special_op_eliminate]: 0.00181122 [renormalize]: 0.14835 [add_forward_monad_depend]: 0.00035748 [auto_monad_grad]: 0.00021061 [auto_monad_eliminator]: 0.00185225 [cse]: 0.00414586 [a_3]: 0.0247421 [Cycle 2]: 0.0677559, [46] [expand_dump_flag]: 5.162e-05 [switch_simplify]: 0.00183663 [loop_unroll]: 0.00153521 [a_1]: 0.0306825 [recompute_prepare]: 0.0001745 [updatestate_depend_eliminate]: 0.00022394 [updatestate_assign_eliminate]: 0.00010291 [updatestate_loads_eliminate]: 0.00016028 [parameter_eliminate]: 4.21004e-06 [a_2]: 0.00431311 [accelerated_algorithm]: 0.00016291 [shard]: 2.56998e-06 [meta_shard_fg_expand]: 6.76601e-05 [shard_inline]: 0.00014061 [auto_parallel]: 0.00012289 [parallel]: 1.47899e-05 [flash_sp]: 0.00012188 [merge_comm]: 0.00010973 [allreduce_fusion]: 9.33199e-05 [matmul_add_comm_reduction]: 0.00011049 [allreduce_slice_to_reducescatter]: 6.79865e-07 [virtual_shard_identity]: 0.00014183 [virtual_dataset]: 0.00013733 [get_grad_eliminate_]: 0.00013275 [virtual_output]: 0.00013501 [merge_forward]: 9.26699e-05 [cell_reuse_recompute_pass]: 2.63983e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024789 [before_grad]: 0.00023764 [inplace_validation]: 8.85399e-05 [parallel_renormalize]: 8.98726e-08 [update_top_fg]: 5.40167e-07 [cast_eliminate]: 0.00015566 [meta_fg_expand]: 0.00026791 [inplace_validation_after_expand]: 0.00018837 [flash_sp_send_recv_attached]: 2.35997e-06 [receive_attached]: 1.92039e-06 [after_resolve]: 0.00016367 [a_after_grad]: 0.00023166 [special_op_eliminate]: 0.0001372 [renormalize]: 0.017186 [add_forward_monad_depend]: 5.09014e-06 [auto_monad_grad]: 2.05962e-06 [auto_monad_eliminator]: 0.00028844 [cse]: 0.00647929 [a_3]: 0.00094858 [Cycle 3]: 0.0118223, [46] [expand_dump_flag]: 2.17976e-06 [switch_simplify]: 0.00013297 [loop_unroll]: 0.00012876 [a_1]: 0.00432304 [recompute_prepare]: 0.00013783 [updatestate_depend_eliminate]: 0.00014369 [updatestate_assign_eliminate]: 9.29097e-05 [updatestate_loads_eliminate]: 9.11597e-05 [parameter_eliminate]: 2.69013e-06 [a_2]: 0.00210017 [accelerated_algorithm]: 0.00015536 [shard]: 1.53994e-06 [meta_shard_fg_expand]: 4.89303e-05 [shard_inline]: 0.00013409 [auto_parallel]: 0.00011068 [parallel]: 8.62963e-06 [flash_sp]: 2.23005e-06 [merge_comm]: 0.00010384 [allreduce_fusion]: 9.31798e-05 [matmul_add_comm_reduction]: 0.00011705 [allreduce_slice_to_reducescatter]: 3.39933e-07 [virtual_shard_identity]: 0.00013725 [virtual_dataset]: 0.00013197 [get_grad_eliminate_]: 0.00012733 [virtual_output]: 0.00013027 [merge_forward]: 9.323e-05 [cell_reuse_recompute_pass]: 2.93972e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024392 [before_grad]: 0.00023338 [inplace_validation]: 8.83499e-05 [parallel_renormalize]: 7.03149e-08 [update_top_fg]: 4.49829e-07 [cast_eliminate]: 0.00014848 [meta_fg_expand]: 0.00010959 [inplace_validation_after_expand]: 0.00011251 [flash_sp_send_recv_attached]: 1.66008e-06 [receive_attached]: 1.09989e-06 [after_resolve]: 0.00014672 [a_after_grad]: 0.00021914 [special_op_eliminate]: 0.00013157 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 2.48989e-06 [auto_monad_grad]: 1.69035e-06 [auto_monad_eliminator]: 0.00020688 [cse]: 0.00040086 [a_3]: 0.00092764 [py_interpret_to_execute_after_opt_a]: 0.00013095 [slice_cell_reuse_recomputed_activation]: 2.76975e-06 [rewriter_after_opt_a]: 0.00098162 [convert_after_rewriter]: 0.00011176 [order_py_execute_after_rewriter]: 8.12304e-05 [opt_b]: 0.00394401, [1] [Cycle 1]: 0.00393531, [7] [b_1]: 0.00307797 [b_2]: 0.00013838 [updatestate_depend_eliminate]: 9.422e-05 [updatestate_assign_eliminate]: 8.58298e-05 [updatestate_loads_eliminate]: 8.91099e-05 [renormalize]: 4.89876e-07 [cse]: 0.00039311 [optimize_parallel_all_gather_comm]: 0.0001337 [overlap_param_gather]: 1.41002e-06 [cconv]: 6.87102e-05 [loop_unroll]: 0.00096244 [opt_after_cconv]: 0.00157081, [1] [Cycle 1]: 0.00156204, [7] [c_1]: 0.00080738 [parameter_eliminate]: 2.82982e-06 [updatestate_depend_eliminate]: 0.0001283 [updatestate_assign_eliminate]: 9.03998e-05 [updatestate_loads_eliminate]: 9.14899e-05 [cse]: 0.00038502 [renormalize]: 6.10016e-07 [remove_dup_value]: 0.00057873 [tuple_transform]: 0.00095433, [1] [Cycle 1]: 0.00094674, [2] [d_1]: 0.00093002 [renormalize]: 5.69969e-07 [partial_unused_args_eliminate]: 3.49991e-06 [add_cache_embedding]: 0.00014535 [add_recomputation]: 0.00069825 [cse_after_recomputation]: 0.00038385, [1] [Cycle 1]: 0.0003758, [1] [cse]: 0.00036158 [environ_conv]: 9.10801e-05 [swap_dp_allreduce_reducescatter]: 0.00012859 [bias_add_comm_swap]: 2.90014e-06 [label_micro_interleaved_index]: 2.11969e-06 [label_fine_grained_interleaved_index]: 0.00051458 [merge_cast_opt]: 1.57021e-06 [slice_recompute_activation]: 0.00014766 [micro_interleaved_order_control]: 2.04984e-06 [assign_add_opt]: 0.00048875 [ForceFp32Comm]: 1.48034e-06 [remove_cast_before_assign_add]: 0.00010533 [full_micro_interleaved_order_control]: 2.59979e-06 [reorder_send_recv_between_fp_bp]: 2.02004e-06 [comm_op_add_attrs]: 0.00014975 [add_comm_op_reuse_tag]: 0.00014578 [interleave_split_concat_branches]: 1.07987e-06 [interleave_parallel_branches]: 9.19681e-07 [overlap_opt_shard_in_pipeline]: 2.85101e-05 [overlap_opt_shard_grad_in_pipeline]: 3.6601e-06 [control_data_broadcast_order]: 1.22981e-06 [grouped_pairwise_exchange_alltoall]: 1.03498e-05 [offloading_packed_experts]: 2.42004e-06 [overlap_recompute_and_grad_model_parallel]: 2.31015e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.801e-07 [overlap_recompute_allgather_and_fa_grad]: 9.89097e-05 [overlap_grad_ring_attention]: 0.00015643 [overlap_grad_flash_sp]: 0.00012072 [begin_end_overlap_inline]: 8.69855e-07 [split_matmul_comm_elemetwise]: 2.00002e-06 [split_layernorm_comm]: 2.06009e-06 [handle_group_info]: 7.24988e-06 [symbol_engine_optimizer]: 0.00079475, [1] [Cycle 1]: 0.00078783, [6] [build]: 5.01201e-05 [elim_shapecalc]: 0.00014258 [elim_not_effective]: 0.00021969 [opt_reshape]: 0.00012973 [fold_const_symbol]: 0.00020774 [renormalize]: 4.89876e-07 [pipeline_parallel_scheduler]: 3.68012e-06 [auto_monad_reorder]: 0.00033894 [get_jit_bprop_graph]: 5.29923e-07 [rewriter_after_jit_bprop_graph]: 4.49829e-07 [eliminate_special_op_node]: 0.00142391 [distribtued_split]: 0.00038784 [validate]: 0.00029043 [task_emit]: 13.0841 [execute]: 1.34697e-05 Sums bootstrap : 0.001499s : 0.01% type_inference : 0.763786s : 5.27% auto_monad : 0.002036s : 0.01% graph_reusing : 0.000026s : 0.00% inline.rewriter_before_opt_a : 0.001605s : 0.01% inline.a1a2.expand_dump_flag : 0.000038s : 0.00% inline.a1a2.switch_simplify : 0.001186s : 0.01% inline.a1a2.loop_unroll : 0.000770s : 0.01% inline.a1a2.a_1 : 0.025519s : 0.18% inline.a1a2.recompute_prepare : 0.000265s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000580s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000178s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000272s : 0.00% inline.a1a2.parameter_eliminate : 0.000009s : 0.00% inline.a1a2.a_2 : 0.004948s : 0.03% inline.a1a2.parallel_inline_pass : 0.000204s : 0.00% parallel-infer-symbol : 0.000182s : 0.00% pre_auto_parallel : 0.000107s : 0.00% insert-virtual-dataset : 0.001410s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000128s : 0.00% pipeline_split : 0.000099s : 0.00% optimize.py_interpret_to_execute : 0.000128s : 0.00% optimize.rewriter_before_opt_a : 0.000284s : 0.00% optimize.opt_a.expand_dump_flag : 0.000056s : 0.00% optimize.opt_a.switch_simplify : 0.002081s : 0.01% optimize.opt_a.loop_unroll : 0.001762s : 0.01% optimize.opt_a.a_1 : 0.038358s : 0.26% optimize.opt_a.recompute_prepare : 0.000418s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000472s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000259s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000318s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.008034s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000637s : 0.00% optimize.opt_a.shard : 0.000007s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000165s : 0.00% optimize.opt_a.shard_inline : 0.000384s : 0.00% optimize.opt_a.auto_parallel : 0.000317s : 0.00% optimize.opt_a.parallel : 0.016745s : 0.12% optimize.opt_a.flash_sp : 0.000182s : 0.00% optimize.opt_a.merge_comm : 0.000343s : 0.00% optimize.opt_a.allreduce_fusion : 0.000262s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000330s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000406s : 0.00% optimize.opt_a.virtual_dataset : 0.000439s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000376s : 0.00% optimize.opt_a.virtual_output : 0.000378s : 0.00% optimize.opt_a.merge_forward : 0.000267s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000701s : 0.00% optimize.opt_a.before_grad : 0.000678s : 0.00% optimize.opt_a.inplace_validation : 0.000308s : 0.00% optimize.opt_a.parallel_renormalize : 0.022376s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000457s : 0.00% optimize.opt_a.meta_fg_expand : 0.271190s : 1.87% optimize.opt_a.inplace_validation_after_expand : 0.001838s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001172s : 0.01% optimize.opt_a.receive_attached : 0.000099s : 0.00% optimize.opt_a.after_resolve : 0.002311s : 0.02% optimize.opt_a.a_after_grad : 0.004328s : 0.03% optimize.opt_a.special_op_eliminate : 0.002080s : 0.01% optimize.opt_a.renormalize : 0.165536s : 1.14% optimize.opt_a.add_forward_monad_depend : 0.000365s : 0.00% optimize.opt_a.auto_monad_grad : 0.000214s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002348s : 0.02% optimize.opt_a.cse : 0.011026s : 0.08% optimize.opt_a.a_3 : 0.026618s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000131s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000982s : 0.01% optimize.convert_after_rewriter : 0.000112s : 0.00% optimize.order_py_execute_after_rewriter : 0.000081s : 0.00% optimize.opt_b.b_1 : 0.003078s : 0.02% optimize.opt_b.b_2 : 0.000138s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000094s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000086s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000089s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000393s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000134s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000069s : 0.00% optimize.loop_unroll : 0.000962s : 0.01% optimize.opt_after_cconv.c_1 : 0.000807s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000128s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000090s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000091s : 0.00% optimize.opt_after_cconv.cse : 0.000385s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000579s : 0.00% optimize.tuple_transform.d_1 : 0.000930s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000145s : 0.00% optimize.add_recomputation : 0.000698s : 0.00% optimize.cse_after_recomputation.cse : 0.000362s : 0.00% optimize.environ_conv : 0.000091s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000129s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000515s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000148s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000489s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000105s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000150s : 0.00% optimize.add_comm_op_reuse_tag : 0.000146s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000029s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000099s : 0.00% optimize.overlap_grad_ring_attention : 0.000156s : 0.00% optimize.overlap_grad_flash_sp : 0.000121s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000050s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000143s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000220s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000130s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000208s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000339s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001424s : 0.01% distribtued_split : 0.000388s : 0.00% validate : 0.000290s : 0.00% task_emit : 13.084051s : 90.28% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.049729 4298 0.04% : 0.000020s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000020s : 9: substitution.addn_check_dump 0.11% : 0.000052s : 7: substitution.addn_zero_filter 0.03% : 0.000015s : 7: substitution.adjust_all_reduce_mul_add 0.65% : 0.000323s : 71: substitution.arithmetic_simplify 0.11% : 0.000052s : 10: substitution.cast_eliminate 0.11% : 0.000054s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000026s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000020s : 12: substitution.environ_get_depend_swap 0.06% : 0.000030s : 27: substitution.environ_get_eliminate 0.07% : 0.000035s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000019s : 23: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.03% : 0.000012s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000030s : 107: substitution.fold_const_symbol 64.92% : 0.032282s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000083s : 126: substitution.graph_param_transform 0.02% : 0.000009s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.42% : 0.011646s : 331: substitution.inline 1.43% : 0.000711s : 112: substitution.inline_without_move 0.25% : 0.000123s : 309: substitution.j_node_and_user_rematch 0.39% : 0.000195s : 40: substitution.less_batch_normalization 0.09% : 0.000046s : 90: substitution.load_eliminater 0.10% : 0.000051s : 10: substitution.merge_addn 0.23% : 0.000114s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.07% : 0.000036s : 1: substitution.partial_defer_inline 0.14% : 0.000072s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.07% : 0.000033s : 15: substitution.reduce_eliminate 0.32% : 0.000158s : 309: substitution.remove_not_recompute_node 1.99% : 0.000992s : 508: substitution.replace_applicator 0.22% : 0.000110s : 251: substitution.replace_old_param 0.07% : 0.000037s : 11: substitution.reshape_eliminate 0.03% : 0.000013s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000011s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.17% : 0.000085s : 34: substitution.switch_simplify 0.06% : 0.000029s : 11: substitution.tile_eliminate 0.51% : 0.000256s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000137s : 107: substitution.tuple_list_get_item_const_eliminator 0.43% : 0.000215s : 107: substitution.tuple_list_get_item_depend_reorder 1.53% : 0.000759s : 308: substitution.tuple_list_get_item_eliminator 0.38% : 0.000187s : 107: substitution.tuple_list_get_set_item_eliminator 0.39% : 0.000193s : 210: substitution.updatestate_pure_node_eliminater 0.69% : 0.000343s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000012s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.763260 2 96.47% : 0.736291s : 1: type_inference.infer 3.53% : 0.026969s : 1: type_inference.specialize ------[replace.] 0.009820 775 0.43% : 0.000043s : 5: replace.ad_related_special_op_eliminate 0.06% : 0.000006s : 1: replace.arithmetic_simplify 0.52% : 0.000051s : 7: replace.depend_value_elim 0.43% : 0.000042s : 3: replace.environ_get_set_eliminate 30.82% : 0.003026s : 183: replace.getattr_setattr_resolve 29.44% : 0.002891s : 310: replace.inline 0.22% : 0.000021s : 1: replace.merge_addn 1.17% : 0.000115s : 7: replace.partial_eliminate 4.46% : 0.000438s : 25: replace.replace_applicator 3.88% : 0.000381s : 34: replace.switch_simplify 0.52% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 27.68% : 0.002718s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.20% : 0.000020s : 1: replace.virtual_dataset_eliminate ------[match.] 0.042206 775 0.04% : 0.000017s : 5: match.ad_related_special_op_eliminate 0.04% : 0.000016s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000017s : 3: match.environ_get_set_eliminate 71.19% : 0.030047s : 183: match.getattr_setattr_resolve 27.06% : 0.011423s : 310: match.inline 0.06% : 0.000025s : 1: match.merge_addn 0.09% : 0.000040s : 7: match.partial_eliminate 0.24% : 0.000100s : 25: match.replace_applicator 0.15% : 0.000065s : 34: match.switch_simplify 0.08% : 0.000033s : 6: match.tuple_list_get_item_depend_reorder 0.95% : 0.000400s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.03% : 0.000011s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020952131318 0.75% : 0.000158s : 1198: predicate.accumulaten_eliminater 0.26% : 0.000055s : 254: predicate.ad_related_special_op_eliminate 0.54% : 0.000113s : 835: predicate.addn_check_dump 0.78% : 0.000164s : 1198: predicate.addn_zero_filter 0.76% : 0.000160s : 1198: predicate.adjust_all_reduce_mul_add 1.75% : 0.000367s : 2034: predicate.arithmetic_simplify 1.11% : 0.000233s : 1586: predicate.cast_eliminate 3.51% : 0.000736s : 3484: predicate.check_bprop_eliminate 0.55% : 0.000115s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000035s : 242: predicate.convert_tensor_all_eliminate 1.18% : 0.000248s : 1399: predicate.convert_tensor_eliminate 0.57% : 0.000118s : 838: predicate.depend_value_elim 0.81% : 0.000171s : 1202: predicate.dict_get_item_const_eliminator 0.86% : 0.000180s : 1202: predicate.dict_get_item_eliminator 0.82% : 0.000172s : 1202: predicate.dict_set_item_eliminator 0.04% : 0.000009s : 126: predicate.elim_not_effective 0.10% : 0.000021s : 126: predicate.elim_shapecalc_of_broadcastargs 0.84% : 0.000176s : 1334: predicate.environ_add_const_eliminate 0.85% : 0.000178s : 1337: predicate.environ_get_add_eliminate 0.83% : 0.000174s : 1334: predicate.environ_get_depend_swap 1.43% : 0.000300s : 2172: predicate.environ_get_eliminate 0.84% : 0.000177s : 1337: predicate.environ_get_set_eliminate 1.10% : 0.000230s : 1717: predicate.exchange_switch_depend_value 1.37% : 0.000288s : 1717: predicate.float_depend_g_call 0.55% : 0.000116s : 835: predicate.float_environ_get_switch 0.64% : 0.000134s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000058s : 395: predicate.get_grad_eliminate 2.38% : 0.000498s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000115s : 835: predicate.incorporate_call 0.54% : 0.000113s : 835: predicate.incorporate_call_switch 3.84% : 0.000805s : 4602: predicate.inline 2.22% : 0.000464s : 2203: predicate.inline_without_move 0.14% : 0.000029s : 395: predicate.j_node_and_user_rematch 0.32% : 0.000067s : 388: predicate.less_batch_normalization 1.10% : 0.000231s : 1660: predicate.list_to_tuple_eliminator_ 1.85% : 0.000387s : 2874: predicate.load_eliminater 0.18% : 0.000038s : 135: predicate.loop_unroll_after_grad 2.37% : 0.000496s : 2640: predicate.loop_unroll_before_grad 0.97% : 0.000203s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000116s : 837: predicate.merge_addn 3.14% : 0.000659s : 3380: predicate.micro_step_allgather_replace 3.14% : 0.000657s : 3380: predicate.mini_step_allgather_replace 0.80% : 0.000168s : 1199: predicate.minmaximum_grad 0.17% : 0.000036s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 2.00% : 0.000419s : 1717: predicate.partial_defer_inline 1.07% : 0.000225s : 1541: predicate.partial_eliminate 0.75% : 0.000157s : 1198: predicate.print_const_string_wrapper 0.55% : 0.000115s : 824: predicate.reduce_all_const_elim 0.97% : 0.000202s : 1199: predicate.reduce_eliminate 0.14% : 0.000028s : 395: predicate.remove_not_recompute_node 1.93% : 0.000405s : 4829: predicate.replace_applicator 0.79% : 0.000165s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.76% : 0.000159s : 1199: predicate.reshape_eliminate 3.18% : 0.000666s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000022s : 135: predicate.row_tensor_eliminate 3.32% : 0.000696s : 3484: predicate.same_eliminate 0.24% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.29% : 0.000060s : 395: predicate.shard_identity_eliminate 2.01% : 0.000420s : 2338: predicate.special_op_eliminate 0.63% : 0.000132s : 837: predicate.specialize_transform 3.47% : 0.000728s : 3380: predicate.split_environ_get_set_with_tuple_value 1.58% : 0.000330s : 2203: predicate.stack_unstack_eliminate 1.80% : 0.000377s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.20% : 0.000252s : 1717: predicate.switch_defer_inline 4.45% : 0.000932s : 5201: predicate.switch_layer_defer_inline 4.31% : 0.000903s : 5262: predicate.switch_simplify 0.77% : 0.000160s : 1199: predicate.tile_eliminate 0.74% : 0.000155s : 1199: predicate.transpose_eliminate 1.31% : 0.000275s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.07% : 0.000223s : 1469: predicate.tuple_list_get_item_const_eliminator 0.96% : 0.000201s : 1469: predicate.tuple_list_get_item_depend_reorder 1.88% : 0.000394s : 2495: predicate.tuple_list_get_item_eliminator 1.04% : 0.000217s : 1469: predicate.tuple_list_get_set_item_eliminator 1.72% : 0.000360s : 2304: predicate.tuple_list_set_item_eliminator 1.40% : 0.000293s : 1660: predicate.tuple_to_list_eliminator_ 1.90% : 0.000398s : 2874: predicate.updatestate_pure_node_eliminater 2.90% : 0.000607s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000021s : 135: predicate.value_based_eliminate 0.28% : 0.000059s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000056s : 395: predicate.virtual_output_eliminate 0.10% : 0.000021s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.060760 747 69.36% : 0.042145s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.44% : 0.001482s : 22: func_graph_cloner_run.FuncGraphClonerNode 28.20% : 0.017132s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.506543 346 0.00% : 0.000006s : 1: ForceFp32Comm 0.27% : 0.042560s : 1: a1a2 0.00% : 0.000152s : 1: add_cache_embedding 0.00% : 0.000152s : 1: add_comm_op_reuse_tag 0.00% : 0.000710s : 1: add_recomputation 0.00% : 0.000501s : 1: assign_add_opt 0.01% : 0.002065s : 1: auto_monad 0.00% : 0.000354s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001549s : 1: bootstrap 0.00% : 0.000075s : 1: cconv 0.00% : 0.000157s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000119s : 1: convert_after_rewriter 0.00% : 0.000389s : 1: cse_after_recomputation 0.00% : 0.000138s : 1: dataset_repeat_opt 0.00% : 0.000402s : 1: distribtued_split 0.01% : 0.001440s : 1: eliminate_special_op_node 0.00% : 0.000099s : 1: environ_conv 0.00% : 0.000024s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000036s : 1: graph_reusing 0.00% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000010s : 1: handle_group_info 0.29% : 0.044247s : 1: inline 0.01% : 0.001434s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000523s : 1: label_fine_grained_interleaved_index 0.00% : 0.000006s : 1: label_micro_interleaved_index 0.01% : 0.000974s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.032817s : 61: opt.transform.a1a2 0.00% : 0.000173s : 1: opt.transform.loop_unroll_optimizer 0.58% : 0.090193s : 148: opt.transform.opt_a 0.01% : 0.000804s : 1: opt.transform.opt_after_cconv 0.02% : 0.003184s : 27: opt.transform.opt_b 0.25% : 0.038260s : 16: opt.transform.opt_resolve 0.01% : 0.000927s : 1: opt.transform.opt_trans_graph 0.01% : 0.000835s : 6: opt.transform.special_op_eliminate 0.00% : 0.000694s : 4: opt.transform.symbol_engine_opt 3.79% : 0.588021s : 1: opt_a 0.01% : 0.001577s : 1: opt_after_cconv 0.03% : 0.003949s : 1: opt_b 3.89% : 0.602654s : 1: optimize 0.00% : 0.000141s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000086s : 1: order_py_execute_after_rewriter 0.00% : 0.000125s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000162s : 1: overlap_grad_ring_attention 0.00% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000033s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000105s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000194s : 1: parallel-infer-symbol 0.00% : 0.000010s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000108s : 1: pipeline_split 0.00% : 0.000116s : 1: pre_auto_parallel 0.00% : 0.000136s : 1: py_interpret_to_execute 0.00% : 0.000139s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000111s : 1: remove_cast_before_assign_add 0.00% : 0.000592s : 1: remove_dup_value 0.87% : 0.135139s : 3: renormalize.infer 0.34% : 0.052729s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000992s : 1: rewriter_after_opt_a 0.01% : 0.001916s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000154s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000135s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000799s : 1: symbol_engine_optimizer 84.38% : 13.084102s : 1: task_emit 0.01% : 0.000959s : 1: tuple_transform 4.93% : 0.763843s : 1: type_inference 0.01% : 0.001377s : 1: validate TotalTime = 14.791, [21] [bootstrap]: 0.00196072 [type_inference]: 0.846334 [auto_monad]: 0.00200057 [graph_reusing]: 2.43597e-05 [inline]: 0.0452915, [2] [rewriter_before_opt_a]: 0.00149884 [a1a2]: 0.043745, [2] [Cycle 1]: 0.0294323, [11] [expand_dump_flag]: 4.33801e-05 [switch_simplify]: 0.00124425 [loop_unroll]: 0.00069089 [a_1]: 0.0228944 [recompute_prepare]: 0.00018402 [updatestate_depend_eliminate]: 0.00039734 [updatestate_assign_eliminate]: 9.73302e-05 [updatestate_loads_eliminate]: 0.00021932 [parameter_eliminate]: 8.51974e-06 [a_2]: 0.00330932 [parallel_inline_pass]: 0.00010707 [Cycle 2]: 0.00549654, [11] [expand_dump_flag]: 3.90969e-06 [switch_simplify]: 0.00010406 [loop_unroll]: 0.00010118 [a_1]: 0.00328408 [recompute_prepare]: 0.00010539 [updatestate_depend_eliminate]: 7.67e-05 [updatestate_assign_eliminate]: 6.06799e-05 [updatestate_loads_eliminate]: 6.31697e-05 [parameter_eliminate]: 5.30994e-06 [a_2]: 0.00150104 [parallel_inline_pass]: 0.00010254 [parallel-infer-symbol]: 0.00021378 [pre_auto_parallel]: 0.00011262 [insert-virtual-dataset]: 0.00166529 [parallel-infer-symbol-second]: 2.54018e-06 [dataset_repeat_opt]: 0.00013712 [pipeline_split]: 0.00011299 [optimize]: 0.658405, [52] [py_interpret_to_execute]: 0.00015554 [rewriter_before_opt_a]: 0.00029174 [opt_a]: 0.643203, [3] [Cycle 1]: 0.5591, [46] [expand_dump_flag]: 2.54018e-06 [switch_simplify]: 0.00011275 [loop_unroll]: 0.00010011 [a_1]: 0.00337745 [recompute_prepare]: 0.00010549 [updatestate_depend_eliminate]: 0.00010167 [updatestate_assign_eliminate]: 6.36e-05 [updatestate_loads_eliminate]: 6.79698e-05 [parameter_eliminate]: 4.80004e-06 [a_2]: 0.00156073 [accelerated_algorithm]: 0.0002391 [shard]: 1.91992e-06 [meta_shard_fg_expand]: 5.048e-05 [shard_inline]: 0.00011001 [auto_parallel]: 7.74502e-05 [parallel]: 0.0165061 [flash_sp]: 5.97197e-05 [merge_comm]: 0.00012956 [allreduce_fusion]: 7.38199e-05 [matmul_add_comm_reduction]: 0.00010071 [allreduce_slice_to_reducescatter]: 5.29923e-07 [virtual_shard_identity]: 0.00013434 [virtual_dataset]: 0.0001638 [get_grad_eliminate_]: 0.00011499 [virtual_output]: 0.00011438 [merge_forward]: 7.26003e-05 [cell_reuse_recompute_pass]: 5.33974e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020986 [before_grad]: 0.00021172 [inplace_validation]: 0.0001315 [parallel_renormalize]: 0.0238333 [update_top_fg]: 9.69972e-07 [cast_eliminate]: 0.00015442 [meta_fg_expand]: 0.300111 [inplace_validation_after_expand]: 0.00183237 [flash_sp_send_recv_attached]: 0.00134215 [receive_attached]: 0.00011038 [after_resolve]: 0.0021205 [a_after_grad]: 0.004068 [special_op_eliminate]: 0.00191914 [renormalize]: 0.168299 [add_forward_monad_depend]: 0.00037051 [auto_monad_grad]: 0.00022616 [auto_monad_eliminator]: 0.00185738 [cse]: 0.00428261 [a_3]: 0.0238584 [Cycle 2]: 0.0721182, [46] [expand_dump_flag]: 5.02802e-05 [switch_simplify]: 0.00178951 [loop_unroll]: 0.00148737 [a_1]: 0.0316174 [recompute_prepare]: 0.00017224 [updatestate_depend_eliminate]: 0.00022512 [updatestate_assign_eliminate]: 0.0001032 [updatestate_loads_eliminate]: 0.00016848 [parameter_eliminate]: 4.36977e-06 [a_2]: 0.00433501 [accelerated_algorithm]: 0.00019498 [shard]: 2.90992e-06 [meta_shard_fg_expand]: 7.50697e-05 [shard_inline]: 0.00014026 [auto_parallel]: 0.00013811 [parallel]: 1.51e-05 [flash_sp]: 0.00012628 [merge_comm]: 0.00011274 [allreduce_fusion]: 9.36603e-05 [matmul_add_comm_reduction]: 0.00011225 [allreduce_slice_to_reducescatter]: 6.3004e-07 [virtual_shard_identity]: 0.00014753 [virtual_dataset]: 0.0001377 [get_grad_eliminate_]: 0.00015461 [virtual_output]: 0.00013615 [merge_forward]: 9.399e-05 [cell_reuse_recompute_pass]: 2.53972e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024862 [before_grad]: 0.00024137 [inplace_validation]: 8.542e-05 [parallel_renormalize]: 7.96281e-08 [update_top_fg]: 1.10967e-06 [cast_eliminate]: 0.00015596 [meta_fg_expand]: 0.00029431 [inplace_validation_after_expand]: 0.00017678 [flash_sp_send_recv_attached]: 2.54018e-06 [receive_attached]: 1.88965e-06 [after_resolve]: 0.00016202 [a_after_grad]: 0.0002556 [special_op_eliminate]: 0.00013722 [renormalize]: 0.0201409 [add_forward_monad_depend]: 5.66011e-06 [auto_monad_grad]: 3.13018e-06 [auto_monad_eliminator]: 0.00029182 [cse]: 0.00687804 [a_3]: 0.00095263 [Cycle 3]: 0.0119571, [46] [expand_dump_flag]: 2.78978e-06 [switch_simplify]: 0.00013313 [loop_unroll]: 0.00012916 [a_1]: 0.00434012 [recompute_prepare]: 0.00013732 [updatestate_depend_eliminate]: 0.00015261 [updatestate_assign_eliminate]: 9.534e-05 [updatestate_loads_eliminate]: 9.24701e-05 [parameter_eliminate]: 3.22983e-06 [a_2]: 0.00213338 [accelerated_algorithm]: 0.00015793 [shard]: 1.74018e-06 [meta_shard_fg_expand]: 5.21303e-05 [shard_inline]: 0.00013511 [auto_parallel]: 0.00011512 [parallel]: 1.095e-05 [flash_sp]: 2.44007e-06 [merge_comm]: 0.00010505 [allreduce_fusion]: 9.53502e-05 [matmul_add_comm_reduction]: 0.00012029 [allreduce_slice_to_reducescatter]: 4.60073e-07 [virtual_shard_identity]: 0.00013978 [virtual_dataset]: 0.00013432 [get_grad_eliminate_]: 0.00012787 [virtual_output]: 0.0001307 [merge_forward]: 9.38997e-05 [cell_reuse_recompute_pass]: 3.25032e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024544 [before_grad]: 0.00023533 [inplace_validation]: 8.95602e-05 [parallel_renormalize]: 8.00937e-08 [update_top_fg]: 5.0012e-07 [cast_eliminate]: 0.00014939 [meta_fg_expand]: 0.00011484 [inplace_validation_after_expand]: 0.00011513 [flash_sp_send_recv_attached]: 2.08011e-06 [receive_attached]: 1.79978e-06 [after_resolve]: 0.00014757 [a_after_grad]: 0.00022128 [special_op_eliminate]: 0.00013164 [renormalize]: 1.00117e-07 [add_forward_monad_depend]: 2.47033e-06 [auto_monad_grad]: 2.25008e-06 [auto_monad_eliminator]: 0.0001699 [cse]: 0.00044746 [a_3]: 0.00093067 [py_interpret_to_execute_after_opt_a]: 0.00019955 [slice_cell_reuse_recomputed_activation]: 2.65986e-06 [rewriter_after_opt_a]: 0.00110616 [convert_after_rewriter]: 0.00011746 [order_py_execute_after_rewriter]: 8.27098e-05 [opt_b]: 0.00395921, [1] [Cycle 1]: 0.0039459, [7] [b_1]: 0.00307602 [b_2]: 0.00013843 [updatestate_depend_eliminate]: 9.86499e-05 [updatestate_assign_eliminate]: 8.62102e-05 [updatestate_loads_eliminate]: 8.98e-05 [renormalize]: 3.89758e-07 [cse]: 0.00039825 [optimize_parallel_all_gather_comm]: 0.00015755 [overlap_param_gather]: 4.4601e-06 [cconv]: 7.08499e-05 [loop_unroll]: 0.00119803 [opt_after_cconv]: 0.00157334, [1] [Cycle 1]: 0.00156585, [7] [c_1]: 0.00079528 [parameter_eliminate]: 2.96999e-06 [updatestate_depend_eliminate]: 0.00013123 [updatestate_assign_eliminate]: 9.22601e-05 [updatestate_loads_eliminate]: 9.32403e-05 [cse]: 0.00039383 [renormalize]: 6.3004e-07 [remove_dup_value]: 0.00060848 [tuple_transform]: 0.00095013, [1] [Cycle 1]: 0.00094333, [2] [d_1]: 0.00092505 [renormalize]: 4.69852e-07 [partial_unused_args_eliminate]: 4.09968e-06 [add_cache_embedding]: 0.0001604 [add_recomputation]: 0.00075979 [cse_after_recomputation]: 0.00031156, [1] [Cycle 1]: 0.00030333, [1] [cse]: 0.00028842 [environ_conv]: 9.06903e-05 [swap_dp_allreduce_reducescatter]: 0.00012947 [bias_add_comm_swap]: 2.56021e-06 [label_micro_interleaved_index]: 2.31015e-06 [label_fine_grained_interleaved_index]: 0.0005247 [merge_cast_opt]: 1.62981e-06 [slice_recompute_activation]: 0.00014532 [micro_interleaved_order_control]: 1.97021e-06 [assign_add_opt]: 0.00041211 [ForceFp32Comm]: 1.47987e-06 [remove_cast_before_assign_add]: 0.00010422 [full_micro_interleaved_order_control]: 2.26032e-06 [reorder_send_recv_between_fp_bp]: 1.79e-06 [comm_op_add_attrs]: 0.00015512 [add_comm_op_reuse_tag]: 0.00015041 [interleave_split_concat_branches]: 1.06031e-06 [interleave_parallel_branches]: 8.59611e-07 [overlap_opt_shard_in_pipeline]: 3.02e-05 [overlap_opt_shard_grad_in_pipeline]: 3.58e-06 [control_data_broadcast_order]: 1.17021e-06 [grouped_pairwise_exchange_alltoall]: 1.06599e-05 [offloading_packed_experts]: 2.27988e-06 [overlap_recompute_and_grad_model_parallel]: 2.12016e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.01002e-06 [overlap_recompute_allgather_and_fa_grad]: 0.00014023 [overlap_grad_ring_attention]: 0.0001685 [overlap_grad_flash_sp]: 0.00012062 [begin_end_overlap_inline]: 1.1404e-06 [split_matmul_comm_elemetwise]: 1.88965e-06 [split_layernorm_comm]: 1.89012e-06 [handle_group_info]: 9.09017e-06 [symbol_engine_optimizer]: 0.00082471, [1] [Cycle 1]: 0.00081739, [6] [build]: 5.42901e-05 [elim_shapecalc]: 0.00014663 [elim_not_effective]: 0.00022066 [opt_reshape]: 0.0001297 [fold_const_symbol]: 0.00022545 [renormalize]: 4.20026e-07 [pipeline_parallel_scheduler]: 3.89991e-06 [auto_monad_reorder]: 0.00034868 [get_jit_bprop_graph]: 8.2003e-07 [rewriter_after_jit_bprop_graph]: 4.29805e-07 [eliminate_special_op_node]: 0.00139528 [distribtued_split]: 0.00039972 [validate]: 0.00033831 [task_emit]: 13.2307 [execute]: 1.253e-05 Sums bootstrap : 0.001961s : 0.01% type_inference : 0.846334s : 5.73% auto_monad : 0.002001s : 0.01% graph_reusing : 0.000024s : 0.00% inline.rewriter_before_opt_a : 0.001499s : 0.01% inline.a1a2.expand_dump_flag : 0.000047s : 0.00% inline.a1a2.switch_simplify : 0.001348s : 0.01% inline.a1a2.loop_unroll : 0.000792s : 0.01% inline.a1a2.a_1 : 0.026179s : 0.18% inline.a1a2.recompute_prepare : 0.000289s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000474s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000158s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000282s : 0.00% inline.a1a2.parameter_eliminate : 0.000014s : 0.00% inline.a1a2.a_2 : 0.004810s : 0.03% inline.a1a2.parallel_inline_pass : 0.000210s : 0.00% parallel-infer-symbol : 0.000214s : 0.00% pre_auto_parallel : 0.000113s : 0.00% insert-virtual-dataset : 0.001665s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000137s : 0.00% pipeline_split : 0.000113s : 0.00% optimize.py_interpret_to_execute : 0.000156s : 0.00% optimize.rewriter_before_opt_a : 0.000292s : 0.00% optimize.opt_a.expand_dump_flag : 0.000056s : 0.00% optimize.opt_a.switch_simplify : 0.002035s : 0.01% optimize.opt_a.loop_unroll : 0.001717s : 0.01% optimize.opt_a.a_1 : 0.039335s : 0.27% optimize.opt_a.recompute_prepare : 0.000415s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000479s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000262s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000329s : 0.00% optimize.opt_a.parameter_eliminate : 0.000012s : 0.00% optimize.opt_a.a_2 : 0.008029s : 0.05% optimize.opt_a.accelerated_algorithm : 0.000592s : 0.00% optimize.opt_a.shard : 0.000007s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000178s : 0.00% optimize.opt_a.shard_inline : 0.000385s : 0.00% optimize.opt_a.auto_parallel : 0.000331s : 0.00% optimize.opt_a.parallel : 0.016532s : 0.11% optimize.opt_a.flash_sp : 0.000188s : 0.00% optimize.opt_a.merge_comm : 0.000347s : 0.00% optimize.opt_a.allreduce_fusion : 0.000263s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000333s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000422s : 0.00% optimize.opt_a.virtual_dataset : 0.000436s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000397s : 0.00% optimize.opt_a.virtual_output : 0.000381s : 0.00% optimize.opt_a.merge_forward : 0.000260s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000011s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000704s : 0.00% optimize.opt_a.before_grad : 0.000688s : 0.00% optimize.opt_a.inplace_validation : 0.000306s : 0.00% optimize.opt_a.parallel_renormalize : 0.023833s : 0.16% optimize.opt_a.update_top_fg : 0.000003s : 0.00% optimize.opt_a.cast_eliminate : 0.000460s : 0.00% optimize.opt_a.meta_fg_expand : 0.300520s : 2.03% optimize.opt_a.inplace_validation_after_expand : 0.002124s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001347s : 0.01% optimize.opt_a.receive_attached : 0.000114s : 0.00% optimize.opt_a.after_resolve : 0.002430s : 0.02% optimize.opt_a.a_after_grad : 0.004545s : 0.03% optimize.opt_a.special_op_eliminate : 0.002188s : 0.01% optimize.opt_a.renormalize : 0.188440s : 1.28% optimize.opt_a.add_forward_monad_depend : 0.000379s : 0.00% optimize.opt_a.auto_monad_grad : 0.000232s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002319s : 0.02% optimize.opt_a.cse : 0.011608s : 0.08% optimize.opt_a.a_3 : 0.025742s : 0.17% optimize.py_interpret_to_execute_after_opt_a : 0.000200s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001106s : 0.01% optimize.convert_after_rewriter : 0.000117s : 0.00% optimize.order_py_execute_after_rewriter : 0.000083s : 0.00% optimize.opt_b.b_1 : 0.003076s : 0.02% optimize.opt_b.b_2 : 0.000138s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000099s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000086s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000090s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000398s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000158s : 0.00% optimize.overlap_param_gather : 0.000004s : 0.00% optimize.cconv : 0.000071s : 0.00% optimize.loop_unroll : 0.001198s : 0.01% optimize.opt_after_cconv.c_1 : 0.000795s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000131s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000092s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000093s : 0.00% optimize.opt_after_cconv.cse : 0.000394s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000608s : 0.00% optimize.tuple_transform.d_1 : 0.000925s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000004s : 0.00% optimize.add_cache_embedding : 0.000160s : 0.00% optimize.add_recomputation : 0.000760s : 0.01% optimize.cse_after_recomputation.cse : 0.000288s : 0.00% optimize.environ_conv : 0.000091s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000129s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000525s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000145s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000412s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000104s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000155s : 0.00% optimize.add_comm_op_reuse_tag : 0.000150s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000030s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000140s : 0.00% optimize.overlap_grad_ring_attention : 0.000169s : 0.00% optimize.overlap_grad_flash_sp : 0.000121s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000009s : 0.00% optimize.symbol_engine_optimizer.build : 0.000054s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000147s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000221s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000130s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000225s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000349s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001395s : 0.01% distribtued_split : 0.000400s : 0.00% validate : 0.000338s : 0.00% task_emit : 13.230720s : 89.53% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.053681 4298 0.04% : 0.000021s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000023s : 9: substitution.addn_check_dump 0.10% : 0.000055s : 7: substitution.addn_zero_filter 0.03% : 0.000016s : 7: substitution.adjust_all_reduce_mul_add 0.67% : 0.000359s : 71: substitution.arithmetic_simplify 0.11% : 0.000057s : 10: substitution.cast_eliminate 0.11% : 0.000057s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.08% : 0.000042s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000020s : 12: substitution.environ_get_depend_swap 0.07% : 0.000038s : 27: substitution.environ_get_eliminate 0.07% : 0.000038s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000021s : 23: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.02% : 0.000013s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000030s : 107: substitution.fold_const_symbol 65.85% : 0.035350s : 257: substitution.getattr_setattr_resolve 0.15% : 0.000083s : 126: substitution.graph_param_transform 0.02% : 0.000009s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.09% : 0.012397s : 331: substitution.inline 1.36% : 0.000731s : 112: substitution.inline_without_move 0.23% : 0.000124s : 309: substitution.j_node_and_user_rematch 0.24% : 0.000131s : 40: substitution.less_batch_normalization 0.09% : 0.000047s : 90: substitution.load_eliminater 0.10% : 0.000052s : 10: substitution.merge_addn 0.22% : 0.000119s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.04% : 0.000020s : 1: substitution.partial_defer_inline 0.13% : 0.000068s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.06% : 0.000035s : 15: substitution.reduce_eliminate 0.29% : 0.000157s : 309: substitution.remove_not_recompute_node 1.83% : 0.000984s : 508: substitution.replace_applicator 0.21% : 0.000114s : 251: substitution.replace_old_param 0.08% : 0.000042s : 11: substitution.reshape_eliminate 0.03% : 0.000019s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000013s : 4: substitution.specialize_transform 0.03% : 0.000018s : 12: substitution.split_environ_get_set_with_tuple_value 0.19% : 0.000104s : 34: substitution.switch_simplify 0.06% : 0.000030s : 11: substitution.tile_eliminate 0.49% : 0.000265s : 101: substitution.tuple_list_convert_item_index_to_positive 0.25% : 0.000135s : 107: substitution.tuple_list_get_item_const_eliminator 0.41% : 0.000220s : 107: substitution.tuple_list_get_item_depend_reorder 1.50% : 0.000806s : 308: substitution.tuple_list_get_item_eliminator 0.34% : 0.000185s : 107: substitution.tuple_list_get_set_item_eliminator 0.37% : 0.000198s : 210: substitution.updatestate_pure_node_eliminater 0.67% : 0.000357s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000012s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.845829 2 96.78% : 0.818563s : 1: type_inference.infer 3.22% : 0.027265s : 1: type_inference.specialize ------[replace.] 0.010451 775 0.41% : 0.000043s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000007s : 1: replace.arithmetic_simplify 0.62% : 0.000065s : 7: replace.depend_value_elim 0.42% : 0.000043s : 3: replace.environ_get_set_eliminate 32.70% : 0.003417s : 183: replace.getattr_setattr_resolve 28.37% : 0.002965s : 310: replace.inline 0.21% : 0.000022s : 1: replace.merge_addn 1.13% : 0.000118s : 7: replace.partial_eliminate 3.86% : 0.000403s : 25: replace.replace_applicator 4.07% : 0.000426s : 34: replace.switch_simplify 0.49% : 0.000052s : 6: replace.tuple_list_get_item_depend_reorder 27.29% : 0.002852s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000017s : 1: replace.updatestate_useless_node_eliminater 0.21% : 0.000022s : 1: replace.virtual_dataset_eliminate ------[match.] 0.045519 775 0.04% : 0.000017s : 5: match.ad_related_special_op_eliminate 0.04% : 0.000017s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000019s : 3: match.environ_get_set_eliminate 71.51% : 0.032550s : 183: match.getattr_setattr_resolve 26.76% : 0.012179s : 310: match.inline 0.05% : 0.000024s : 1: match.merge_addn 0.11% : 0.000048s : 7: match.partial_eliminate 0.23% : 0.000106s : 25: match.replace_applicator 0.18% : 0.000084s : 34: match.switch_simplify 0.08% : 0.000037s : 6: match.tuple_list_get_item_depend_reorder 0.91% : 0.000414s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000010s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000011s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020923131318 0.83% : 0.000174s : 1198: predicate.accumulaten_eliminater 0.27% : 0.000056s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000115s : 835: predicate.addn_check_dump 0.78% : 0.000163s : 1198: predicate.addn_zero_filter 0.74% : 0.000155s : 1198: predicate.adjust_all_reduce_mul_add 1.91% : 0.000400s : 2034: predicate.arithmetic_simplify 1.14% : 0.000238s : 1586: predicate.cast_eliminate 3.18% : 0.000666s : 3484: predicate.check_bprop_eliminate 0.56% : 0.000117s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.27% : 0.000266s : 1399: predicate.convert_tensor_eliminate 0.57% : 0.000120s : 838: predicate.depend_value_elim 0.82% : 0.000171s : 1202: predicate.dict_get_item_const_eliminator 0.88% : 0.000184s : 1202: predicate.dict_get_item_eliminator 0.82% : 0.000171s : 1202: predicate.dict_set_item_eliminator 0.04% : 0.000009s : 126: predicate.elim_not_effective 0.11% : 0.000023s : 126: predicate.elim_shapecalc_of_broadcastargs 0.86% : 0.000180s : 1334: predicate.environ_add_const_eliminate 0.86% : 0.000180s : 1337: predicate.environ_get_add_eliminate 0.85% : 0.000178s : 1334: predicate.environ_get_depend_swap 1.47% : 0.000308s : 2172: predicate.environ_get_eliminate 0.83% : 0.000174s : 1337: predicate.environ_get_set_eliminate 1.14% : 0.000238s : 1717: predicate.exchange_switch_depend_value 1.42% : 0.000296s : 1717: predicate.float_depend_g_call 0.55% : 0.000115s : 835: predicate.float_environ_get_switch 0.64% : 0.000133s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.37% : 0.000077s : 395: predicate.get_grad_eliminate 2.71% : 0.000566s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000115s : 835: predicate.incorporate_call 0.54% : 0.000112s : 835: predicate.incorporate_call_switch 3.89% : 0.000815s : 4602: predicate.inline 2.44% : 0.000511s : 2203: predicate.inline_without_move 0.14% : 0.000029s : 395: predicate.j_node_and_user_rematch 0.44% : 0.000092s : 388: predicate.less_batch_normalization 1.11% : 0.000231s : 1660: predicate.list_to_tuple_eliminator_ 1.83% : 0.000382s : 2874: predicate.load_eliminater 0.20% : 0.000041s : 135: predicate.loop_unroll_after_grad 2.28% : 0.000477s : 2640: predicate.loop_unroll_before_grad 1.00% : 0.000210s : 1478: predicate.make_slice_get_slice_eliminator 0.57% : 0.000119s : 837: predicate.merge_addn 2.88% : 0.000602s : 3380: predicate.micro_step_allgather_replace 2.98% : 0.000624s : 3380: predicate.mini_step_allgather_replace 0.77% : 0.000160s : 1199: predicate.minmaximum_grad 0.17% : 0.000036s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.19% : 0.000040s : 135: predicate.parallel_virtual_node 2.03% : 0.000424s : 1717: predicate.partial_defer_inline 1.08% : 0.000226s : 1541: predicate.partial_eliminate 0.76% : 0.000159s : 1198: predicate.print_const_string_wrapper 0.56% : 0.000117s : 824: predicate.reduce_all_const_elim 0.97% : 0.000204s : 1199: predicate.reduce_eliminate 0.14% : 0.000029s : 395: predicate.remove_not_recompute_node 1.96% : 0.000409s : 4829: predicate.replace_applicator 1.08% : 0.000226s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.87% : 0.000183s : 1199: predicate.reshape_eliminate 3.04% : 0.000636s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000023s : 135: predicate.row_tensor_eliminate 3.16% : 0.000662s : 3484: predicate.same_eliminate 0.24% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.30% : 0.000062s : 395: predicate.shard_identity_eliminate 2.24% : 0.000469s : 2338: predicate.special_op_eliminate 0.63% : 0.000132s : 837: predicate.specialize_transform 3.21% : 0.000672s : 3380: predicate.split_environ_get_set_with_tuple_value 1.68% : 0.000352s : 2203: predicate.stack_unstack_eliminate 1.85% : 0.000387s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.24% : 0.000260s : 1717: predicate.switch_defer_inline 4.19% : 0.000877s : 5201: predicate.switch_layer_defer_inline 4.39% : 0.000919s : 5262: predicate.switch_simplify 0.84% : 0.000176s : 1199: predicate.tile_eliminate 0.77% : 0.000160s : 1199: predicate.transpose_eliminate 1.05% : 0.000220s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.07% : 0.000225s : 1469: predicate.tuple_list_get_item_const_eliminator 0.97% : 0.000203s : 1469: predicate.tuple_list_get_item_depend_reorder 1.93% : 0.000403s : 2495: predicate.tuple_list_get_item_eliminator 0.96% : 0.000201s : 1469: predicate.tuple_list_get_set_item_eliminator 1.82% : 0.000381s : 2304: predicate.tuple_list_set_item_eliminator 1.08% : 0.000226s : 1660: predicate.tuple_to_list_eliminator_ 1.84% : 0.000384s : 2874: predicate.updatestate_pure_node_eliminater 2.49% : 0.000521s : 3710: predicate.updatestate_useless_node_eliminater 0.12% : 0.000024s : 135: predicate.value_based_eliminate 0.28% : 0.000059s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000057s : 395: predicate.virtual_output_eliminate 0.10% : 0.000022s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.065173 747 69.67% : 0.045406s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.40% : 0.001566s : 22: func_graph_cloner_run.FuncGraphClonerNode 27.93% : 0.018201s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.886670 346 0.00% : 0.000006s : 1: ForceFp32Comm 0.28% : 0.043752s : 1: a1a2 0.00% : 0.000167s : 1: add_cache_embedding 0.00% : 0.000158s : 1: add_comm_op_reuse_tag 0.00% : 0.000774s : 1: add_recomputation 0.00% : 0.000423s : 1: assign_add_opt 0.01% : 0.002027s : 1: auto_monad 0.00% : 0.000363s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.002031s : 1: bootstrap 0.00% : 0.000077s : 1: cconv 0.00% : 0.000163s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000125s : 1: convert_after_rewriter 0.00% : 0.000317s : 1: cse_after_recomputation 0.00% : 0.000147s : 1: dataset_repeat_opt 0.00% : 0.000416s : 1: distribtued_split 0.01% : 0.001412s : 1: eliminate_special_op_node 0.00% : 0.000100s : 1: environ_conv 0.00% : 0.000023s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000033s : 1: graph_reusing 0.00% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000012s : 1: handle_group_info 0.29% : 0.045309s : 1: inline 0.01% : 0.001693s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000535s : 1: label_fine_grained_interleaved_index 0.00% : 0.000006s : 1: label_micro_interleaved_index 0.01% : 0.001211s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.033544s : 61: opt.transform.a1a2 0.00% : 0.000180s : 1: opt.transform.loop_unroll_optimizer 0.57% : 0.090645s : 148: opt.transform.opt_a 0.00% : 0.000793s : 1: opt.transform.opt_after_cconv 0.02% : 0.003183s : 27: opt.transform.opt_b 0.31% : 0.049143s : 16: opt.transform.opt_resolve 0.01% : 0.000922s : 1: opt.transform.opt_trans_graph 0.01% : 0.000839s : 6: opt.transform.special_op_eliminate 0.00% : 0.000716s : 4: opt.transform.symbol_engine_opt 4.05% : 0.643214s : 1: opt_a 0.01% : 0.001579s : 1: opt_after_cconv 0.02% : 0.003964s : 1: opt_b 4.14% : 0.658419s : 1: optimize 0.00% : 0.000166s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000088s : 1: order_py_execute_after_rewriter 0.00% : 0.000125s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000175s : 1: overlap_grad_ring_attention 0.00% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000034s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000008s : 1: overlap_param_gather 0.00% : 0.000146s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000228s : 1: parallel-infer-symbol 0.00% : 0.000010s : 1: parallel-infer-symbol-second 0.00% : 0.000008s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000123s : 1: pipeline_split 0.00% : 0.000122s : 1: pre_auto_parallel 0.00% : 0.000163s : 1: py_interpret_to_execute 0.00% : 0.000210s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000109s : 1: remove_cast_before_assign_add 0.00% : 0.000622s : 1: remove_dup_value 0.94% : 0.149537s : 3: renormalize.infer 0.39% : 0.062681s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001118s : 1: rewriter_after_opt_a 0.01% : 0.001816s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000152s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000136s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000829s : 1: symbol_engine_optimizer 83.28% : 13.230769s : 1: task_emit 0.01% : 0.000955s : 1: tuple_transform 5.33% : 0.846384s : 1: type_inference 0.01% : 0.001415s : 1: validate TotalTime = 14.4316, [21] [bootstrap]: 0.00123909 [type_inference]: 0.751971 [auto_monad]: 0.00207739 [graph_reusing]: 2.506e-05 [inline]: 0.0444302, [2] [rewriter_before_opt_a]: 0.00150468 [a1a2]: 0.0428426, [2] [Cycle 1]: 0.0291919, [11] [expand_dump_flag]: 5.31604e-05 [switch_simplify]: 0.00126852 [loop_unroll]: 0.00065601 [a_1]: 0.0225115 [recompute_prepare]: 0.00016302 [updatestate_depend_eliminate]: 0.0003801 [updatestate_assign_eliminate]: 0.00011413 [updatestate_loads_eliminate]: 0.00021669 [parameter_eliminate]: 6.74976e-06 [a_2]: 0.00347885 [parallel_inline_pass]: 0.00010544 [Cycle 2]: 0.00554851, [11] [expand_dump_flag]: 2.81958e-06 [switch_simplify]: 9.57497e-05 [loop_unroll]: 9.52799e-05 [a_1]: 0.00317578 [recompute_prepare]: 0.0001018 [updatestate_depend_eliminate]: 0.00019946 [updatestate_assign_eliminate]: 6.42301e-05 [updatestate_loads_eliminate]: 6.22203e-05 [parameter_eliminate]: 5.08968e-06 [a_2]: 0.00155679 [parallel_inline_pass]: 0.00010299 [parallel-infer-symbol]: 0.00020099 [pre_auto_parallel]: 9.68999e-05 [insert-virtual-dataset]: 0.00137002 [parallel-infer-symbol-second]: 2.86009e-06 [dataset_repeat_opt]: 0.00014695 [pipeline_split]: 9.92403e-05 [optimize]: 0.5992, [52] [py_interpret_to_execute]: 0.00014998 [rewriter_before_opt_a]: 0.00027843 [opt_a]: 0.584315, [3] [Cycle 1]: 0.502426, [46] [expand_dump_flag]: 1.91014e-06 [switch_simplify]: 0.00011204 [loop_unroll]: 0.00010098 [a_1]: 0.00338133 [recompute_prepare]: 0.00010459 [updatestate_depend_eliminate]: 0.00010729 [updatestate_assign_eliminate]: 6.223e-05 [updatestate_loads_eliminate]: 6.91502e-05 [parameter_eliminate]: 5.20982e-06 [a_2]: 0.00167566 [accelerated_algorithm]: 0.00030648 [shard]: 3.09991e-06 [meta_shard_fg_expand]: 4.80497e-05 [shard_inline]: 0.00010951 [auto_parallel]: 8.22698e-05 [parallel]: 0.0165294 [flash_sp]: 7.25999e-05 [merge_comm]: 0.00013218 [allreduce_fusion]: 7.51498e-05 [matmul_add_comm_reduction]: 0.00010105 [allreduce_slice_to_reducescatter]: 5.80214e-07 [virtual_shard_identity]: 0.000131 [virtual_dataset]: 0.00017559 [get_grad_eliminate_]: 0.00011797 [virtual_output]: 0.00011583 [merge_forward]: 7.906e-05 [cell_reuse_recompute_pass]: 3.94043e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.0002122 [before_grad]: 0.00021115 [inplace_validation]: 0.00013079 [parallel_renormalize]: 0.0229218 [update_top_fg]: 1.73971e-06 [cast_eliminate]: 0.00015508 [meta_fg_expand]: 0.263384 [inplace_validation_after_expand]: 0.00155831 [flash_sp_send_recv_attached]: 0.00123587 [receive_attached]: 8.826e-05 [after_resolve]: 0.00195526 [a_after_grad]: 0.00386749 [special_op_eliminate]: 0.00189682 [renormalize]: 0.149379 [add_forward_monad_depend]: 0.00036726 [auto_monad_grad]: 0.00021916 [auto_monad_eliminator]: 0.00184141 [cse]: 0.00421317 [a_3]: 0.024496 [Cycle 2]: 0.0699371, [46] [expand_dump_flag]: 5.16502e-05 [switch_simplify]: 0.001826 [loop_unroll]: 0.00154664 [a_1]: 0.0314268 [recompute_prepare]: 0.00017455 [updatestate_depend_eliminate]: 0.00023075 [updatestate_assign_eliminate]: 0.00010706 [updatestate_loads_eliminate]: 0.00016717 [parameter_eliminate]: 3.96976e-06 [a_2]: 0.00442621 [accelerated_algorithm]: 0.00016439 [shard]: 2.2999e-06 [meta_shard_fg_expand]: 7.54301e-05 [shard_inline]: 0.00014097 [auto_parallel]: 0.00011679 [parallel]: 1.40099e-05 [flash_sp]: 0.00012519 [merge_comm]: 0.00011224 [allreduce_fusion]: 9.44599e-05 [matmul_add_comm_reduction]: 0.00011547 [allreduce_slice_to_reducescatter]: 6.19795e-07 [virtual_shard_identity]: 0.00014797 [virtual_dataset]: 0.00014037 [get_grad_eliminate_]: 0.00013428 [virtual_output]: 0.00013831 [merge_forward]: 9.42899e-05 [cell_reuse_recompute_pass]: 2.21981e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025055 [before_grad]: 0.00024379 [inplace_validation]: 8.54898e-05 [parallel_renormalize]: 1.00117e-07 [update_top_fg]: 1.05985e-06 [cast_eliminate]: 0.00020997 [meta_fg_expand]: 0.00027988 [inplace_validation_after_expand]: 0.00018051 [flash_sp_send_recv_attached]: 2.17976e-06 [receive_attached]: 1.75973e-06 [after_resolve]: 0.00016311 [a_after_grad]: 0.00023176 [special_op_eliminate]: 0.0001378 [renormalize]: 0.0181564 [add_forward_monad_depend]: 6.47036e-06 [auto_monad_grad]: 2.86987e-06 [auto_monad_eliminator]: 0.00029224 [cse]: 0.00671884 [a_3]: 0.00096507 [Cycle 3]: 0.0119265, [46] [expand_dump_flag]: 2.52016e-06 [switch_simplify]: 0.00013443 [loop_unroll]: 0.00012997 [a_1]: 0.00430984 [recompute_prepare]: 0.00014014 [updatestate_depend_eliminate]: 0.00015123 [updatestate_assign_eliminate]: 9.62801e-05 [updatestate_loads_eliminate]: 9.31001e-05 [parameter_eliminate]: 3.70992e-06 [a_2]: 0.0020674 [accelerated_algorithm]: 0.00015725 [shard]: 2.57976e-06 [meta_shard_fg_expand]: 5.19501e-05 [shard_inline]: 0.00013475 [auto_parallel]: 0.0001149 [parallel]: 1.18501e-05 [flash_sp]: 2.56998e-06 [merge_comm]: 0.0001098 [allreduce_fusion]: 9.68901e-05 [matmul_add_comm_reduction]: 0.00012139 [allreduce_slice_to_reducescatter]: 3.70201e-07 [virtual_shard_identity]: 0.00014107 [virtual_dataset]: 0.00013286 [get_grad_eliminate_]: 0.00012842 [virtual_output]: 0.00017203 [merge_forward]: 0.00010009 [cell_reuse_recompute_pass]: 3.43006e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024845 [before_grad]: 0.00023689 [inplace_validation]: 9.14e-05 [parallel_renormalize]: 9.96515e-08 [update_top_fg]: 6.9011e-07 [cast_eliminate]: 0.00015026 [meta_fg_expand]: 0.00011407 [inplace_validation_after_expand]: 0.00011877 [flash_sp_send_recv_attached]: 2.21981e-06 [receive_attached]: 1.9297e-06 [after_resolve]: 0.00014877 [a_after_grad]: 0.00022135 [special_op_eliminate]: 0.00013257 [renormalize]: 9.96515e-08 [add_forward_monad_depend]: 2.96999e-06 [auto_monad_grad]: 2.22027e-06 [auto_monad_eliminator]: 0.00017736 [cse]: 0.00041034 [a_3]: 0.0009474 [py_interpret_to_execute_after_opt_a]: 0.00014623 [slice_cell_reuse_recomputed_activation]: 2.50014e-06 [rewriter_after_opt_a]: 0.00104867 [convert_after_rewriter]: 0.00011831 [order_py_execute_after_rewriter]: 8.23704e-05 [opt_b]: 0.00401494, [1] [Cycle 1]: 0.00400659, [7] [b_1]: 0.00306905 [b_2]: 0.00020151 [updatestate_depend_eliminate]: 9.797e-05 [updatestate_assign_eliminate]: 8.71299e-05 [updatestate_loads_eliminate]: 9.043e-05 [renormalize]: 5.40167e-07 [cse]: 0.00039875 [optimize_parallel_all_gather_comm]: 0.00013656 [overlap_param_gather]: 1.70013e-06 [cconv]: 7.30399e-05 [loop_unroll]: 0.00101337 [opt_after_cconv]: 0.00156616, [1] [Cycle 1]: 0.00155894, [7] [c_1]: 0.00078646 [parameter_eliminate]: 2.90992e-06 [updatestate_depend_eliminate]: 0.0001309 [updatestate_assign_eliminate]: 9.441e-05 [updatestate_loads_eliminate]: 9.20999e-05 [cse]: 0.00039452 [renormalize]: 5.49946e-07 [remove_dup_value]: 0.00066265 [tuple_transform]: 0.00095072, [1] [Cycle 1]: 0.00094379, [2] [d_1]: 0.00092556 [renormalize]: 3.39933e-07 [partial_unused_args_eliminate]: 3.56976e-06 [add_cache_embedding]: 0.00015405 [add_recomputation]: 0.00071991 [cse_after_recomputation]: 0.00030767, [1] [Cycle 1]: 0.00029993, [1] [cse]: 0.00028643 [environ_conv]: 9.69502e-05 [swap_dp_allreduce_reducescatter]: 0.00012818 [bias_add_comm_swap]: 2.59001e-06 [label_micro_interleaved_index]: 1.8701e-06 [label_fine_grained_interleaved_index]: 0.00052147 [merge_cast_opt]: 1.4999e-06 [slice_recompute_activation]: 0.00014766 [micro_interleaved_order_control]: 1.70013e-06 [assign_add_opt]: 0.00039388 [ForceFp32Comm]: 1.29035e-06 [remove_cast_before_assign_add]: 0.00011841 [full_micro_interleaved_order_control]: 2.42004e-06 [reorder_send_recv_between_fp_bp]: 1.69966e-06 [comm_op_add_attrs]: 0.00015149 [add_comm_op_reuse_tag]: 0.00016243 [interleave_split_concat_branches]: 1.39e-06 [interleave_parallel_branches]: 8.60076e-07 [overlap_opt_shard_in_pipeline]: 3.16901e-05 [overlap_opt_shard_grad_in_pipeline]: 3.64985e-06 [control_data_broadcast_order]: 1.22003e-06 [grouped_pairwise_exchange_alltoall]: 1.12201e-05 [offloading_packed_experts]: 2.51969e-06 [overlap_recompute_and_grad_model_parallel]: 1.97999e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.10251e-07 [overlap_recompute_allgather_and_fa_grad]: 8.69101e-05 [overlap_grad_ring_attention]: 0.00014595 [overlap_grad_flash_sp]: 0.000121 [begin_end_overlap_inline]: 8.19564e-07 [split_matmul_comm_elemetwise]: 2.08011e-06 [split_layernorm_comm]: 2.19001e-06 [handle_group_info]: 5.89993e-06 [symbol_engine_optimizer]: 0.00081159, [1] [Cycle 1]: 0.00080446, [6] [build]: 5.48502e-05 [elim_shapecalc]: 0.00014441 [elim_not_effective]: 0.00022301 [opt_reshape]: 0.00013163 [fold_const_symbol]: 0.00021107 [renormalize]: 3.39933e-07 [pipeline_parallel_scheduler]: 4.04986e-06 [auto_monad_reorder]: 0.00033559 [get_jit_bprop_graph]: 5.29923e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00140001 [distribtued_split]: 0.00039009 [validate]: 0.00035635 [task_emit]: 13.0268 [execute]: 1.19498e-05 Sums bootstrap : 0.001239s : 0.01% type_inference : 0.751971s : 5.21% auto_monad : 0.002077s : 0.01% graph_reusing : 0.000025s : 0.00% inline.rewriter_before_opt_a : 0.001505s : 0.01% inline.a1a2.expand_dump_flag : 0.000056s : 0.00% inline.a1a2.switch_simplify : 0.001364s : 0.01% inline.a1a2.loop_unroll : 0.000751s : 0.01% inline.a1a2.a_1 : 0.025687s : 0.18% inline.a1a2.recompute_prepare : 0.000265s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000580s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000178s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000279s : 0.00% inline.a1a2.parameter_eliminate : 0.000012s : 0.00% inline.a1a2.a_2 : 0.005036s : 0.03% inline.a1a2.parallel_inline_pass : 0.000208s : 0.00% parallel-infer-symbol : 0.000201s : 0.00% pre_auto_parallel : 0.000097s : 0.00% insert-virtual-dataset : 0.001370s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000147s : 0.00% pipeline_split : 0.000099s : 0.00% optimize.py_interpret_to_execute : 0.000150s : 0.00% optimize.rewriter_before_opt_a : 0.000278s : 0.00% optimize.opt_a.expand_dump_flag : 0.000056s : 0.00% optimize.opt_a.switch_simplify : 0.002072s : 0.01% optimize.opt_a.loop_unroll : 0.001778s : 0.01% optimize.opt_a.a_1 : 0.039118s : 0.27% optimize.opt_a.recompute_prepare : 0.000419s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000489s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000266s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000329s : 0.00% optimize.opt_a.parameter_eliminate : 0.000013s : 0.00% optimize.opt_a.a_2 : 0.008169s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000628s : 0.00% optimize.opt_a.shard : 0.000008s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000175s : 0.00% optimize.opt_a.shard_inline : 0.000385s : 0.00% optimize.opt_a.auto_parallel : 0.000314s : 0.00% optimize.opt_a.parallel : 0.016555s : 0.11% optimize.opt_a.flash_sp : 0.000200s : 0.00% optimize.opt_a.merge_comm : 0.000354s : 0.00% optimize.opt_a.allreduce_fusion : 0.000266s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000338s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000420s : 0.00% optimize.opt_a.virtual_dataset : 0.000449s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000381s : 0.00% optimize.opt_a.virtual_output : 0.000426s : 0.00% optimize.opt_a.merge_forward : 0.000273s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000010s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000711s : 0.00% optimize.opt_a.before_grad : 0.000692s : 0.00% optimize.opt_a.inplace_validation : 0.000308s : 0.00% optimize.opt_a.parallel_renormalize : 0.022922s : 0.16% optimize.opt_a.update_top_fg : 0.000003s : 0.00% optimize.opt_a.cast_eliminate : 0.000515s : 0.00% optimize.opt_a.meta_fg_expand : 0.263777s : 1.83% optimize.opt_a.inplace_validation_after_expand : 0.001858s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001240s : 0.01% optimize.opt_a.receive_attached : 0.000092s : 0.00% optimize.opt_a.after_resolve : 0.002267s : 0.02% optimize.opt_a.a_after_grad : 0.004321s : 0.03% optimize.opt_a.special_op_eliminate : 0.002167s : 0.02% optimize.opt_a.renormalize : 0.167536s : 1.16% optimize.opt_a.add_forward_monad_depend : 0.000377s : 0.00% optimize.opt_a.auto_monad_grad : 0.000224s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002311s : 0.02% optimize.opt_a.cse : 0.011342s : 0.08% optimize.opt_a.a_3 : 0.026408s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000146s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001049s : 0.01% optimize.convert_after_rewriter : 0.000118s : 0.00% optimize.order_py_execute_after_rewriter : 0.000082s : 0.00% optimize.opt_b.b_1 : 0.003069s : 0.02% optimize.opt_b.b_2 : 0.000202s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000098s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000087s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000090s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000399s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000137s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000073s : 0.00% optimize.loop_unroll : 0.001013s : 0.01% optimize.opt_after_cconv.c_1 : 0.000786s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000131s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000094s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000092s : 0.00% optimize.opt_after_cconv.cse : 0.000395s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000663s : 0.00% optimize.tuple_transform.d_1 : 0.000926s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000004s : 0.00% optimize.add_cache_embedding : 0.000154s : 0.00% optimize.add_recomputation : 0.000720s : 0.00% optimize.cse_after_recomputation.cse : 0.000286s : 0.00% optimize.environ_conv : 0.000097s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000128s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000521s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000148s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000394s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000118s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000151s : 0.00% optimize.add_comm_op_reuse_tag : 0.000162s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000032s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.00% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000087s : 0.00% optimize.overlap_grad_ring_attention : 0.000146s : 0.00% optimize.overlap_grad_flash_sp : 0.000121s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000006s : 0.00% optimize.symbol_engine_optimizer.build : 0.000055s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000144s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000223s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000132s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000211s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000336s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001400s : 0.01% distribtued_split : 0.000390s : 0.00% validate : 0.000356s : 0.00% task_emit : 13.026795s : 90.34% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.049007 4298 0.04% : 0.000021s : 5: substitution.ad_related_special_op_eliminate 0.05% : 0.000023s : 9: substitution.addn_check_dump 0.12% : 0.000056s : 7: substitution.addn_zero_filter 0.03% : 0.000017s : 7: substitution.adjust_all_reduce_mul_add 0.69% : 0.000336s : 71: substitution.arithmetic_simplify 0.12% : 0.000057s : 10: substitution.cast_eliminate 0.12% : 0.000057s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.21% : 0.000103s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000019s : 12: substitution.environ_get_depend_swap 0.07% : 0.000032s : 27: substitution.environ_get_eliminate 0.07% : 0.000037s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000021s : 23: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.03% : 0.000014s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000030s : 107: substitution.fold_const_symbol 62.96% : 0.030853s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000084s : 126: substitution.graph_param_transform 0.02% : 0.000009s : 8: substitution.incorporate_call 0.01% : 0.000006s : 8: substitution.incorporate_call_switch 24.87% : 0.012187s : 331: substitution.inline 1.36% : 0.000667s : 112: substitution.inline_without_move 0.26% : 0.000125s : 309: substitution.j_node_and_user_rematch 0.37% : 0.000183s : 40: substitution.less_batch_normalization 0.09% : 0.000046s : 90: substitution.load_eliminater 0.10% : 0.000051s : 10: substitution.merge_addn 0.28% : 0.000136s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.09% : 0.000046s : 1: substitution.partial_defer_inline 0.12% : 0.000061s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.07% : 0.000035s : 15: substitution.reduce_eliminate 0.33% : 0.000159s : 309: substitution.remove_not_recompute_node 2.01% : 0.000984s : 508: substitution.replace_applicator 0.22% : 0.000108s : 251: substitution.replace_old_param 0.08% : 0.000040s : 11: substitution.reshape_eliminate 0.03% : 0.000014s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000011s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.19% : 0.000092s : 34: substitution.switch_simplify 0.06% : 0.000030s : 11: substitution.tile_eliminate 0.53% : 0.000260s : 101: substitution.tuple_list_convert_item_index_to_positive 0.28% : 0.000136s : 107: substitution.tuple_list_get_item_const_eliminator 0.44% : 0.000218s : 107: substitution.tuple_list_get_item_depend_reorder 1.64% : 0.000803s : 308: substitution.tuple_list_get_item_eliminator 0.40% : 0.000195s : 107: substitution.tuple_list_get_set_item_eliminator 0.41% : 0.000202s : 210: substitution.updatestate_pure_node_eliminater 0.72% : 0.000352s : 265: substitution.updatestate_useless_node_eliminater 0.03% : 0.000013s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.751472 2 96.27% : 0.723444s : 1: type_inference.infer 3.73% : 0.028028s : 1: type_inference.specialize ------[replace.] 0.010010 775 0.41% : 0.000041s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000007s : 1: replace.arithmetic_simplify 0.56% : 0.000056s : 7: replace.depend_value_elim 0.40% : 0.000040s : 3: replace.environ_get_set_eliminate 30.52% : 0.003055s : 183: replace.getattr_setattr_resolve 29.70% : 0.002973s : 310: replace.inline 0.22% : 0.000022s : 1: replace.merge_addn 1.19% : 0.000119s : 7: replace.partial_eliminate 3.82% : 0.000383s : 25: replace.replace_applicator 4.24% : 0.000425s : 34: replace.switch_simplify 0.51% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 27.97% : 0.002800s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.21% : 0.000021s : 1: replace.virtual_dataset_eliminate ------[match.] 0.041438 775 0.04% : 0.000018s : 5: match.ad_related_special_op_eliminate 0.04% : 0.000016s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000016s : 3: match.environ_get_set_eliminate 69.30% : 0.028714s : 183: match.getattr_setattr_resolve 28.86% : 0.011958s : 310: match.inline 0.06% : 0.000024s : 1: match.merge_addn 0.10% : 0.000041s : 7: match.partial_eliminate 0.23% : 0.000096s : 25: match.replace_applicator 0.17% : 0.000072s : 34: match.switch_simplify 0.08% : 0.000035s : 6: match.tuple_list_get_item_depend_reorder 1.02% : 0.000424s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.03% : 0.000012s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020824131318 0.77% : 0.000160s : 1198: predicate.accumulaten_eliminater 0.26% : 0.000055s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000114s : 835: predicate.addn_check_dump 0.77% : 0.000160s : 1198: predicate.addn_zero_filter 0.75% : 0.000156s : 1198: predicate.adjust_all_reduce_mul_add 1.73% : 0.000360s : 2034: predicate.arithmetic_simplify 1.11% : 0.000230s : 1586: predicate.cast_eliminate 3.14% : 0.000655s : 3484: predicate.check_bprop_eliminate 0.55% : 0.000115s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.14% : 0.000236s : 1399: predicate.convert_tensor_eliminate 0.57% : 0.000118s : 838: predicate.depend_value_elim 0.84% : 0.000175s : 1202: predicate.dict_get_item_const_eliminator 0.86% : 0.000180s : 1202: predicate.dict_get_item_eliminator 0.83% : 0.000172s : 1202: predicate.dict_set_item_eliminator 0.04% : 0.000009s : 126: predicate.elim_not_effective 0.10% : 0.000022s : 126: predicate.elim_shapecalc_of_broadcastargs 0.83% : 0.000173s : 1334: predicate.environ_add_const_eliminate 0.87% : 0.000180s : 1337: predicate.environ_get_add_eliminate 0.84% : 0.000174s : 1334: predicate.environ_get_depend_swap 1.44% : 0.000299s : 2172: predicate.environ_get_eliminate 0.88% : 0.000183s : 1337: predicate.environ_get_set_eliminate 1.16% : 0.000242s : 1717: predicate.exchange_switch_depend_value 1.37% : 0.000286s : 1717: predicate.float_depend_g_call 0.56% : 0.000116s : 835: predicate.float_environ_get_switch 0.64% : 0.000134s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000059s : 395: predicate.get_grad_eliminate 2.34% : 0.000488s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.54% : 0.000113s : 835: predicate.incorporate_call 0.54% : 0.000112s : 835: predicate.incorporate_call_switch 3.87% : 0.000805s : 4602: predicate.inline 2.42% : 0.000505s : 2203: predicate.inline_without_move 0.14% : 0.000029s : 395: predicate.j_node_and_user_rematch 0.33% : 0.000068s : 388: predicate.less_batch_normalization 1.11% : 0.000230s : 1660: predicate.list_to_tuple_eliminator_ 2.16% : 0.000449s : 2874: predicate.load_eliminater 0.19% : 0.000039s : 135: predicate.loop_unroll_after_grad 2.27% : 0.000473s : 2640: predicate.loop_unroll_before_grad 0.97% : 0.000202s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000117s : 837: predicate.merge_addn 3.22% : 0.000671s : 3380: predicate.micro_step_allgather_replace 3.04% : 0.000633s : 3380: predicate.mini_step_allgather_replace 0.78% : 0.000163s : 1199: predicate.minmaximum_grad 0.17% : 0.000036s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 2.00% : 0.000416s : 1717: predicate.partial_defer_inline 1.09% : 0.000227s : 1541: predicate.partial_eliminate 0.76% : 0.000158s : 1198: predicate.print_const_string_wrapper 0.56% : 0.000116s : 824: predicate.reduce_all_const_elim 0.96% : 0.000199s : 1199: predicate.reduce_eliminate 0.14% : 0.000029s : 395: predicate.remove_not_recompute_node 1.93% : 0.000402s : 4829: predicate.replace_applicator 0.79% : 0.000165s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.80% : 0.000166s : 1199: predicate.reshape_eliminate 3.11% : 0.000647s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000023s : 135: predicate.row_tensor_eliminate 3.23% : 0.000674s : 3484: predicate.same_eliminate 0.24% : 0.000050s : 633: predicate.set_cell_output_no_recompute 0.30% : 0.000062s : 395: predicate.shard_identity_eliminate 2.36% : 0.000492s : 2338: predicate.special_op_eliminate 0.63% : 0.000131s : 837: predicate.specialize_transform 3.36% : 0.000701s : 3380: predicate.split_environ_get_set_with_tuple_value 1.57% : 0.000327s : 2203: predicate.stack_unstack_eliminate 1.82% : 0.000380s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.24% : 0.000258s : 1717: predicate.switch_defer_inline 4.32% : 0.000899s : 5201: predicate.switch_layer_defer_inline 4.57% : 0.000952s : 5262: predicate.switch_simplify 0.80% : 0.000167s : 1199: predicate.tile_eliminate 0.77% : 0.000160s : 1199: predicate.transpose_eliminate 1.10% : 0.000230s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.19% : 0.000248s : 1469: predicate.tuple_list_get_item_const_eliminator 0.94% : 0.000195s : 1469: predicate.tuple_list_get_item_depend_reorder 1.88% : 0.000392s : 2495: predicate.tuple_list_get_item_eliminator 1.00% : 0.000209s : 1469: predicate.tuple_list_get_set_item_eliminator 1.62% : 0.000337s : 2304: predicate.tuple_list_set_item_eliminator 1.09% : 0.000227s : 1660: predicate.tuple_to_list_eliminator_ 1.85% : 0.000385s : 2874: predicate.updatestate_pure_node_eliminater 2.72% : 0.000567s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000020s : 135: predicate.value_based_eliminate 0.29% : 0.000059s : 397: predicate.virtual_dataset_eliminate 0.46% : 0.000095s : 395: predicate.virtual_output_eliminate 0.11% : 0.000022s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.062082 747 68.40% : 0.042467s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.53% : 0.001572s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.06% : 0.018044s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.433116 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.28% : 0.042849s : 1: a1a2 0.00% : 0.000162s : 1: add_cache_embedding 0.00% : 0.000170s : 1: add_comm_op_reuse_tag 0.00% : 0.000732s : 1: add_recomputation 0.00% : 0.000403s : 1: assign_add_opt 0.01% : 0.002108s : 1: auto_monad 0.00% : 0.000350s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001281s : 1: bootstrap 0.00% : 0.000080s : 1: cconv 0.00% : 0.000159s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000127s : 1: convert_after_rewriter 0.00% : 0.000313s : 1: cse_after_recomputation 0.00% : 0.000157s : 1: dataset_repeat_opt 0.00% : 0.000405s : 1: distribtued_split 0.01% : 0.001415s : 1: eliminate_special_op_node 0.00% : 0.000106s : 1: environ_conv 0.00% : 0.000022s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000034s : 1: graph_reusing 0.00% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000009s : 1: handle_group_info 0.29% : 0.044444s : 1: inline 0.01% : 0.001393s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000531s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.001024s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.21% : 0.033177s : 61: opt.transform.a1a2 0.00% : 0.000176s : 1: opt.transform.loop_unroll_optimizer 0.59% : 0.091073s : 148: opt.transform.opt_a 0.01% : 0.000784s : 1: opt.transform.opt_after_cconv 0.02% : 0.003239s : 27: opt.transform.opt_b 0.24% : 0.036854s : 16: opt.transform.opt_resolve 0.01% : 0.000923s : 1: opt.transform.opt_trans_graph 0.01% : 0.000843s : 6: opt.transform.special_op_eliminate 0.00% : 0.000704s : 4: opt.transform.symbol_engine_opt 3.79% : 0.584322s : 1: opt_a 0.01% : 0.001572s : 1: opt_after_cconv 0.03% : 0.004020s : 1: opt_b 3.88% : 0.599213s : 1: optimize 0.00% : 0.000145s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000088s : 1: order_py_execute_after_rewriter 0.00% : 0.000126s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000152s : 1: overlap_grad_ring_attention 0.00% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000037s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000093s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000213s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000109s : 1: pipeline_split 0.00% : 0.000106s : 1: pre_auto_parallel 0.00% : 0.000159s : 1: py_interpret_to_execute 0.00% : 0.000155s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000125s : 1: remove_cast_before_assign_add 0.00% : 0.000677s : 1: remove_dup_value 0.88% : 0.135289s : 3: renormalize.infer 0.36% : 0.055120s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001060s : 1: rewriter_after_opt_a 0.01% : 0.001812s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000154s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000135s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000816s : 1: symbol_engine_optimizer 84.41% : 13.026840s : 1: task_emit 0.01% : 0.000955s : 1: tuple_transform 4.87% : 0.752018s : 1: type_inference 0.01% : 0.001412s : 1: validate TotalTime = 14.4614, [21] [bootstrap]: 0.0012779 [type_inference]: 0.767029 [auto_monad]: 0.00204496 [graph_reusing]: 2.25399e-05 [inline]: 0.0449665, [2] [rewriter_before_opt_a]: 0.00155611 [a1a2]: 0.0433059, [2] [Cycle 1]: 0.0293003, [11] [expand_dump_flag]: 4.57503e-05 [switch_simplify]: 0.00119529 [loop_unroll]: 0.00066598 [a_1]: 0.0226652 [recompute_prepare]: 0.00016375 [updatestate_depend_eliminate]: 0.00037427 [updatestate_assign_eliminate]: 0.00011437 [updatestate_loads_eliminate]: 0.00020611 [parameter_eliminate]: 7.10972e-06 [a_2]: 0.00352463 [parallel_inline_pass]: 0.0001057 [Cycle 2]: 0.00557607, [11] [expand_dump_flag]: 3.11993e-06 [switch_simplify]: 9.62e-05 [loop_unroll]: 9.34601e-05 [a_1]: 0.00321767 [recompute_prepare]: 0.00010354 [updatestate_depend_eliminate]: 0.00020119 [updatestate_assign_eliminate]: 6.617e-05 [updatestate_loads_eliminate]: 6.59898e-05 [parameter_eliminate]: 4.31016e-06 [a_2]: 0.0015297 [parallel_inline_pass]: 0.00010817 [parallel-infer-symbol]: 0.00018446 [pre_auto_parallel]: 9.80501e-05 [insert-virtual-dataset]: 0.00143883 [parallel-infer-symbol-second]: 2.82004e-06 [dataset_repeat_opt]: 0.00011872 [pipeline_split]: 0.0001113 [optimize]: 0.602312, [52] [py_interpret_to_execute]: 0.0001576 [rewriter_before_opt_a]: 0.00028525 [opt_a]: 0.587385, [3] [Cycle 1]: 0.504585, [46] [expand_dump_flag]: 3.15998e-06 [switch_simplify]: 0.00012612 [loop_unroll]: 0.00010942 [a_1]: 0.00342302 [recompute_prepare]: 0.00010762 [updatestate_depend_eliminate]: 0.00010612 [updatestate_assign_eliminate]: 6.54799e-05 [updatestate_loads_eliminate]: 7.08997e-05 [parameter_eliminate]: 5.15999e-06 [a_2]: 0.00171747 [accelerated_algorithm]: 0.0003136 [shard]: 2.79024e-06 [meta_shard_fg_expand]: 4.73699e-05 [shard_inline]: 0.00011143 [auto_parallel]: 8.765e-05 [parallel]: 0.0170631 [flash_sp]: 7.02301e-05 [merge_comm]: 0.00013144 [allreduce_fusion]: 7.49999e-05 [matmul_add_comm_reduction]: 0.00010329 [allreduce_slice_to_reducescatter]: 4.49829e-07 [virtual_shard_identity]: 0.0001367 [virtual_dataset]: 0.00017613 [get_grad_eliminate_]: 0.00011736 [virtual_output]: 0.00011609 [merge_forward]: 7.96001e-05 [cell_reuse_recompute_pass]: 4.24031e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021883 [before_grad]: 0.00021151 [inplace_validation]: 0.00012638 [parallel_renormalize]: 0.0229999 [update_top_fg]: 9.69972e-07 [cast_eliminate]: 0.00015406 [meta_fg_expand]: 0.26373 [inplace_validation_after_expand]: 0.00155673 [flash_sp_send_recv_attached]: 0.00117458 [receive_attached]: 9.502e-05 [after_resolve]: 0.00198029 [a_after_grad]: 0.00388119 [special_op_eliminate]: 0.00186621 [renormalize]: 0.150934 [add_forward_monad_depend]: 0.00036187 [auto_monad_grad]: 0.00021779 [auto_monad_eliminator]: 0.00178324 [cse]: 0.00416579 [a_3]: 0.0241269 [Cycle 2]: 0.0707474, [46] [expand_dump_flag]: 5.46696e-05 [switch_simplify]: 0.00186959 [loop_unroll]: 0.0015004 [a_1]: 0.0317708 [recompute_prepare]: 0.00017402 [updatestate_depend_eliminate]: 0.00023315 [updatestate_assign_eliminate]: 0.0001038 [updatestate_loads_eliminate]: 0.0001704 [parameter_eliminate]: 4.86011e-06 [a_2]: 0.00439536 [accelerated_algorithm]: 0.00016609 [shard]: 2.90014e-06 [meta_shard_fg_expand]: 9.35299e-05 [shard_inline]: 0.00014456 [auto_parallel]: 0.00012489 [parallel]: 1.46599e-05 [flash_sp]: 0.0001305 [merge_comm]: 0.00011461 [allreduce_fusion]: 9.65199e-05 [matmul_add_comm_reduction]: 0.00011945 [allreduce_slice_to_reducescatter]: 5.99772e-07 [virtual_shard_identity]: 0.00014515 [virtual_dataset]: 0.0001397 [get_grad_eliminate_]: 0.00013543 [virtual_output]: 0.00013817 [merge_forward]: 9.41702e-05 [cell_reuse_recompute_pass]: 2.96999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024879 [before_grad]: 0.00024178 [inplace_validation]: 8.843e-05 [parallel_renormalize]: 8.00937e-08 [update_top_fg]: 7.20378e-07 [cast_eliminate]: 0.00015559 [meta_fg_expand]: 0.0002823 [inplace_validation_after_expand]: 0.00017704 [flash_sp_send_recv_attached]: 2.1602e-06 [receive_attached]: 1.55019e-06 [after_resolve]: 0.00016267 [a_after_grad]: 0.00023344 [special_op_eliminate]: 0.00013691 [renormalize]: 0.0184923 [add_forward_monad_depend]: 6.21006e-06 [auto_monad_grad]: 3.22983e-06 [auto_monad_eliminator]: 0.0002933 [cse]: 0.00686854 [a_3]: 0.00097176 [Cycle 3]: 0.0120245, [46] [expand_dump_flag]: 3.77977e-06 [switch_simplify]: 0.00013709 [loop_unroll]: 0.00013179 [a_1]: 0.00437942 [recompute_prepare]: 0.00013793 [updatestate_depend_eliminate]: 0.00015583 [updatestate_assign_eliminate]: 9.55299e-05 [updatestate_loads_eliminate]: 9.422e-05 [parameter_eliminate]: 4.35999e-06 [a_2]: 0.00212639 [accelerated_algorithm]: 0.00016021 [shard]: 2.17976e-06 [meta_shard_fg_expand]: 5.29401e-05 [shard_inline]: 0.00013548 [auto_parallel]: 0.00011806 [parallel]: 1.20699e-05 [flash_sp]: 2.61981e-06 [merge_comm]: 0.00010792 [allreduce_fusion]: 9.62601e-05 [matmul_add_comm_reduction]: 0.00012167 [allreduce_slice_to_reducescatter]: 6.40284e-07 [virtual_shard_identity]: 0.00014172 [virtual_dataset]: 0.00013382 [get_grad_eliminate_]: 0.00013047 [virtual_output]: 0.00013336 [merge_forward]: 9.561e-05 [cell_reuse_recompute_pass]: 3.6899e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.0002488 [before_grad]: 0.0002354 [inplace_validation]: 8.94102e-05 [parallel_renormalize]: 8.98726e-08 [update_top_fg]: 6.10016e-07 [cast_eliminate]: 0.00015178 [meta_fg_expand]: 0.0001117 [inplace_validation_after_expand]: 0.00012558 [flash_sp_send_recv_attached]: 2.01026e-06 [receive_attached]: 1.85985e-06 [after_resolve]: 0.00015572 [a_after_grad]: 0.00022218 [special_op_eliminate]: 0.00013271 [renormalize]: 8.98726e-08 [add_forward_monad_depend]: 3.11993e-06 [auto_monad_grad]: 2.50991e-06 [auto_monad_eliminator]: 0.0001781 [cse]: 0.00040018 [a_3]: 0.00094338 [py_interpret_to_execute_after_opt_a]: 0.00014616 [slice_cell_reuse_recomputed_activation]: 2.23005e-06 [rewriter_after_opt_a]: 0.00104631 [convert_after_rewriter]: 0.00011892 [order_py_execute_after_rewriter]: 8.18898e-05 [opt_b]: 0.00403663, [1] [Cycle 1]: 0.00402802, [7] [b_1]: 0.00315087 [b_2]: 0.00013955 [updatestate_depend_eliminate]: 9.993e-05 [updatestate_assign_eliminate]: 8.95998e-05 [updatestate_loads_eliminate]: 9.20999e-05 [renormalize]: 4.89876e-07 [cse]: 0.00039709 [optimize_parallel_all_gather_comm]: 0.00014264 [overlap_param_gather]: 1.46963e-06 [cconv]: 7.312e-05 [loop_unroll]: 0.00094331 [opt_after_cconv]: 0.00156833, [1] [Cycle 1]: 0.00156074, [7] [c_1]: 0.00079313 [parameter_eliminate]: 3.5502e-06 [updatestate_depend_eliminate]: 0.00013016 [updatestate_assign_eliminate]: 9.24901e-05 [updatestate_loads_eliminate]: 9.34699e-05 [cse]: 0.00039087 [renormalize]: 5.20144e-07 [remove_dup_value]: 0.00061031 [tuple_transform]: 0.00100174, [1] [Cycle 1]: 0.00099444, [2] [d_1]: 0.00097544 [renormalize]: 4.00003e-07 [partial_unused_args_eliminate]: 3.46033e-06 [add_cache_embedding]: 0.00016002 [add_recomputation]: 0.00072124 [cse_after_recomputation]: 0.00030992, [1] [Cycle 1]: 0.00030106, [1] [cse]: 0.00028743 [environ_conv]: 9.33502e-05 [swap_dp_allreduce_reducescatter]: 0.00012817 [bias_add_comm_swap]: 2.50991e-06 [label_micro_interleaved_index]: 2.12993e-06 [label_fine_grained_interleaved_index]: 0.00055019 [merge_cast_opt]: 1.68988e-06 [slice_recompute_activation]: 0.00014834 [micro_interleaved_order_control]: 2.09967e-06 [assign_add_opt]: 0.00040108 [ForceFp32Comm]: 1.43982e-06 [remove_cast_before_assign_add]: 0.00010723 [full_micro_interleaved_order_control]: 2.88012e-06 [reorder_send_recv_between_fp_bp]: 1.95019e-06 [comm_op_add_attrs]: 0.00015091 [add_comm_op_reuse_tag]: 0.00014824 [interleave_split_concat_branches]: 9.80217e-07 [interleave_parallel_branches]: 8.99658e-07 [overlap_opt_shard_in_pipeline]: 3.31597e-05 [overlap_opt_shard_grad_in_pipeline]: 3.51993e-06 [control_data_broadcast_order]: 1.19023e-06 [grouped_pairwise_exchange_alltoall]: 1.23102e-05 [offloading_packed_experts]: 2.72971e-06 [overlap_recompute_and_grad_model_parallel]: 2.0396e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.0496e-06 [overlap_recompute_allgather_and_fa_grad]: 0.00010269 [overlap_grad_ring_attention]: 0.00014806 [overlap_grad_flash_sp]: 0.00012336 [begin_end_overlap_inline]: 8.2003e-07 [split_matmul_comm_elemetwise]: 2.35997e-06 [split_layernorm_comm]: 2.46987e-06 [handle_group_info]: 7.07014e-06 [symbol_engine_optimizer]: 0.0008546, [1] [Cycle 1]: 0.00084725, [6] [build]: 5.28898e-05 [elim_shapecalc]: 0.00014782 [elim_not_effective]: 0.00025399 [opt_reshape]: 0.00013617 [fold_const_symbol]: 0.00021366 [renormalize]: 7.59959e-07 [pipeline_parallel_scheduler]: 3.59025e-06 [auto_monad_reorder]: 0.00034847 [get_jit_bprop_graph]: 6.19795e-07 [rewriter_after_jit_bprop_graph]: 7.20378e-07 [eliminate_special_op_node]: 0.00141504 [distribtued_split]: 0.00042183 [validate]: 0.00028879 [task_emit]: 13.0377 [execute]: 1.22404e-05 Sums bootstrap : 0.001278s : 0.01% type_inference : 0.767029s : 5.31% auto_monad : 0.002045s : 0.01% graph_reusing : 0.000023s : 0.00% inline.rewriter_before_opt_a : 0.001556s : 0.01% inline.a1a2.expand_dump_flag : 0.000049s : 0.00% inline.a1a2.switch_simplify : 0.001291s : 0.01% inline.a1a2.loop_unroll : 0.000759s : 0.01% inline.a1a2.a_1 : 0.025883s : 0.18% inline.a1a2.recompute_prepare : 0.000267s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000575s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000181s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000272s : 0.00% inline.a1a2.parameter_eliminate : 0.000011s : 0.00% inline.a1a2.a_2 : 0.005054s : 0.03% inline.a1a2.parallel_inline_pass : 0.000214s : 0.00% parallel-infer-symbol : 0.000184s : 0.00% pre_auto_parallel : 0.000098s : 0.00% insert-virtual-dataset : 0.001439s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000119s : 0.00% pipeline_split : 0.000111s : 0.00% optimize.py_interpret_to_execute : 0.000158s : 0.00% optimize.rewriter_before_opt_a : 0.000285s : 0.00% optimize.opt_a.expand_dump_flag : 0.000062s : 0.00% optimize.opt_a.switch_simplify : 0.002133s : 0.01% optimize.opt_a.loop_unroll : 0.001742s : 0.01% optimize.opt_a.a_1 : 0.039573s : 0.27% optimize.opt_a.recompute_prepare : 0.000420s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000495s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000265s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000336s : 0.00% optimize.opt_a.parameter_eliminate : 0.000014s : 0.00% optimize.opt_a.a_2 : 0.008239s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000640s : 0.00% optimize.opt_a.shard : 0.000008s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000194s : 0.00% optimize.opt_a.shard_inline : 0.000391s : 0.00% optimize.opt_a.auto_parallel : 0.000331s : 0.00% optimize.opt_a.parallel : 0.017090s : 0.12% optimize.opt_a.flash_sp : 0.000203s : 0.00% optimize.opt_a.merge_comm : 0.000354s : 0.00% optimize.opt_a.allreduce_fusion : 0.000268s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000344s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000424s : 0.00% optimize.opt_a.virtual_dataset : 0.000450s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000383s : 0.00% optimize.opt_a.virtual_output : 0.000388s : 0.00% optimize.opt_a.merge_forward : 0.000269s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000011s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000716s : 0.00% optimize.opt_a.before_grad : 0.000689s : 0.00% optimize.opt_a.inplace_validation : 0.000304s : 0.00% optimize.opt_a.parallel_renormalize : 0.023000s : 0.16% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000461s : 0.00% optimize.opt_a.meta_fg_expand : 0.264124s : 1.83% optimize.opt_a.inplace_validation_after_expand : 0.001859s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001179s : 0.01% optimize.opt_a.receive_attached : 0.000098s : 0.00% optimize.opt_a.after_resolve : 0.002299s : 0.02% optimize.opt_a.a_after_grad : 0.004337s : 0.03% optimize.opt_a.special_op_eliminate : 0.002136s : 0.01% optimize.opt_a.renormalize : 0.169426s : 1.17% optimize.opt_a.add_forward_monad_depend : 0.000371s : 0.00% optimize.opt_a.auto_monad_grad : 0.000224s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002255s : 0.02% optimize.opt_a.cse : 0.011435s : 0.08% optimize.opt_a.a_3 : 0.026042s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000146s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.001046s : 0.01% optimize.convert_after_rewriter : 0.000119s : 0.00% optimize.order_py_execute_after_rewriter : 0.000082s : 0.00% optimize.opt_b.b_1 : 0.003151s : 0.02% optimize.opt_b.b_2 : 0.000140s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000100s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000090s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000092s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000397s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000143s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000073s : 0.00% optimize.loop_unroll : 0.000943s : 0.01% optimize.opt_after_cconv.c_1 : 0.000793s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000004s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000130s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000092s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000093s : 0.00% optimize.opt_after_cconv.cse : 0.000391s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000610s : 0.00% optimize.tuple_transform.d_1 : 0.000975s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000160s : 0.00% optimize.add_recomputation : 0.000721s : 0.00% optimize.cse_after_recomputation.cse : 0.000287s : 0.00% optimize.environ_conv : 0.000093s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000128s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000550s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000148s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000401s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000107s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000151s : 0.00% optimize.add_comm_op_reuse_tag : 0.000148s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000033s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000012s : 0.00% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000103s : 0.00% optimize.overlap_grad_ring_attention : 0.000148s : 0.00% optimize.overlap_grad_flash_sp : 0.000123s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000053s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000148s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000254s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000136s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000214s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000348s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.001415s : 0.01% distribtued_split : 0.000422s : 0.00% validate : 0.000289s : 0.00% task_emit : 13.037678s : 90.23% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.049602 4298 0.04% : 0.000021s : 5: substitution.ad_related_special_op_eliminate 0.05% : 0.000023s : 9: substitution.addn_check_dump 0.11% : 0.000055s : 7: substitution.addn_zero_filter 0.03% : 0.000017s : 7: substitution.adjust_all_reduce_mul_add 0.70% : 0.000345s : 71: substitution.arithmetic_simplify 0.12% : 0.000058s : 10: substitution.cast_eliminate 0.12% : 0.000058s : 47: substitution.depend_value_elim 0.11% : 0.000055s : 107: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.09% : 0.000042s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000021s : 12: substitution.environ_get_depend_swap 0.06% : 0.000031s : 27: substitution.environ_get_eliminate 0.08% : 0.000039s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000022s : 23: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.03% : 0.000014s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000031s : 107: substitution.fold_const_symbol 63.31% : 0.031405s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000083s : 126: substitution.graph_param_transform 0.02% : 0.000009s : 8: substitution.incorporate_call 0.01% : 0.000006s : 8: substitution.incorporate_call_switch 24.51% : 0.012158s : 331: substitution.inline 1.39% : 0.000689s : 112: substitution.inline_without_move 0.26% : 0.000127s : 309: substitution.j_node_and_user_rematch 0.38% : 0.000187s : 40: substitution.less_batch_normalization 0.10% : 0.000050s : 90: substitution.load_eliminater 0.11% : 0.000056s : 10: substitution.merge_addn 0.23% : 0.000114s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.08% : 0.000041s : 1: substitution.partial_defer_inline 0.13% : 0.000064s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.07% : 0.000036s : 15: substitution.reduce_eliminate 0.32% : 0.000159s : 309: substitution.remove_not_recompute_node 1.98% : 0.000984s : 508: substitution.replace_applicator 0.23% : 0.000113s : 251: substitution.replace_old_param 0.08% : 0.000041s : 11: substitution.reshape_eliminate 0.03% : 0.000016s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000012s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.18% : 0.000088s : 34: substitution.switch_simplify 0.06% : 0.000030s : 11: substitution.tile_eliminate 0.53% : 0.000261s : 101: substitution.tuple_list_convert_item_index_to_positive 0.31% : 0.000153s : 107: substitution.tuple_list_get_item_const_eliminator 0.44% : 0.000217s : 107: substitution.tuple_list_get_item_depend_reorder 1.73% : 0.000861s : 308: substitution.tuple_list_get_item_eliminator 0.37% : 0.000182s : 107: substitution.tuple_list_get_set_item_eliminator 0.41% : 0.000201s : 210: substitution.updatestate_pure_node_eliminater 0.73% : 0.000364s : 265: substitution.updatestate_useless_node_eliminater 0.03% : 0.000014s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.766536 2 96.30% : 0.738138s : 1: type_inference.infer 3.70% : 0.028398s : 1: type_inference.specialize ------[replace.] 0.010019 775 0.44% : 0.000044s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000007s : 1: replace.arithmetic_simplify 0.54% : 0.000054s : 7: replace.depend_value_elim 0.43% : 0.000043s : 3: replace.environ_get_set_eliminate 30.60% : 0.003066s : 183: replace.getattr_setattr_resolve 29.61% : 0.002967s : 310: replace.inline 0.22% : 0.000022s : 1: replace.merge_addn 1.17% : 0.000117s : 7: replace.partial_eliminate 3.97% : 0.000398s : 25: replace.replace_applicator 4.14% : 0.000415s : 34: replace.switch_simplify 0.51% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 27.94% : 0.002799s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.22% : 0.000022s : 1: replace.virtual_dataset_eliminate ------[match.] 0.041842 775 0.04% : 0.000018s : 5: match.ad_related_special_op_eliminate 0.04% : 0.000018s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.05% : 0.000020s : 3: match.environ_get_set_eliminate 69.65% : 0.029142s : 183: match.getattr_setattr_resolve 28.50% : 0.011924s : 310: match.inline 0.07% : 0.000027s : 1: match.merge_addn 0.10% : 0.000043s : 7: match.partial_eliminate 0.24% : 0.000102s : 25: match.replace_applicator 0.16% : 0.000068s : 34: match.switch_simplify 0.08% : 0.000033s : 6: match.tuple_list_get_item_depend_reorder 1.01% : 0.000423s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000010s : 1: match.updatestate_useless_node_eliminater 0.03% : 0.000012s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020536131318 0.80% : 0.000164s : 1198: predicate.accumulaten_eliminater 0.28% : 0.000057s : 254: predicate.ad_related_special_op_eliminate 0.57% : 0.000116s : 835: predicate.addn_check_dump 0.78% : 0.000160s : 1198: predicate.addn_zero_filter 0.75% : 0.000153s : 1198: predicate.adjust_all_reduce_mul_add 1.79% : 0.000368s : 2034: predicate.arithmetic_simplify 1.14% : 0.000235s : 1586: predicate.cast_eliminate 3.09% : 0.000635s : 3484: predicate.check_bprop_eliminate 0.57% : 0.000117s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.18% : 0.000037s : 242: predicate.convert_tensor_all_eliminate 1.14% : 0.000235s : 1399: predicate.convert_tensor_eliminate 0.58% : 0.000119s : 838: predicate.depend_value_elim 0.84% : 0.000172s : 1202: predicate.dict_get_item_const_eliminator 0.84% : 0.000173s : 1202: predicate.dict_get_item_eliminator 0.85% : 0.000175s : 1202: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 126: predicate.elim_not_effective 0.10% : 0.000021s : 126: predicate.elim_shapecalc_of_broadcastargs 0.89% : 0.000182s : 1334: predicate.environ_add_const_eliminate 0.86% : 0.000176s : 1337: predicate.environ_get_add_eliminate 0.87% : 0.000178s : 1334: predicate.environ_get_depend_swap 1.46% : 0.000300s : 2172: predicate.environ_get_eliminate 0.85% : 0.000175s : 1337: predicate.environ_get_set_eliminate 1.14% : 0.000233s : 1717: predicate.exchange_switch_depend_value 1.40% : 0.000288s : 1717: predicate.float_depend_g_call 0.57% : 0.000117s : 835: predicate.float_environ_get_switch 0.66% : 0.000136s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.29% : 0.000060s : 395: predicate.get_grad_eliminate 2.31% : 0.000475s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.56% : 0.000115s : 835: predicate.incorporate_call 0.55% : 0.000113s : 835: predicate.incorporate_call_switch 4.00% : 0.000821s : 4602: predicate.inline 2.28% : 0.000469s : 2203: predicate.inline_without_move 0.14% : 0.000029s : 395: predicate.j_node_and_user_rematch 0.34% : 0.000069s : 388: predicate.less_batch_normalization 1.14% : 0.000234s : 1660: predicate.list_to_tuple_eliminator_ 1.90% : 0.000389s : 2874: predicate.load_eliminater 0.19% : 0.000040s : 135: predicate.loop_unroll_after_grad 2.27% : 0.000467s : 2640: predicate.loop_unroll_before_grad 1.01% : 0.000207s : 1478: predicate.make_slice_get_slice_eliminator 0.58% : 0.000119s : 837: predicate.merge_addn 2.99% : 0.000614s : 3380: predicate.micro_step_allgather_replace 3.10% : 0.000636s : 3380: predicate.mini_step_allgather_replace 0.81% : 0.000167s : 1199: predicate.minmaximum_grad 0.18% : 0.000037s : 242: predicate.mutable_eliminate 0.10% : 0.000020s : 126: predicate.opt_reshape 0.11% : 0.000022s : 135: predicate.parallel_virtual_node 2.02% : 0.000414s : 1717: predicate.partial_defer_inline 1.22% : 0.000251s : 1541: predicate.partial_eliminate 0.76% : 0.000156s : 1198: predicate.print_const_string_wrapper 0.64% : 0.000132s : 824: predicate.reduce_all_const_elim 0.97% : 0.000199s : 1199: predicate.reduce_eliminate 0.14% : 0.000029s : 395: predicate.remove_not_recompute_node 2.02% : 0.000415s : 4829: predicate.replace_applicator 0.78% : 0.000161s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.81% : 0.000166s : 1199: predicate.reshape_eliminate 3.04% : 0.000624s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000022s : 135: predicate.row_tensor_eliminate 3.19% : 0.000656s : 3484: predicate.same_eliminate 0.24% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.30% : 0.000061s : 395: predicate.shard_identity_eliminate 2.09% : 0.000429s : 2338: predicate.special_op_eliminate 0.66% : 0.000135s : 837: predicate.specialize_transform 3.32% : 0.000683s : 3380: predicate.split_environ_get_set_with_tuple_value 1.60% : 0.000328s : 2203: predicate.stack_unstack_eliminate 1.88% : 0.000387s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.26% : 0.000260s : 1717: predicate.switch_defer_inline 4.34% : 0.000892s : 5201: predicate.switch_layer_defer_inline 4.61% : 0.000946s : 5262: predicate.switch_simplify 0.82% : 0.000169s : 1199: predicate.tile_eliminate 0.83% : 0.000171s : 1199: predicate.transpose_eliminate 1.08% : 0.000222s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.11% : 0.000228s : 1469: predicate.tuple_list_get_item_const_eliminator 0.97% : 0.000198s : 1469: predicate.tuple_list_get_item_depend_reorder 1.98% : 0.000407s : 2495: predicate.tuple_list_get_item_eliminator 1.01% : 0.000207s : 1469: predicate.tuple_list_get_set_item_eliminator 1.69% : 0.000346s : 2304: predicate.tuple_list_set_item_eliminator 1.12% : 0.000231s : 1660: predicate.tuple_to_list_eliminator_ 1.90% : 0.000391s : 2874: predicate.updatestate_pure_node_eliminater 2.51% : 0.000515s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000021s : 135: predicate.value_based_eliminate 0.29% : 0.000059s : 397: predicate.virtual_dataset_eliminate 0.28% : 0.000058s : 395: predicate.virtual_output_eliminate 0.10% : 0.000021s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.061685 747 68.32% : 0.042145s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.49% : 0.001537s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.19% : 0.018003s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.469549 346 0.00% : 0.000006s : 1: ForceFp32Comm 0.28% : 0.043313s : 1: a1a2 0.00% : 0.000166s : 1: add_cache_embedding 0.00% : 0.000156s : 1: add_comm_op_reuse_tag 0.00% : 0.000733s : 1: add_recomputation 0.00% : 0.000410s : 1: assign_add_opt 0.01% : 0.002078s : 1: auto_monad 0.00% : 0.000365s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001326s : 1: bootstrap 0.00% : 0.000079s : 1: cconv 0.00% : 0.000158s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000128s : 1: convert_after_rewriter 0.00% : 0.000315s : 1: cse_after_recomputation 0.00% : 0.000128s : 1: dataset_repeat_opt 0.00% : 0.000438s : 1: distribtued_split 0.01% : 0.001431s : 1: eliminate_special_op_node 0.00% : 0.000103s : 1: environ_conv 0.00% : 0.000022s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000032s : 1: graph_reusing 0.00% : 0.000016s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000010s : 1: handle_group_info 0.29% : 0.044982s : 1: inline 0.01% : 0.001469s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000559s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000955s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.22% : 0.033392s : 61: opt.transform.a1a2 0.00% : 0.000177s : 1: opt.transform.loop_unroll_optimizer 0.59% : 0.091199s : 148: opt.transform.opt_a 0.01% : 0.000790s : 1: opt.transform.opt_after_cconv 0.02% : 0.003259s : 27: opt.transform.opt_b 0.24% : 0.037420s : 16: opt.transform.opt_resolve 0.01% : 0.000972s : 1: opt.transform.opt_trans_graph 0.01% : 0.000857s : 6: opt.transform.special_op_eliminate 0.00% : 0.000745s : 4: opt.transform.symbol_engine_opt 3.80% : 0.587393s : 1: opt_a 0.01% : 0.001574s : 1: opt_after_cconv 0.03% : 0.004041s : 1: opt_b 3.89% : 0.602326s : 1: optimize 0.00% : 0.000150s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000088s : 1: order_py_execute_after_rewriter 0.00% : 0.000128s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000154s : 1: overlap_grad_ring_attention 0.00% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000038s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000108s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000198s : 1: parallel-infer-symbol 0.00% : 0.000010s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000010s : 1: pipeline_parallel_scheduler 0.00% : 0.000122s : 1: pipeline_split 0.00% : 0.000108s : 1: pre_auto_parallel 0.00% : 0.000166s : 1: py_interpret_to_execute 0.00% : 0.000154s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000113s : 1: remove_cast_before_assign_add 0.00% : 0.000624s : 1: remove_dup_value 0.89% : 0.137570s : 3: renormalize.infer 0.35% : 0.054810s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001058s : 1: rewriter_after_opt_a 0.01% : 0.001872s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000154s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000135s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000859s : 1: symbol_engine_optimizer 84.28% : 13.037733s : 1: task_emit 0.01% : 0.001006s : 1: tuple_transform 4.96% : 0.767080s : 1: type_inference 0.01% : 0.001464s : 1: validate . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 100.50s (0:01:40) ================== .... =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") =============================== warnings summary ===============================/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 =============================== warnings summary ============================================================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") ================== 1 passed, 18 warnings in 100.99s (0:01:40) ================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer")-- Docs: https://docs.pytest.org/en/latest/warnings.html -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 101.42s (0:01:41) ==================================== 1 passed, 18 warnings in 100.69s (0:01:40) ================== ================== 1 passed, 18 warnings in 101.13s (0:01:41) ================== . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 101.33s (0:01:41) ================== . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 100.93s (0:01:40) ================== . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 100.50s (0:01:40) ================== [WARNING] DEVICE(26051,ffff7f9d5c10,python3.7):2025-02-07-15:49:27.248.714 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x461f4ad0 is not exist. [WARNING] DEVICE(26040,ffffb911fc10,python3.7):2025-02-07-15:49:27.365.734 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x395ed0e0 is not exist. [WARNING] DEVICE(26029,ffffbca4cc10,python3.7):2025-02-07-15:49:27.377.740 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x1b3d1c60 is not exist. [WARNING] DEVICE(26020,ffffbacacc10,python3.7):2025-02-07-15:49:29.297.057 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2e2b93e0 is not exist. [WARNING] DEVICE(26068,ffffad937c10,python3.7):2025-02-07-15:49:29.391.013 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x28b33b60 is not exist. [WARNING] DEVICE(26112,ffff8247fc10,python3.7):2025-02-07-15:49:29.592.585 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x28a69050 is not exist. [WARNING] DEVICE(26100,ffff9bab1c10,python3.7):2025-02-07-15:49:29.608.054 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2436b7b0 is not exist. [WARNING] DEVICE(26086,ffffa208fc10,python3.7):2025-02-07-15:49:29.623.715 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x21037900 is not exist. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 127.93s (0:02:07) ================== ff8c39f2e51611efac92c4447d93fe45/pass/test_all_test_hccl_send_receive.log0000644000175400017540000007511614751343157025502 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collected 1 item test_all.py ============================= test session starts =========================================================== test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sinkrootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collecting ... collecting ... [WARNING] ME(162275:281473356848144,MainProcess):2025-02-07-13:52:31.798.569 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] ME(162274:281473381293072,MainProcess):2025-02-07-13:52:31.798.569 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] DISTRIBUTED(162275,ffff9f72dc10,python3.7):2025-02-07-13:53:12.281.289 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(162274,ffffa0e7dc10,python3.7):2025-02-07-13:53:13.039.714 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(162275,fffed3fef0f0,python3.7):2025-02-07-13:53:13.529.521 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162275,fffed37ee0f0,python3.7):2025-02-07-13:53:13.620.812 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(162274,fffed57f20f0,python3.7):2025-02-07-13:53:13.650.282 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162274,fffe9effd0f0,python3.7):2025-02-07-13:53:13.782.095 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(162274,fffe9effd0f0,python3.7):2025-02-07-13:53:14.018.114 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(162274,fffed57f20f0,python3.7):2025-02-07-13:53:14.018.513 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group  collecting 1 item  collected 1 item  test_send_receive.py . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 1 passed, 18 warnings in 47.66s ======================== [WARNING] DEVICE(162275,fffed37ee0f0,python3.7):2025-02-07-13:53:14.146.937 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(162275,fffed3fef0f0,python3.7):2025-02-07-13:53:14.147.353 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] PROFILER(162275,fffed1feb0f0,python3.7):2025-02-07-13:53:14.516.382 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(162274,fffe9d7fa0f0,python3.7):2025-02-07-13:53:14.574.963 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory  collecting 1 item  collected 1 item  test_send_receive.py . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 1 passed, 18 warnings in 48.72s ======================== [WARNING] DEVICE(162275,ffff9f72dc10,python3.7):2025-02-07-13:53:19.906.578 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x45442e80 is not exist. [WARNING] DEVICE(162275,ffff9f72dc10,python3.7):2025-02-07-13:53:19.907.714 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0xfffe7c0051c0 is not exist. [WARNING] DEVICE(162274,ffffa0e7dc10,python3.7):2025-02-07-13:53:22.316.941 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x46b7ca40 is not exist. [WARNING] DEVICE(162274,ffffa0e7dc10,python3.7):2025-02-07-13:53:22.318.107 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0xfffe740057c0 is not exist. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 68.56s (0:01:08) =================== ././@LongLink0000644000000000000000000000015400000000000011603 Lustar rootrootff8c39f2e51611efac92c4447d93fe45/pass/test_remove_redundancy_test_no_init_parameters_without_load_param.logff8c39f2e51611efac92c4447d93fe45/pass/test_remove_redundancy_test_no_init_parameters_without_load_pa0000644000175400017540000147156614751343157033375 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collected 1 item test_remove_redundancy.py [WARNING] ME(162587:281472914570256,MainProcess):2025-02-07-15:51:26.273.137 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:0, log file:worker_0.log. Environment variable [RANK_ID] is exported. [WARNING] ME(162587:281472914570256,MainProcess):2025-02-07-15:51:26.401.223 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:1, log file:worker_1.log. Environment variable [RANK_ID] is exported. [WARNING] ME(162587:281472914570256,MainProcess):2025-02-07-15:51:26.545.826 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:2, log file:worker_2.log. Environment variable [RANK_ID] is exported. [WARNING] ME(162587:281472914570256,MainProcess):2025-02-07-15:51:26.706.445 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:3, log file:worker_3.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 [WARNING] ME(162587:281472914570256,MainProcess):2025-02-07-15:51:26.882.088 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:4, log file:worker_4.log. Environment variable [RANK_ID] is exported. rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 [WARNING] ME(162587:281472914570256,MainProcess):2025-02-07-15:51:27.612.29 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:5, log file:worker_5.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 [WARNING] ME(162587:281472914570256,MainProcess):2025-02-07-15:51:27.248.477 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:6, log file:worker_6.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 [WARNING] ME(162587:281472914570256,MainProcess):2025-02-07-15:51:27.437.618 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:7, log file:worker_7.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 [WARNING] ME(162587:281472914570256,MainProcess):2025-02-07-15:51:27.628.412 [mindspore/parallel/cluster/process_entity/_api.py:223] Distributed job is spawned. Waiting all processes to exit... ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:32.206.762 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50212, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:32.206.829 [mindspore/ccsrc/distributed/rpc/tcp/tcp_client.cc:76] Connect] Failed to connect to the tcp server : 127.0.0.1:8118, retry to reconnect(1/1)... [WARNING] DISTRIBUTED(162669,ffff29d740f0,python3.7):2025-02-07-15:51:32.338.449 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50214 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162669,ffffaa555c10,python3.7):2025-02-07-15:51:32.338.449 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50214, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162669,ffffaa555c10,python3.7):2025-02-07-15:51:32.338.632 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50216, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162669,ffffaa555c10,python3.7):2025-02-07-15:51:32.338.660 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(162669,ffff2ad760f0,python3.7):2025-02-07-15:51:32.338.657 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50216 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:32.417.528 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50218, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162678,ffff399d50f0,python3.7):2025-02-07-15:51:32.417.545 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50218 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:32.417.593 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:32.683.923 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50220, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162691,ffff077fe0f0,python3.7):2025-02-07-15:51:32.683.932 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50220 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:32.683.975 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:32.706.919 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:173] Register] Failed to connect to the meta server node url: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:32.706.944 [mindspore/ccsrc/distributed/cluster/topology/compute_graph_node.cc:363] ReconnectWithTimeoutWindow] Failed to register and try to reconnect to the meta server. [WARNING] DISTRIBUTED(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:32.736.131 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50222, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162702,ffff1e4f90f0,python3.7):2025-02-07-15:51:32.736.146 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50222 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:32.736.215 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(162669,ffffaa555c10,python3.7):2025-02-07-15:51:32.839.174 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:32.917.863 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50224, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:32.917.904 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(162678,ffff3a9d70f0,python3.7):2025-02-07-15:51:32.917.906 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50224 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:33.015.868 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50226, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162713,ffff3270f0f0,python3.7):2025-02-07-15:51:33.015.868 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50226 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:33.015.935 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:33.184.251 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50228, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:33.184.279 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(162691,ffff0cb160f0,python3.7):2025-02-07-15:51:33.184.294 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50228 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162724,ffff91263c10,python3.7):2025-02-07-15:51:33.194.128 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50230, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162724,ffff10a940f0,python3.7):2025-02-07-15:51:33.194.133 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50230 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162724,ffff91263c10,python3.7):2025-02-07-15:51:33.194.175 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:33.207.114 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50232, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:33.207.142 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(162660,ffff229560f0,python3.7):2025-02-07-15:51:33.207.157 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50232 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:33.236.388 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50234, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:33.236.414 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(162702,ffff1f4fb0f0,python3.7):2025-02-07-15:51:33.236.434 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50234 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162669,ffffaa555c10,python3.7):2025-02-07-15:51:33.339.264 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:33.418.302 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:33.516.107 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50236, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:33.516.137 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(162713,ffff337110f0,python3.7):2025-02-07-15:51:33.516.134 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50236 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162741,ffffaec72c10,python3.7):2025-02-07-15:51:33.653.565 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50238, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162741,ffff2e4980f0,python3.7):2025-02-07-15:51:33.653.565 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50238 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162741,ffffaec72c10,python3.7):2025-02-07-15:51:33.653.678 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:33.684.615 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(162724,ffff91263c10,python3.7):2025-02-07-15:51:33.694.336 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50240, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162724,ffff91263c10,python3.7):2025-02-07-15:51:33.694.370 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(162724,ffff11a960f0,python3.7):2025-02-07-15:51:33.694.370 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50240 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:33.707.286 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 21 source: 127.0.0.1:50242, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:33.707.309 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(162660,ffff219540f0,python3.7):2025-02-07-15:51:33.707.322 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50242 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:33.736.766 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(162669,ffffaa555c10,python3.7):2025-02-07-15:51:33.839.394 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:33.918.387 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:34.016.471 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(162741,ffffaec72c10,python3.7):2025-02-07-15:51:34.153.876 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50244, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(162741,ffffaec72c10,python3.7):2025-02-07-15:51:34.153.913 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(162741,ffff2f49a0f0,python3.7):2025-02-07-15:51:34.153.913 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50244 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:34.184.726 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(162724,ffff91263c10,python3.7):2025-02-07-15:51:34.194.707 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:34.207.595 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:34.236.849 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(162669,ffffaa555c10,python3.7):2025-02-07-15:51:34.339.520 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(4/1200). [WARNING] DISTRIBUTED(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:34.418.497 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:34.516.562 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(162741,ffffaec72c10,python3.7):2025-02-07-15:51:34.654.361 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:34.684.848 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(162724,ffff91263c10,python3.7):2025-02-07-15:51:34.694.803 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:34.707.683 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:34.736.927 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(162669,ffffaa555c10,python3.7):2025-02-07-15:51:34.839.605 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(5/1200). [WARNING] DISTRIBUTED(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:34.918.624 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(4/1200). [WARNING] DISTRIBUTED(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:35.016.689 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(162741,ffffaec72c10,python3.7):2025-02-07-15:51:35.154.477 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(162741,ffffaec72c10,python3.7):2025-02-07-15:51:35.154.510 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 7 rank id: 7 [WARNING] DISTRIBUTED(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:35.184.976 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:35.185.004 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 3 rank id: 3 [WARNING] DISTRIBUTED(162724,ffff91263c10,python3.7):2025-02-07-15:51:35.194.906 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(162724,ffff91263c10,python3.7):2025-02-07-15:51:35.194.934 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 6 rank id: 6 [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:35.207.779 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:35.207.806 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 0 rank id: 0 [WARNING] DISTRIBUTED(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:35.237.024 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:35.237.049 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 4 rank id: 4 [WARNING] DISTRIBUTED(162669,ffffaa555c10,python3.7):2025-02-07-15:51:35.339.721 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(162669,ffffaa555c10,python3.7):2025-02-07-15:51:35.339.753 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 1 rank id: 1 [WARNING] DISTRIBUTED(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:35.418.779 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:35.418.827 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 2 rank id: 2 [WARNING] DISTRIBUTED(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:35.516.802 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:35.516.835 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 5 rank id: 5 [WARNING] DISTRIBUTED(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:39.806.285 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:39.806.424 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(162691,fffe7f7fe0f0,python3.7):2025-02-07-15:51:39.807.041 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 distribute network. collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DISTRIBUTED(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:39.913.004 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:39.913.178 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(162702,fffe92ffd0f0,python3.7):2025-02-07-15:51:39.913.885 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 distribute network. collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:39.968.883 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(162660,ffffa2124c10,python3.7):2025-02-07-15:51:39.969.049 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group distribute network train. distribute network. [WARNING] DISTRIBUTED(162660,fffe9dffb0f0,python3.7):2025-02-07-15:51:39.979.984 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162660,fffe7b7fe0f0,python3.7):2025-02-07-15:51:39.980.334 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DISTRIBUTED(162724,ffff91263c10,python3.7):2025-02-07-15:51:40.034.567 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(162724,ffff91263c10,python3.7):2025-02-07-15:51:40.034.710 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(162724,fffe957fa0f0,python3.7):2025-02-07-15:51:40.035.281 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162724,fffe94ff90f0,python3.7):2025-02-07-15:51:40.035.524 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 distribute network. collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. [WARNING] DISTRIBUTED(162669,ffffaa555c10,python3.7):2025-02-07-15:51:40.082.354 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(162669,ffffaa555c10,python3.7):2025-02-07-15:51:40.082.664 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(162669,fffea5ffb0f0,python3.7):2025-02-07-15:51:40.083.504 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162669,fffea57fa0f0,python3.7):2025-02-07-15:51:40.083.847 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 distribute network. distribute network train. collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. [WARNING] DISTRIBUTED(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:40.146.707 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:40.146.873 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(162678,fffeb5ffb0f0,python3.7):2025-02-07-15:51:40.147.565 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162678,fffeb57fa0f0,python3.7):2025-02-07-15:51:40.147.890 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 distribute network. distribute network train. collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DISTRIBUTED(162691,fffe7f7fe0f0,python3.7):2025-02-07-15:51:40.307.695 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162691,fffd2bfff0f0,python3.7):2025-02-07-15:51:40.308.177 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(162702,fffe92ffd0f0,python3.7):2025-02-07-15:51:40.414.457 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162702,fffe47fff0f0,python3.7):2025-02-07-15:51:40.417.263 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(162741,ffffaec72c10,python3.7):2025-02-07-15:51:40.423.073 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(162741,ffffaec72c10,python3.7):2025-02-07-15:51:40.423.318 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(162741,fffea2ffd0f0,python3.7):2025-02-07-15:51:40.423.995 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162741,fffea27fc0f0,python3.7):2025-02-07-15:51:40.424.234 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 distribute network. collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DISTRIBUTED(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:41.006.600 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:41.006.829 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(162713,fffea6ffd0f0,python3.7):2025-02-07-15:51:41.007.382 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162713,fffea67fc0f0,python3.7):2025-02-07-15:51:41.007.636 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 distribute network. collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. [WARNING] PARALLEL(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:41.040.992 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. distribute network train. [WARNING] PARALLEL(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:41.199.622 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] DEVICE(162660,fffe7b7fe0f0,python3.7):2025-02-07-15:51:41.210.757 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(162660,fffe9dffb0f0,python3.7):2025-02-07-15:51:41.211.016 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162741,fffea27fc0f0,python3.7):2025-02-07-15:51:41.231.271 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(162741,fffea2ffd0f0,python3.7):2025-02-07-15:51:41.231.553 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162724,fffe94ff90f0,python3.7):2025-02-07-15:51:41.233.729 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(162724,fffe957fa0f0,python3.7):2025-02-07-15:51:41.236.171 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] PARALLEL(162660,ffffa2124c10,python3.7):2025-02-07-15:51:41.245.041 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(162724,ffff91263c10,python3.7):2025-02-07-15:51:41.266.446 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] DEVICE(162691,fffd2bfff0f0,python3.7):2025-02-07-15:51:41.312.690 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DEVICE(162669,fffea57fa0f0,python3.7):2025-02-07-15:51:41.312.738 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(162691,fffe7f7fe0f0,python3.7):2025-02-07-15:51:41.312.883 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(162669,fffea5ffb0f0,python3.7):2025-02-07-15:51:41.316.048 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162678,fffeb57fa0f0,python3.7):2025-02-07-15:51:41.372.883 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(162678,fffeb5ffb0f0,python3.7):2025-02-07-15:51:41.373.110 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(162713,fffea67fc0f0,python3.7):2025-02-07-15:51:41.402.882 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(162713,fffea6ffd0f0,python3.7):2025-02-07-15:51:41.403.230 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] PARALLEL(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:41.404.608 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] DEVICE(162702,fffe47fff0f0,python3.7):2025-02-07-15:51:41.430.472 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(162702,fffe92ffd0f0,python3.7):2025-02-07-15:51:41.430.679 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] PARALLEL(162669,ffffaa555c10,python3.7):2025-02-07-15:51:41.457.206 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(162691,ffff8c2f5c10,python3.7):2025-02-07-15:51:41.633.162 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(162741,ffffaec72c10,python3.7):2025-02-07-15:51:41.682.057 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(162702,ffff9ecc0c10,python3.7):2025-02-07-15:51:41.779.140 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(162660,ffffa2124c10,python3.7):2025-02-07-15:51:41.843.582 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(162724,ffff91263c10,python3.7):2025-02-07-15:51:41.853.875 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(162678,ffffba1a1c10,python3.7):2025-02-07-15:51:41.985.677 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(162669,ffffaa555c10,python3.7):2025-02-07-15:51:42.046.558 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:42.250.498 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(162741,ffffaec72c10,python3.7):2025-02-07-15:51:42.291.090 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(162713,ffffb2ee9c10,python3.7):2025-02-07-15:51:42.841.783 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 14.3897, [21] [bootstrap]: 0.00139301 [type_inference]: 0.734117 [auto_monad]: 0.00183998 [graph_reusing]: 2.615e-05 [inline]: 0.0425251, [2] [rewriter_before_opt_a]: 0.00148958 [a1a2]: 0.0410027, [2] [Cycle 1]: 0.0281976, [11] [expand_dump_flag]: 3.419e-05 [switch_simplify]: 0.00105405 [loop_unroll]: 0.00067061 [a_1]: 0.0219628 [recompute_prepare]: 0.00016118 [updatestate_depend_eliminate]: 0.00035873 [updatestate_assign_eliminate]: 9.15801e-05 [updatestate_loads_eliminate]: 0.00020084 [parameter_eliminate]: 4.09002e-06 [a_2]: 0.00339525 [parallel_inline_pass]: 0.00010067 [Cycle 2]: 0.00544464, [11] [expand_dump_flag]: 1.73994e-06 [switch_simplify]: 9.25999e-05 [loop_unroll]: 9.16501e-05 [a_1]: 0.00315169 [recompute_prepare]: 9.87001e-05 [updatestate_depend_eliminate]: 0.00022024 [updatestate_assign_eliminate]: 6.408e-05 [updatestate_loads_eliminate]: 6.307e-05 [parameter_eliminate]: 2.64996e-06 [a_2]: 0.00148366 [parallel_inline_pass]: 0.00010302 [parallel-infer-symbol]: 0.00018903 [pre_auto_parallel]: 9.207e-05 [insert-virtual-dataset]: 0.00129162 [parallel-infer-symbol-second]: 2.06998e-06 [dataset_repeat_opt]: 8.51101e-05 [pipeline_split]: 9.697e-05 [optimize]: 0.593184, [52] [py_interpret_to_execute]: 0.00012525 [rewriter_before_opt_a]: 0.00027371 [opt_a]: 0.578718, [3] [Cycle 1]: 0.497408, [46] [expand_dump_flag]: 1.90001e-06 [switch_simplify]: 0.00010848 [loop_unroll]: 9.683e-05 [a_1]: 0.00346327 [recompute_prepare]: 0.00010395 [updatestate_depend_eliminate]: 0.00010601 [updatestate_assign_eliminate]: 6.267e-05 [updatestate_loads_eliminate]: 6.765e-05 [parameter_eliminate]: 2.58e-06 [a_2]: 0.00161494 [accelerated_algorithm]: 0.00031901 [shard]: 2.10991e-06 [meta_shard_fg_expand]: 5.04999e-05 [shard_inline]: 0.00010761 [auto_parallel]: 8.19101e-05 [parallel]: 0.0143466 [flash_sp]: 5.443e-05 [merge_comm]: 0.00012632 [allreduce_fusion]: 7.353e-05 [matmul_add_comm_reduction]: 9.80101e-05 [allreduce_slice_to_reducescatter]: 3.7998e-07 [virtual_shard_identity]: 0.000127 [virtual_dataset]: 0.00016418 [get_grad_eliminate_]: 0.0001138 [virtual_output]: 0.00011248 [merge_forward]: 7.63399e-05 [cell_reuse_recompute_pass]: 2.84996e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020941 [before_grad]: 0.00019429 [inplace_validation]: 0.0001217 [parallel_renormalize]: 0.0214368 [update_top_fg]: 9.30042e-07 [cast_eliminate]: 0.00014608 [meta_fg_expand]: 0.267231 [inplace_validation_after_expand]: 0.00154065 [flash_sp_send_recv_attached]: 0.00116807 [receive_attached]: 9.417e-05 [after_resolve]: 0.0019636 [a_after_grad]: 0.00385207 [special_op_eliminate]: 0.00180731 [renormalize]: 0.14474 [add_forward_monad_depend]: 0.00036844 [auto_monad_grad]: 0.00021308 [auto_monad_eliminator]: 0.00179989 [cse]: 0.00412136 [a_3]: 0.0243461 [Cycle 2]: 0.0695028, [46] [expand_dump_flag]: 5.06201e-05 [switch_simplify]: 0.00181311 [loop_unroll]: 0.00149029 [a_1]: 0.0309004 [recompute_prepare]: 0.00017615 [updatestate_depend_eliminate]: 0.00022574 [updatestate_assign_eliminate]: 0.00010413 [updatestate_loads_eliminate]: 0.00015993 [parameter_eliminate]: 3.42994e-06 [a_2]: 0.00430422 [accelerated_algorithm]: 0.00016069 [shard]: 1.94996e-06 [meta_shard_fg_expand]: 7.75701e-05 [shard_inline]: 0.00013982 [auto_parallel]: 0.00011371 [parallel]: 1.044e-05 [flash_sp]: 0.00012344 [merge_comm]: 0.00011268 [allreduce_fusion]: 9.26601e-05 [matmul_add_comm_reduction]: 0.00011184 [allreduce_slice_to_reducescatter]: 3.89991e-07 [virtual_shard_identity]: 0.00014123 [virtual_dataset]: 0.00013773 [get_grad_eliminate_]: 0.00013045 [virtual_output]: 0.00013631 [merge_forward]: 8.786e-05 [cell_reuse_recompute_pass]: 1.91003e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025054 [before_grad]: 0.00023814 [inplace_validation]: 9.864e-05 [parallel_renormalize]: 7.0082e-08 [update_top_fg]: 6.3004e-07 [cast_eliminate]: 0.00015694 [meta_fg_expand]: 0.00028173 [inplace_validation_after_expand]: 0.00017594 [flash_sp_send_recv_attached]: 1.56998e-06 [receive_attached]: 9.00007e-07 [after_resolve]: 0.00015801 [a_after_grad]: 0.00022607 [special_op_eliminate]: 0.00013449 [renormalize]: 0.0182398 [add_forward_monad_depend]: 5.50004e-06 [auto_monad_grad]: 2.33995e-06 [auto_monad_eliminator]: 0.00028665 [cse]: 0.00705244 [a_3]: 0.00097046 [Cycle 3]: 0.0117861, [46] [expand_dump_flag]: 2.23005e-06 [switch_simplify]: 0.00014389 [loop_unroll]: 0.00013045 [a_1]: 0.00424999 [recompute_prepare]: 0.00013652 [updatestate_depend_eliminate]: 0.00015096 [updatestate_assign_eliminate]: 9.70401e-05 [updatestate_loads_eliminate]: 9.35899e-05 [parameter_eliminate]: 4.10003e-06 [a_2]: 0.00206402 [accelerated_algorithm]: 0.0001551 [shard]: 1.87999e-06 [meta_shard_fg_expand]: 5.68799e-05 [shard_inline]: 0.00013349 [auto_parallel]: 0.00011254 [parallel]: 1.064e-05 [flash_sp]: 2.68e-06 [merge_comm]: 0.00010849 [allreduce_fusion]: 9.67201e-05 [matmul_add_comm_reduction]: 0.00012426 [allreduce_slice_to_reducescatter]: 3.29921e-07 [virtual_shard_identity]: 0.00015321 [virtual_dataset]: 0.00013512 [get_grad_eliminate_]: 0.00012648 [virtual_output]: 0.000131 [merge_forward]: 9.586e-05 [cell_reuse_recompute_pass]: 3.26999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024815 [before_grad]: 0.00023218 [inplace_validation]: 9.451e-05 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 5.20027e-07 [cast_eliminate]: 0.00014911 [meta_fg_expand]: 0.00011329 [inplace_validation_after_expand]: 0.00012218 [flash_sp_send_recv_attached]: 1.62993e-06 [receive_attached]: 1.32003e-06 [after_resolve]: 0.00014823 [a_after_grad]: 0.00021659 [special_op_eliminate]: 0.00012916 [renormalize]: 1.20024e-07 [add_forward_monad_depend]: 2.26009e-06 [auto_monad_grad]: 2.27999e-06 [auto_monad_eliminator]: 0.00017763 [cse]: 0.00040635 [a_3]: 0.00093652 [py_interpret_to_execute_after_opt_a]: 0.00014467 [slice_cell_reuse_recomputed_activation]: 1.84006e-06 [rewriter_after_opt_a]: 0.00098306 [convert_after_rewriter]: 0.00011345 [order_py_execute_after_rewriter]: 8.21999e-05 [opt_b]: 0.00392264, [1] [Cycle 1]: 0.00391281, [7] [b_1]: 0.00304593 [b_2]: 0.00013722 [updatestate_depend_eliminate]: 9.834e-05 [updatestate_assign_eliminate]: 8.80699e-05 [updatestate_loads_eliminate]: 9.229e-05 [renormalize]: 3.89991e-07 [cse]: 0.00039838 [optimize_parallel_all_gather_comm]: 0.0001416 [overlap_param_gather]: 3.21004e-06 [cconv]: 6.895e-05 [loop_unroll]: 0.00103583 [opt_after_cconv]: 0.00155077, [1] [Cycle 1]: 0.00154415, [7] [c_1]: 0.00077942 [parameter_eliminate]: 2.40002e-06 [updatestate_depend_eliminate]: 0.00013039 [updatestate_assign_eliminate]: 9.445e-05 [updatestate_loads_eliminate]: 9.458e-05 [cse]: 0.00039105 [renormalize]: 5.20027e-07 [remove_dup_value]: 0.0006356 [tuple_transform]: 0.00093026, [1] [Cycle 1]: 0.00092322, [2] [d_1]: 0.00090629 [renormalize]: 5.10016e-07 [partial_unused_args_eliminate]: 2.42004e-06 [add_cache_embedding]: 0.00015381 [add_recomputation]: 0.00072451 [cse_after_recomputation]: 0.0003103, [1] [Cycle 1]: 0.00030283, [1] [cse]: 0.00029006 [environ_conv]: 8.97701e-05 [swap_dp_allreduce_reducescatter]: 0.00012938 [bias_add_comm_swap]: 1.91003e-06 [label_micro_interleaved_index]: 1.14006e-06 [label_fine_grained_interleaved_index]: 0.0005229 [merge_cast_opt]: 1.57009e-06 [slice_recompute_activation]: 0.00014565 [micro_interleaved_order_control]: 1.17009e-06 [assign_add_opt]: 0.00038317 [ForceFp32Comm]: 1.09e-06 [remove_cast_before_assign_add]: 0.00012051 [full_micro_interleaved_order_control]: 1.67999e-06 [reorder_send_recv_between_fp_bp]: 1.14995e-06 [comm_op_add_attrs]: 0.00015101 [add_comm_op_reuse_tag]: 0.00014934 [interleave_split_concat_branches]: 7.50064e-07 [interleave_parallel_branches]: 6.89994e-07 [overlap_opt_shard_in_pipeline]: 8.99006e-06 [overlap_opt_shard_grad_in_pipeline]: 2.33995e-06 [control_data_broadcast_order]: 7.29924e-07 [grouped_pairwise_exchange_alltoall]: 9.05001e-06 [offloading_packed_experts]: 2.02993e-06 [overlap_recompute_and_grad_model_parallel]: 1.20001e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.30039e-07 [overlap_recompute_allgather_and_fa_grad]: 7.41901e-05 [overlap_grad_ring_attention]: 0.00015312 [overlap_grad_flash_sp]: 0.00011972 [begin_end_overlap_inline]: 5.89993e-07 [split_matmul_comm_elemetwise]: 1.54006e-06 [split_layernorm_comm]: 1.31002e-06 [handle_group_info]: 6.47991e-06 [symbol_engine_optimizer]: 0.00080595, [1] [Cycle 1]: 0.00079979, [6] [build]: 5.42899e-05 [elim_shapecalc]: 0.00014235 [elim_not_effective]: 0.00022488 [opt_reshape]: 0.00012909 [fold_const_symbol]: 0.00021053 [renormalize]: 3.7998e-07 [pipeline_parallel_scheduler]: 2.66999e-06 [auto_monad_reorder]: 0.0002929 [get_jit_bprop_graph]: 5.39934e-07 [rewriter_after_jit_bprop_graph]: 3.60073e-07 [eliminate_special_op_node]: 0.00138265 [distribtued_split]: 0.0003626 [validate]: 0.00029741 [task_emit]: 13.0112 [execute]: 8.46991e-06 Sums bootstrap : 0.001393s : 0.01% type_inference : 0.734117s : 5.11% auto_monad : 0.001840s : 0.01% graph_reusing : 0.000026s : 0.00% inline.rewriter_before_opt_a : 0.001490s : 0.01% inline.a1a2.expand_dump_flag : 0.000036s : 0.00% inline.a1a2.switch_simplify : 0.001147s : 0.01% inline.a1a2.loop_unroll : 0.000762s : 0.01% inline.a1a2.a_1 : 0.025114s : 0.17% inline.a1a2.recompute_prepare : 0.000260s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000579s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000156s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000264s : 0.00% inline.a1a2.parameter_eliminate : 0.000007s : 0.00% inline.a1a2.a_2 : 0.004879s : 0.03% inline.a1a2.parallel_inline_pass : 0.000204s : 0.00% parallel-infer-symbol : 0.000189s : 0.00% pre_auto_parallel : 0.000092s : 0.00% insert-virtual-dataset : 0.001292s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000085s : 0.00% pipeline_split : 0.000097s : 0.00% optimize.py_interpret_to_execute : 0.000125s : 0.00% optimize.rewriter_before_opt_a : 0.000274s : 0.00% optimize.opt_a.expand_dump_flag : 0.000055s : 0.00% optimize.opt_a.switch_simplify : 0.002065s : 0.01% optimize.opt_a.loop_unroll : 0.001718s : 0.01% optimize.opt_a.a_1 : 0.038614s : 0.27% optimize.opt_a.recompute_prepare : 0.000417s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000483s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000264s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000321s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.007983s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000635s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000185s : 0.00% optimize.opt_a.shard_inline : 0.000381s : 0.00% optimize.opt_a.auto_parallel : 0.000308s : 0.00% optimize.opt_a.parallel : 0.014368s : 0.10% optimize.opt_a.flash_sp : 0.000181s : 0.00% optimize.opt_a.merge_comm : 0.000347s : 0.00% optimize.opt_a.allreduce_fusion : 0.000263s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000334s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000421s : 0.00% optimize.opt_a.virtual_dataset : 0.000437s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000371s : 0.00% optimize.opt_a.virtual_output : 0.000380s : 0.00% optimize.opt_a.merge_forward : 0.000260s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000708s : 0.00% optimize.opt_a.before_grad : 0.000665s : 0.00% optimize.opt_a.inplace_validation : 0.000315s : 0.00% optimize.opt_a.parallel_renormalize : 0.021437s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000452s : 0.00% optimize.opt_a.meta_fg_expand : 0.267626s : 1.86% optimize.opt_a.inplace_validation_after_expand : 0.001839s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001171s : 0.01% optimize.opt_a.receive_attached : 0.000096s : 0.00% optimize.opt_a.after_resolve : 0.002270s : 0.02% optimize.opt_a.a_after_grad : 0.004295s : 0.03% optimize.opt_a.special_op_eliminate : 0.002071s : 0.01% optimize.opt_a.renormalize : 0.162980s : 1.13% optimize.opt_a.add_forward_monad_depend : 0.000376s : 0.00% optimize.opt_a.auto_monad_grad : 0.000218s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002264s : 0.02% optimize.opt_a.cse : 0.011580s : 0.08% optimize.opt_a.a_3 : 0.026253s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000145s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000983s : 0.01% optimize.convert_after_rewriter : 0.000113s : 0.00% optimize.order_py_execute_after_rewriter : 0.000082s : 0.00% optimize.opt_b.b_1 : 0.003046s : 0.02% optimize.opt_b.b_2 : 0.000137s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000098s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000088s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000092s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000398s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000142s : 0.00% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000069s : 0.00% optimize.loop_unroll : 0.001036s : 0.01% optimize.opt_after_cconv.c_1 : 0.000779s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000130s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000094s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000095s : 0.00% optimize.opt_after_cconv.cse : 0.000391s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000636s : 0.00% optimize.tuple_transform.d_1 : 0.000906s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000154s : 0.00% optimize.add_recomputation : 0.000725s : 0.01% optimize.cse_after_recomputation.cse : 0.000290s : 0.00% optimize.environ_conv : 0.000090s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000129s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000523s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000146s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000383s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000121s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000151s : 0.00% optimize.add_comm_op_reuse_tag : 0.000149s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000009s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000074s : 0.00% optimize.overlap_grad_ring_attention : 0.000153s : 0.00% optimize.overlap_grad_flash_sp : 0.000120s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000006s : 0.00% optimize.symbol_engine_optimizer.build : 0.000054s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000142s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000225s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000129s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000211s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000293s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001383s : 0.01% distribtued_split : 0.000363s : 0.00% validate : 0.000297s : 0.00% task_emit : 13.011152s : 90.49% execute : 0.000008s : 0.00% Time group info: ------[substitution.] 0.048695 4298 0.04% : 0.000020s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000021s : 9: substitution.addn_check_dump 0.10% : 0.000050s : 7: substitution.addn_zero_filter 0.03% : 0.000013s : 7: substitution.adjust_all_reduce_mul_add 0.59% : 0.000287s : 71: substitution.arithmetic_simplify 0.10% : 0.000051s : 10: substitution.cast_eliminate 0.11% : 0.000053s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000023s : 15: substitution.environ_get_add_eliminate 0.03% : 0.000016s : 12: substitution.environ_get_depend_swap 0.06% : 0.000027s : 27: substitution.environ_get_eliminate 0.07% : 0.000033s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000018s : 23: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.08% : 0.000040s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000031s : 107: substitution.fold_const_symbol 64.74% : 0.031523s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000083s : 126: substitution.graph_param_transform 0.02% : 0.000007s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.68% : 0.011530s : 331: substitution.inline 1.39% : 0.000678s : 112: substitution.inline_without_move 0.25% : 0.000122s : 309: substitution.j_node_and_user_rematch 0.40% : 0.000197s : 40: substitution.less_batch_normalization 0.09% : 0.000044s : 90: substitution.load_eliminater 0.10% : 0.000047s : 10: substitution.merge_addn 0.22% : 0.000106s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.06% : 0.000027s : 1: substitution.partial_defer_inline 0.19% : 0.000092s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.06% : 0.000029s : 15: substitution.reduce_eliminate 0.33% : 0.000159s : 309: substitution.remove_not_recompute_node 1.97% : 0.000960s : 508: substitution.replace_applicator 0.22% : 0.000108s : 251: substitution.replace_old_param 0.08% : 0.000037s : 11: substitution.reshape_eliminate 0.02% : 0.000012s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000009s : 4: substitution.specialize_transform 0.03% : 0.000015s : 12: substitution.split_environ_get_set_with_tuple_value 0.16% : 0.000079s : 34: substitution.switch_simplify 0.07% : 0.000033s : 11: substitution.tile_eliminate 0.51% : 0.000247s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000132s : 107: substitution.tuple_list_get_item_const_eliminator 0.42% : 0.000205s : 107: substitution.tuple_list_get_item_depend_reorder 1.55% : 0.000755s : 308: substitution.tuple_list_get_item_eliminator 0.36% : 0.000177s : 107: substitution.tuple_list_get_set_item_eliminator 0.39% : 0.000191s : 210: substitution.updatestate_pure_node_eliminater 0.68% : 0.000333s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.733661 2 96.51% : 0.708091s : 1: type_inference.infer 3.49% : 0.025570s : 1: type_inference.specialize ------[replace.] 0.009599 775 0.41% : 0.000040s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000007s : 1: replace.arithmetic_simplify 0.51% : 0.000049s : 7: replace.depend_value_elim 0.44% : 0.000043s : 3: replace.environ_get_set_eliminate 28.44% : 0.002730s : 183: replace.getattr_setattr_resolve 30.28% : 0.002907s : 310: replace.inline 0.22% : 0.000021s : 1: replace.merge_addn 1.15% : 0.000111s : 7: replace.partial_eliminate 3.89% : 0.000374s : 25: replace.replace_applicator 3.85% : 0.000370s : 34: replace.switch_simplify 0.51% : 0.000049s : 6: replace.tuple_list_get_item_depend_reorder 29.87% : 0.002867s : 191: replace.tuple_list_get_item_eliminator 0.17% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.18% : 0.000017s : 1: replace.virtual_dataset_eliminate ------[match.] 0.041218 775 0.04% : 0.000016s : 5: match.ad_related_special_op_eliminate 0.03% : 0.000012s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000016s : 3: match.environ_get_set_eliminate 70.87% : 0.029210s : 183: match.getattr_setattr_resolve 27.43% : 0.011306s : 310: match.inline 0.05% : 0.000021s : 1: match.merge_addn 0.08% : 0.000034s : 7: match.partial_eliminate 0.23% : 0.000093s : 25: match.replace_applicator 0.15% : 0.000060s : 34: match.switch_simplify 0.07% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 0.97% : 0.000401s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000007s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000009s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020592131318 0.79% : 0.000163s : 1198: predicate.accumulaten_eliminater 0.28% : 0.000058s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000114s : 835: predicate.addn_check_dump 0.86% : 0.000177s : 1198: predicate.addn_zero_filter 0.79% : 0.000162s : 1198: predicate.adjust_all_reduce_mul_add 1.79% : 0.000369s : 2034: predicate.arithmetic_simplify 1.14% : 0.000235s : 1586: predicate.cast_eliminate 3.21% : 0.000661s : 3484: predicate.check_bprop_eliminate 0.57% : 0.000117s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.18% : 0.000242s : 1399: predicate.convert_tensor_eliminate 0.57% : 0.000118s : 838: predicate.depend_value_elim 0.82% : 0.000169s : 1202: predicate.dict_get_item_const_eliminator 0.91% : 0.000187s : 1202: predicate.dict_get_item_eliminator 0.83% : 0.000171s : 1202: predicate.dict_set_item_eliminator 0.04% : 0.000009s : 126: predicate.elim_not_effective 0.11% : 0.000022s : 126: predicate.elim_shapecalc_of_broadcastargs 0.89% : 0.000183s : 1334: predicate.environ_add_const_eliminate 0.85% : 0.000175s : 1337: predicate.environ_get_add_eliminate 0.86% : 0.000177s : 1334: predicate.environ_get_depend_swap 1.46% : 0.000300s : 2172: predicate.environ_get_eliminate 0.84% : 0.000174s : 1337: predicate.environ_get_set_eliminate 1.13% : 0.000233s : 1717: predicate.exchange_switch_depend_value 1.41% : 0.000291s : 1717: predicate.float_depend_g_call 0.56% : 0.000115s : 835: predicate.float_environ_get_switch 0.65% : 0.000134s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000058s : 395: predicate.get_grad_eliminate 2.42% : 0.000499s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000113s : 835: predicate.incorporate_call 0.54% : 0.000111s : 835: predicate.incorporate_call_switch 3.96% : 0.000816s : 4602: predicate.inline 2.29% : 0.000472s : 2203: predicate.inline_without_move 0.14% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.32% : 0.000067s : 388: predicate.less_batch_normalization 1.15% : 0.000237s : 1660: predicate.list_to_tuple_eliminator_ 1.88% : 0.000387s : 2874: predicate.load_eliminater 0.20% : 0.000042s : 135: predicate.loop_unroll_after_grad 2.36% : 0.000487s : 2640: predicate.loop_unroll_before_grad 0.96% : 0.000198s : 1478: predicate.make_slice_get_slice_eliminator 0.57% : 0.000116s : 837: predicate.merge_addn 3.12% : 0.000643s : 3380: predicate.micro_step_allgather_replace 3.09% : 0.000637s : 3380: predicate.mini_step_allgather_replace 0.79% : 0.000162s : 1199: predicate.minmaximum_grad 0.18% : 0.000037s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 2.05% : 0.000422s : 1717: predicate.partial_defer_inline 1.09% : 0.000224s : 1541: predicate.partial_eliminate 0.78% : 0.000160s : 1198: predicate.print_const_string_wrapper 0.56% : 0.000116s : 824: predicate.reduce_all_const_elim 0.95% : 0.000196s : 1199: predicate.reduce_eliminate 0.14% : 0.000030s : 395: predicate.remove_not_recompute_node 2.00% : 0.000412s : 4829: predicate.replace_applicator 0.79% : 0.000163s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.80% : 0.000165s : 1199: predicate.reshape_eliminate 3.13% : 0.000645s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000022s : 135: predicate.row_tensor_eliminate 3.30% : 0.000679s : 3484: predicate.same_eliminate 0.24% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.30% : 0.000062s : 395: predicate.shard_identity_eliminate 2.09% : 0.000431s : 2338: predicate.special_op_eliminate 0.64% : 0.000131s : 837: predicate.specialize_transform 3.40% : 0.000700s : 3380: predicate.split_environ_get_set_with_tuple_value 1.57% : 0.000324s : 2203: predicate.stack_unstack_eliminate 1.91% : 0.000394s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.27% : 0.000262s : 1717: predicate.switch_defer_inline 4.42% : 0.000911s : 5201: predicate.switch_layer_defer_inline 4.37% : 0.000900s : 5262: predicate.switch_simplify 0.77% : 0.000159s : 1199: predicate.tile_eliminate 0.77% : 0.000158s : 1199: predicate.transpose_eliminate 1.06% : 0.000218s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.08% : 0.000223s : 1469: predicate.tuple_list_get_item_const_eliminator 0.98% : 0.000202s : 1469: predicate.tuple_list_get_item_depend_reorder 1.92% : 0.000396s : 2495: predicate.tuple_list_get_item_eliminator 0.98% : 0.000202s : 1469: predicate.tuple_list_get_set_item_eliminator 1.64% : 0.000339s : 2304: predicate.tuple_list_set_item_eliminator 1.08% : 0.000223s : 1660: predicate.tuple_to_list_eliminator_ 1.91% : 0.000394s : 2874: predicate.updatestate_pure_node_eliminater 2.52% : 0.000519s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000020s : 135: predicate.value_based_eliminate 0.29% : 0.000060s : 397: predicate.virtual_dataset_eliminate 0.28% : 0.000057s : 395: predicate.virtual_output_eliminate 0.11% : 0.000022s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.057657 747 67.98% : 0.039194s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.42% : 0.001393s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.61% : 0.017070s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.375493 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.27% : 0.041007s : 1: a1a2 0.00% : 0.000160s : 1: add_cache_embedding 0.00% : 0.000156s : 1: add_comm_op_reuse_tag 0.00% : 0.000736s : 1: add_recomputation 0.00% : 0.000392s : 1: assign_add_opt 0.01% : 0.001862s : 1: auto_monad 0.00% : 0.000305s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.01% : 0.001433s : 1: bootstrap 0.00% : 0.000075s : 1: cconv 0.00% : 0.000158s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000120s : 1: convert_after_rewriter 0.00% : 0.000315s : 1: cse_after_recomputation 0.00% : 0.000094s : 1: dataset_repeat_opt 0.00% : 0.000377s : 1: distribtued_split 0.01% : 0.001397s : 1: eliminate_special_op_node 0.00% : 0.000098s : 1: environ_conv 0.00% : 0.000017s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000034s : 1: graph_reusing 0.00% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000010s : 1: handle_group_info 0.28% : 0.042536s : 1: inline 0.01% : 0.001312s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000530s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.01% : 0.001046s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.032297s : 61: opt.transform.a1a2 0.00% : 0.000177s : 1: opt.transform.loop_unroll_optimizer 0.58% : 0.089895s : 148: opt.transform.opt_a 0.01% : 0.000777s : 1: opt.transform.opt_after_cconv 0.02% : 0.003156s : 27: opt.transform.opt_b 0.24% : 0.037114s : 16: opt.transform.opt_resolve 0.01% : 0.000904s : 1: opt.transform.opt_trans_graph 0.01% : 0.000828s : 6: opt.transform.special_op_eliminate 0.00% : 0.000701s : 4: opt.transform.symbol_engine_opt 3.76% : 0.578724s : 1: opt_a 0.01% : 0.001556s : 1: opt_after_cconv 0.03% : 0.003927s : 1: opt_b 3.86% : 0.593194s : 1: optimize 0.00% : 0.000149s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000088s : 1: order_py_execute_after_rewriter 0.00% : 0.000124s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000159s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000013s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.00% : 0.000080s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000200s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000107s : 1: pipeline_split 0.00% : 0.000101s : 1: pre_auto_parallel 0.00% : 0.000133s : 1: py_interpret_to_execute 0.00% : 0.000154s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000127s : 1: remove_cast_before_assign_add 0.00% : 0.000649s : 1: remove_dup_value 0.85% : 0.131034s : 3: renormalize.infer 0.35% : 0.053340s : 3: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000994s : 1: rewriter_after_opt_a 0.01% : 0.001783s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000152s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.00% : 0.000136s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000810s : 1: symbol_engine_optimizer 84.62% : 13.011190s : 1: task_emit 0.01% : 0.000934s : 1: tuple_transform 4.77% : 0.734149s : 1: type_inference 0.01% : 0.001333s : 1: validate TotalTime = 14.3807, [21] [bootstrap]: 0.00150119 [type_inference]: 0.72614 [auto_monad]: 0.0019084 [graph_reusing]: 2.55e-05 [inline]: 0.0421105, [2] [rewriter_before_opt_a]: 0.00149122 [a1a2]: 0.0405838, [2] [Cycle 1]: 0.0278905, [11] [expand_dump_flag]: 3.70099e-05 [switch_simplify]: 0.00106049 [loop_unroll]: 0.00069005 [a_1]: 0.0217581 [recompute_prepare]: 0.00015722 [updatestate_depend_eliminate]: 0.00038649 [updatestate_assign_eliminate]: 8.91699e-05 [updatestate_loads_eliminate]: 0.0001938 [parameter_eliminate]: 4.77e-06 [a_2]: 0.00325636 [parallel_inline_pass]: 0.0001017 [Cycle 2]: 0.00532865, [11] [expand_dump_flag]: 1.05996e-06 [switch_simplify]: 9.606e-05 [loop_unroll]: 9.236e-05 [a_1]: 0.00312301 [recompute_prepare]: 9.71101e-05 [updatestate_depend_eliminate]: 7.22599e-05 [updatestate_assign_eliminate]: 5.954e-05 [updatestate_loads_eliminate]: 6.245e-05 [parameter_eliminate]: 2.14006e-06 [a_2]: 0.00154234 [parallel_inline_pass]: 9.944e-05 [parallel-infer-symbol]: 0.00018978 [pre_auto_parallel]: 8.74799e-05 [insert-virtual-dataset]: 0.00127959 [parallel-infer-symbol-second]: 2.22004e-06 [dataset_repeat_opt]: 7.882e-05 [pipeline_split]: 8.67799e-05 [optimize]: 0.580502, [52] [py_interpret_to_execute]: 0.00012317 [rewriter_before_opt_a]: 0.00027136 [opt_a]: 0.566325, [3] [Cycle 1]: 0.485593, [46] [expand_dump_flag]: 1.66008e-06 [switch_simplify]: 0.00011097 [loop_unroll]: 9.686e-05 [a_1]: 0.00333032 [recompute_prepare]: 0.00010304 [updatestate_depend_eliminate]: 0.00010248 [updatestate_assign_eliminate]: 6.30501e-05 [updatestate_loads_eliminate]: 6.75899e-05 [parameter_eliminate]: 2.75997e-06 [a_2]: 0.00168085 [accelerated_algorithm]: 0.00032739 [shard]: 2.20002e-06 [meta_shard_fg_expand]: 5.056e-05 [shard_inline]: 0.00010765 [auto_parallel]: 8.288e-05 [parallel]: 0.0144248 [flash_sp]: 7.174e-05 [merge_comm]: 0.00012774 [allreduce_fusion]: 7.57499e-05 [matmul_add_comm_reduction]: 9.65301e-05 [allreduce_slice_to_reducescatter]: 3.69968e-07 [virtual_shard_identity]: 0.00012213 [virtual_dataset]: 0.00015784 [get_grad_eliminate_]: 0.00011385 [virtual_output]: 0.000112 [merge_forward]: 7.518e-05 [cell_reuse_recompute_pass]: 2.68e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020911 [before_grad]: 0.00019432 [inplace_validation]: 0.00011861 [parallel_renormalize]: 0.021383 [update_top_fg]: 6.10016e-07 [cast_eliminate]: 0.00014568 [meta_fg_expand]: 0.259935 [inplace_validation_after_expand]: 0.00155368 [flash_sp_send_recv_attached]: 0.00117199 [receive_attached]: 7.816e-05 [after_resolve]: 0.00196422 [a_after_grad]: 0.00378821 [special_op_eliminate]: 0.00181208 [renormalize]: 0.139814 [add_forward_monad_depend]: 0.00036118 [auto_monad_grad]: 0.00021261 [auto_monad_eliminator]: 0.00180987 [cse]: 0.00414254 [a_3]: 0.0248588 [Cycle 2]: 0.0689445, [46] [expand_dump_flag]: 5.074e-05 [switch_simplify]: 0.00179201 [loop_unroll]: 0.00252055 [a_1]: 0.030911 [recompute_prepare]: 0.00017259 [updatestate_depend_eliminate]: 0.00022391 [updatestate_assign_eliminate]: 0.00010142 [updatestate_loads_eliminate]: 0.00015937 [parameter_eliminate]: 3.38e-06 [a_2]: 0.00437175 [accelerated_algorithm]: 0.000161 [shard]: 1.35996e-06 [meta_shard_fg_expand]: 7.363e-05 [shard_inline]: 0.00013811 [auto_parallel]: 0.00011021 [parallel]: 8.80996e-06 [flash_sp]: 0.00011776 [merge_comm]: 0.00010637 [allreduce_fusion]: 9.18e-05 [matmul_add_comm_reduction]: 0.0001104 [allreduce_slice_to_reducescatter]: 3.39933e-07 [virtual_shard_identity]: 0.00013861 [virtual_dataset]: 0.00013666 [get_grad_eliminate_]: 0.00013042 [virtual_output]: 0.00013327 [merge_forward]: 8.696e-05 [cell_reuse_recompute_pass]: 2.01003e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024962 [before_grad]: 0.00023686 [inplace_validation]: 8.16201e-05 [parallel_renormalize]: 1.20024e-07 [update_top_fg]: 5.39934e-07 [cast_eliminate]: 0.00016758 [meta_fg_expand]: 0.0002761 [inplace_validation_after_expand]: 0.00017418 [flash_sp_send_recv_attached]: 1.52003e-06 [receive_attached]: 1.16997e-06 [after_resolve]: 0.00015753 [a_after_grad]: 0.00022585 [special_op_eliminate]: 0.00013396 [renormalize]: 0.017183 [add_forward_monad_depend]: 4.99003e-06 [auto_monad_grad]: 1.89e-06 [auto_monad_eliminator]: 0.00028189 [cse]: 0.00654086 [a_3]: 0.00097661 [Cycle 3]: 0.0117692, [46] [expand_dump_flag]: 1.85997e-06 [switch_simplify]: 0.00013085 [loop_unroll]: 0.00012731 [a_1]: 0.00425429 [recompute_prepare]: 0.00013452 [updatestate_depend_eliminate]: 0.00014992 [updatestate_assign_eliminate]: 9.163e-05 [updatestate_loads_eliminate]: 9.06801e-05 [parameter_eliminate]: 3.25998e-06 [a_2]: 0.00212123 [accelerated_algorithm]: 0.00015721 [shard]: 1.66998e-06 [meta_shard_fg_expand]: 5.175e-05 [shard_inline]: 0.0001313 [auto_parallel]: 0.00010926 [parallel]: 8.78004e-06 [flash_sp]: 2.24996e-06 [merge_comm]: 0.00010202 [allreduce_fusion]: 9.35299e-05 [matmul_add_comm_reduction]: 0.00011687 [allreduce_slice_to_reducescatter]: 4.89992e-07 [virtual_shard_identity]: 0.00013634 [virtual_dataset]: 0.00013151 [get_grad_eliminate_]: 0.00012645 [virtual_output]: 0.00012913 [merge_forward]: 9.063e-05 [cell_reuse_recompute_pass]: 3.46999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024667 [before_grad]: 0.00024257 [inplace_validation]: 8.72699e-05 [parallel_renormalize]: 1.09896e-07 [update_top_fg]: 4.20026e-07 [cast_eliminate]: 0.00014388 [meta_fg_expand]: 0.00011442 [inplace_validation_after_expand]: 0.00011337 [flash_sp_send_recv_attached]: 1.66998e-06 [receive_attached]: 1.37999e-06 [after_resolve]: 0.00014546 [a_after_grad]: 0.000248 [special_op_eliminate]: 0.00013092 [renormalize]: 9.01055e-08 [add_forward_monad_depend]: 2.04006e-06 [auto_monad_grad]: 2.0701e-06 [auto_monad_eliminator]: 0.00016883 [cse]: 0.00039299 [a_3]: 0.00095392 [py_interpret_to_execute_after_opt_a]: 0.00013534 [slice_cell_reuse_recomputed_activation]: 1.75997e-06 [rewriter_after_opt_a]: 0.00091706 [convert_after_rewriter]: 0.00010773 [order_py_execute_after_rewriter]: 8.189e-05 [opt_b]: 0.00397382, [1] [Cycle 1]: 0.0039669, [7] [b_1]: 0.00311849 [b_2]: 0.00013589 [updatestate_depend_eliminate]: 9.625e-05 [updatestate_assign_eliminate]: 8.525e-05 [updatestate_loads_eliminate]: 8.913e-05 [renormalize]: 4.59957e-07 [cse]: 0.00039172 [optimize_parallel_all_gather_comm]: 0.00013576 [overlap_param_gather]: 1.03004e-06 [cconv]: 6.38299e-05 [loop_unroll]: 0.00095411 [opt_after_cconv]: 0.00153925, [1] [Cycle 1]: 0.00153215, [7] [c_1]: 0.00078292 [parameter_eliminate]: 2.15007e-06 [updatestate_depend_eliminate]: 0.00012873 [updatestate_assign_eliminate]: 9.06299e-05 [updatestate_loads_eliminate]: 9.008e-05 [cse]: 0.00038594 [renormalize]: 5.60074e-07 [remove_dup_value]: 0.00058221 [tuple_transform]: 0.00095736, [1] [Cycle 1]: 0.00095106, [2] [d_1]: 0.00093591 [renormalize]: 4.20026e-07 [partial_unused_args_eliminate]: 2.37999e-06 [add_cache_embedding]: 0.00014708 [add_recomputation]: 0.00072176 [cse_after_recomputation]: 0.00029711, [1] [Cycle 1]: 0.00028995, [1] [cse]: 0.00027863 [environ_conv]: 9.875e-05 [swap_dp_allreduce_reducescatter]: 0.00012603 [bias_add_comm_swap]: 2.09e-06 [label_micro_interleaved_index]: 1.06997e-06 [label_fine_grained_interleaved_index]: 0.00050953 [merge_cast_opt]: 1.21002e-06 [slice_recompute_activation]: 0.00014281 [micro_interleaved_order_control]: 1.12003e-06 [assign_add_opt]: 0.00037447 [ForceFp32Comm]: 1.27009e-06 [remove_cast_before_assign_add]: 0.00011576 [full_micro_interleaved_order_control]: 1.23004e-06 [reorder_send_recv_between_fp_bp]: 9.69972e-07 [comm_op_add_attrs]: 0.00013964 [add_comm_op_reuse_tag]: 0.00014408 [interleave_split_concat_branches]: 6.89994e-07 [interleave_parallel_branches]: 5.70086e-07 [overlap_opt_shard_in_pipeline]: 1.119e-05 [overlap_opt_shard_grad_in_pipeline]: 2.03005e-06 [control_data_broadcast_order]: 7.00005e-07 [grouped_pairwise_exchange_alltoall]: 8.29995e-06 [offloading_packed_experts]: 1.76998e-06 [overlap_recompute_and_grad_model_parallel]: 1.24995e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.40051e-07 [overlap_recompute_allgather_and_fa_grad]: 6.18499e-05 [overlap_grad_ring_attention]: 0.00014394 [overlap_grad_flash_sp]: 0.00011727 [begin_end_overlap_inline]: 5.79981e-07 [split_matmul_comm_elemetwise]: 1.20001e-06 [split_layernorm_comm]: 1.13994e-06 [handle_group_info]: 3.34997e-06 [symbol_engine_optimizer]: 0.00078601, [1] [Cycle 1]: 0.00077991, [6] [build]: 4.979e-05 [elim_shapecalc]: 0.00013974 [elim_not_effective]: 0.00021887 [opt_reshape]: 0.00012809 [fold_const_symbol]: 0.0002084 [renormalize]: 3.60073e-07 [pipeline_parallel_scheduler]: 2.1701e-06 [auto_monad_reorder]: 0.00029099 [get_jit_bprop_graph]: 3.59956e-07 [rewriter_after_jit_bprop_graph]: 3.10014e-07 [eliminate_special_op_node]: 0.00139948 [distribtued_split]: 0.00037396 [validate]: 0.00027696 [task_emit]: 13.0231 [execute]: 7.58993e-06 Sums bootstrap : 0.001501s : 0.01% type_inference : 0.726140s : 5.05% auto_monad : 0.001908s : 0.01% graph_reusing : 0.000025s : 0.00% inline.rewriter_before_opt_a : 0.001491s : 0.01% inline.a1a2.expand_dump_flag : 0.000038s : 0.00% inline.a1a2.switch_simplify : 0.001157s : 0.01% inline.a1a2.loop_unroll : 0.000782s : 0.01% inline.a1a2.a_1 : 0.024881s : 0.17% inline.a1a2.recompute_prepare : 0.000254s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000459s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000149s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000256s : 0.00% inline.a1a2.parameter_eliminate : 0.000007s : 0.00% inline.a1a2.a_2 : 0.004799s : 0.03% inline.a1a2.parallel_inline_pass : 0.000201s : 0.00% parallel-infer-symbol : 0.000190s : 0.00% pre_auto_parallel : 0.000087s : 0.00% insert-virtual-dataset : 0.001280s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000079s : 0.00% pipeline_split : 0.000087s : 0.00% optimize.py_interpret_to_execute : 0.000123s : 0.00% optimize.rewriter_before_opt_a : 0.000271s : 0.00% optimize.opt_a.expand_dump_flag : 0.000054s : 0.00% optimize.opt_a.switch_simplify : 0.002034s : 0.01% optimize.opt_a.loop_unroll : 0.002745s : 0.02% optimize.opt_a.a_1 : 0.038496s : 0.27% optimize.opt_a.recompute_prepare : 0.000410s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000476s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000256s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000318s : 0.00% optimize.opt_a.parameter_eliminate : 0.000009s : 0.00% optimize.opt_a.a_2 : 0.008174s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000646s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000176s : 0.00% optimize.opt_a.shard_inline : 0.000377s : 0.00% optimize.opt_a.auto_parallel : 0.000302s : 0.00% optimize.opt_a.parallel : 0.014442s : 0.10% optimize.opt_a.flash_sp : 0.000192s : 0.00% optimize.opt_a.merge_comm : 0.000336s : 0.00% optimize.opt_a.allreduce_fusion : 0.000261s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000324s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000397s : 0.00% optimize.opt_a.virtual_dataset : 0.000426s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000371s : 0.00% optimize.opt_a.virtual_output : 0.000374s : 0.00% optimize.opt_a.merge_forward : 0.000253s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000705s : 0.00% optimize.opt_a.before_grad : 0.000674s : 0.00% optimize.opt_a.inplace_validation : 0.000287s : 0.00% optimize.opt_a.parallel_renormalize : 0.021383s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000457s : 0.00% optimize.opt_a.meta_fg_expand : 0.260326s : 1.81% optimize.opt_a.inplace_validation_after_expand : 0.001841s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001175s : 0.01% optimize.opt_a.receive_attached : 0.000081s : 0.00% optimize.opt_a.after_resolve : 0.002267s : 0.02% optimize.opt_a.a_after_grad : 0.004262s : 0.03% optimize.opt_a.special_op_eliminate : 0.002077s : 0.01% optimize.opt_a.renormalize : 0.156997s : 1.09% optimize.opt_a.add_forward_monad_depend : 0.000368s : 0.00% optimize.opt_a.auto_monad_grad : 0.000217s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002261s : 0.02% optimize.opt_a.cse : 0.011076s : 0.08% optimize.opt_a.a_3 : 0.026789s : 0.19% optimize.py_interpret_to_execute_after_opt_a : 0.000135s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000917s : 0.01% optimize.convert_after_rewriter : 0.000108s : 0.00% optimize.order_py_execute_after_rewriter : 0.000082s : 0.00% optimize.opt_b.b_1 : 0.003118s : 0.02% optimize.opt_b.b_2 : 0.000136s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000096s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000085s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000089s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000392s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000136s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000064s : 0.00% optimize.loop_unroll : 0.000954s : 0.01% optimize.opt_after_cconv.c_1 : 0.000783s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000129s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000091s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000090s : 0.00% optimize.opt_after_cconv.cse : 0.000386s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000582s : 0.00% optimize.tuple_transform.d_1 : 0.000936s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000147s : 0.00% optimize.add_recomputation : 0.000722s : 0.01% optimize.cse_after_recomputation.cse : 0.000279s : 0.00% optimize.environ_conv : 0.000099s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000126s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000510s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000143s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000374s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000116s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000140s : 0.00% optimize.add_comm_op_reuse_tag : 0.000144s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000011s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000008s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000062s : 0.00% optimize.overlap_grad_ring_attention : 0.000144s : 0.00% optimize.overlap_grad_flash_sp : 0.000117s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000050s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000140s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000219s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000128s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000208s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000291s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001399s : 0.01% distribtued_split : 0.000374s : 0.00% validate : 0.000277s : 0.00% task_emit : 13.023100s : 90.63% execute : 0.000008s : 0.00% Time group info: ------[substitution.] 0.048013 4298 0.04% : 0.000020s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000021s : 9: substitution.addn_check_dump 0.10% : 0.000048s : 7: substitution.addn_zero_filter 0.03% : 0.000013s : 7: substitution.adjust_all_reduce_mul_add 0.58% : 0.000278s : 71: substitution.arithmetic_simplify 0.10% : 0.000047s : 10: substitution.cast_eliminate 0.11% : 0.000052s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.06% : 0.000030s : 15: substitution.environ_get_add_eliminate 0.03% : 0.000015s : 12: substitution.environ_get_depend_swap 0.05% : 0.000026s : 27: substitution.environ_get_eliminate 0.07% : 0.000033s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000018s : 23: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.02% : 0.000010s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000031s : 107: substitution.fold_const_symbol 64.81% : 0.031115s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000084s : 126: substitution.graph_param_transform 0.01% : 0.000007s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.70% : 0.011380s : 331: substitution.inline 1.37% : 0.000657s : 112: substitution.inline_without_move 0.25% : 0.000121s : 309: substitution.j_node_and_user_rematch 0.42% : 0.000201s : 40: substitution.less_batch_normalization 0.09% : 0.000044s : 90: substitution.load_eliminater 0.10% : 0.000046s : 10: substitution.merge_addn 0.23% : 0.000112s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.03% : 0.000017s : 1: substitution.partial_defer_inline 0.11% : 0.000053s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.06% : 0.000029s : 15: substitution.reduce_eliminate 0.33% : 0.000158s : 309: substitution.remove_not_recompute_node 2.01% : 0.000964s : 508: substitution.replace_applicator 0.23% : 0.000109s : 251: substitution.replace_old_param 0.07% : 0.000036s : 11: substitution.reshape_eliminate 0.02% : 0.000011s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000008s : 4: substitution.specialize_transform 0.03% : 0.000014s : 12: substitution.split_environ_get_set_with_tuple_value 0.16% : 0.000077s : 34: substitution.switch_simplify 0.05% : 0.000025s : 11: substitution.tile_eliminate 0.51% : 0.000244s : 101: substitution.tuple_list_convert_item_index_to_positive 0.28% : 0.000136s : 107: substitution.tuple_list_get_item_const_eliminator 0.41% : 0.000199s : 107: substitution.tuple_list_get_item_depend_reorder 1.56% : 0.000750s : 308: substitution.tuple_list_get_item_eliminator 0.36% : 0.000174s : 107: substitution.tuple_list_get_set_item_eliminator 0.41% : 0.000198s : 210: substitution.updatestate_pure_node_eliminater 0.68% : 0.000328s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.725693 2 96.60% : 0.700991s : 1: type_inference.infer 3.40% : 0.024702s : 1: type_inference.specialize ------[replace.] 0.009431 775 0.42% : 0.000039s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000006s : 1: replace.arithmetic_simplify 0.47% : 0.000044s : 7: replace.depend_value_elim 0.44% : 0.000041s : 3: replace.environ_get_set_eliminate 28.52% : 0.002690s : 183: replace.getattr_setattr_resolve 30.38% : 0.002865s : 310: replace.inline 0.21% : 0.000020s : 1: replace.merge_addn 1.32% : 0.000125s : 7: replace.partial_eliminate 3.88% : 0.000366s : 25: replace.replace_applicator 3.79% : 0.000358s : 34: replace.switch_simplify 0.53% : 0.000050s : 6: replace.tuple_list_get_item_depend_reorder 29.63% : 0.002794s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000015s : 1: replace.updatestate_useless_node_eliminater 0.17% : 0.000016s : 1: replace.virtual_dataset_eliminate ------[match.] 0.040719 775 0.04% : 0.000016s : 5: match.ad_related_special_op_eliminate 0.03% : 0.000010s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000016s : 3: match.environ_get_set_eliminate 70.88% : 0.028862s : 183: match.getattr_setattr_resolve 27.41% : 0.011161s : 310: match.inline 0.05% : 0.000020s : 1: match.merge_addn 0.09% : 0.000035s : 7: match.partial_eliminate 0.23% : 0.000094s : 25: match.replace_applicator 0.14% : 0.000058s : 34: match.switch_simplify 0.07% : 0.000029s : 6: match.tuple_list_get_item_depend_reorder 0.98% : 0.000398s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000008s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000009s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020915131318 0.77% : 0.000162s : 1198: predicate.accumulaten_eliminater 0.27% : 0.000056s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000115s : 835: predicate.addn_check_dump 0.79% : 0.000165s : 1198: predicate.addn_zero_filter 0.75% : 0.000156s : 1198: predicate.adjust_all_reduce_mul_add 1.79% : 0.000374s : 2034: predicate.arithmetic_simplify 1.10% : 0.000230s : 1586: predicate.cast_eliminate 3.18% : 0.000664s : 3484: predicate.check_bprop_eliminate 0.55% : 0.000116s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000035s : 242: predicate.convert_tensor_all_eliminate 1.13% : 0.000236s : 1399: predicate.convert_tensor_eliminate 0.57% : 0.000119s : 838: predicate.depend_value_elim 0.86% : 0.000180s : 1202: predicate.dict_get_item_const_eliminator 0.86% : 0.000179s : 1202: predicate.dict_get_item_eliminator 0.83% : 0.000174s : 1202: predicate.dict_set_item_eliminator 0.04% : 0.000009s : 126: predicate.elim_not_effective 0.10% : 0.000021s : 126: predicate.elim_shapecalc_of_broadcastargs 0.88% : 0.000184s : 1334: predicate.environ_add_const_eliminate 0.85% : 0.000177s : 1337: predicate.environ_get_add_eliminate 0.85% : 0.000178s : 1334: predicate.environ_get_depend_swap 1.46% : 0.000306s : 2172: predicate.environ_get_eliminate 0.84% : 0.000176s : 1337: predicate.environ_get_set_eliminate 1.15% : 0.000241s : 1717: predicate.exchange_switch_depend_value 1.52% : 0.000319s : 1717: predicate.float_depend_g_call 0.55% : 0.000115s : 835: predicate.float_environ_get_switch 0.64% : 0.000135s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000058s : 395: predicate.get_grad_eliminate 2.56% : 0.000534s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000114s : 835: predicate.incorporate_call 0.54% : 0.000113s : 835: predicate.incorporate_call_switch 3.95% : 0.000825s : 4602: predicate.inline 2.27% : 0.000474s : 2203: predicate.inline_without_move 0.14% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.33% : 0.000070s : 388: predicate.less_batch_normalization 1.08% : 0.000226s : 1660: predicate.list_to_tuple_eliminator_ 1.94% : 0.000406s : 2874: predicate.load_eliminater 0.19% : 0.000039s : 135: predicate.loop_unroll_after_grad 2.40% : 0.000502s : 2640: predicate.loop_unroll_before_grad 0.95% : 0.000200s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000118s : 837: predicate.merge_addn 3.08% : 0.000644s : 3380: predicate.micro_step_allgather_replace 3.09% : 0.000646s : 3380: predicate.mini_step_allgather_replace 0.77% : 0.000160s : 1199: predicate.minmaximum_grad 0.17% : 0.000036s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 2.08% : 0.000434s : 1717: predicate.partial_defer_inline 1.14% : 0.000238s : 1541: predicate.partial_eliminate 0.80% : 0.000168s : 1198: predicate.print_const_string_wrapper 0.56% : 0.000117s : 824: predicate.reduce_all_const_elim 0.94% : 0.000198s : 1199: predicate.reduce_eliminate 0.14% : 0.000030s : 395: predicate.remove_not_recompute_node 1.94% : 0.000407s : 4829: predicate.replace_applicator 0.80% : 0.000167s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.79% : 0.000165s : 1199: predicate.reshape_eliminate 3.26% : 0.000682s : 3380: predicate.row_tensor_add_zeros_like 0.10% : 0.000022s : 135: predicate.row_tensor_eliminate 3.26% : 0.000682s : 3484: predicate.same_eliminate 0.24% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.29% : 0.000060s : 395: predicate.shard_identity_eliminate 2.05% : 0.000429s : 2338: predicate.special_op_eliminate 0.63% : 0.000132s : 837: predicate.specialize_transform 3.43% : 0.000717s : 3380: predicate.split_environ_get_set_with_tuple_value 1.60% : 0.000334s : 2203: predicate.stack_unstack_eliminate 2.03% : 0.000424s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.24% : 0.000260s : 1717: predicate.switch_defer_inline 4.43% : 0.000927s : 5201: predicate.switch_layer_defer_inline 4.30% : 0.000900s : 5262: predicate.switch_simplify 0.77% : 0.000161s : 1199: predicate.tile_eliminate 0.75% : 0.000156s : 1199: predicate.transpose_eliminate 1.10% : 0.000231s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.09% : 0.000228s : 1469: predicate.tuple_list_get_item_const_eliminator 0.99% : 0.000208s : 1469: predicate.tuple_list_get_item_depend_reorder 1.88% : 0.000394s : 2495: predicate.tuple_list_get_item_eliminator 1.01% : 0.000210s : 1469: predicate.tuple_list_get_set_item_eliminator 1.70% : 0.000356s : 2304: predicate.tuple_list_set_item_eliminator 1.09% : 0.000228s : 1660: predicate.tuple_to_list_eliminator_ 1.92% : 0.000402s : 2874: predicate.updatestate_pure_node_eliminater 2.51% : 0.000526s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000021s : 135: predicate.value_based_eliminate 0.28% : 0.000059s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000057s : 395: predicate.virtual_output_eliminate 0.10% : 0.000021s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.055887 747 68.56% : 0.038313s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.45% : 0.001367s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.00% : 0.016206s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.348233 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.26% : 0.040588s : 1: a1a2 0.00% : 0.000153s : 1: add_cache_embedding 0.00% : 0.000150s : 1: add_comm_op_reuse_tag 0.00% : 0.000734s : 1: add_recomputation 0.00% : 0.000382s : 1: assign_add_opt 0.01% : 0.001932s : 1: auto_monad 0.00% : 0.000304s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.01% : 0.001540s : 1: bootstrap 0.00% : 0.000069s : 1: cconv 0.00% : 0.000146s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.00% : 0.000114s : 1: convert_after_rewriter 0.00% : 0.000302s : 1: cse_after_recomputation 0.00% : 0.000088s : 1: dataset_repeat_opt 0.00% : 0.000388s : 1: distribtued_split 0.01% : 0.001415s : 1: eliminate_special_op_node 0.00% : 0.000106s : 1: environ_conv 0.00% : 0.000016s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000033s : 1: graph_reusing 0.00% : 0.000011s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000006s : 1: handle_group_info 0.27% : 0.042122s : 1: inline 0.01% : 0.001300s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000517s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.01% : 0.000965s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.032009s : 61: opt.transform.a1a2 0.00% : 0.000169s : 1: opt.transform.loop_unroll_optimizer 0.60% : 0.091436s : 148: opt.transform.opt_a 0.01% : 0.000780s : 1: opt.transform.opt_after_cconv 0.02% : 0.003224s : 27: opt.transform.opt_b 0.24% : 0.036688s : 16: opt.transform.opt_resolve 0.01% : 0.000933s : 1: opt.transform.opt_trans_graph 0.01% : 0.000819s : 6: opt.transform.special_op_eliminate 0.00% : 0.000689s : 4: opt.transform.symbol_engine_opt 3.69% : 0.566331s : 1: opt_a 0.01% : 0.001545s : 1: opt_after_cconv 0.03% : 0.003978s : 1: opt_b 3.78% : 0.580512s : 1: optimize 0.00% : 0.000143s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000087s : 1: order_py_execute_after_rewriter 0.00% : 0.000122s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000150s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000015s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000068s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000201s : 1: parallel-infer-symbol 0.00% : 0.000008s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000096s : 1: pipeline_split 0.00% : 0.000096s : 1: pre_auto_parallel 0.00% : 0.000130s : 1: py_interpret_to_execute 0.00% : 0.000142s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000120s : 1: remove_cast_before_assign_add 0.00% : 0.000594s : 1: remove_dup_value 0.83% : 0.126637s : 3: renormalize.infer 0.34% : 0.051705s : 3: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000927s : 1: rewriter_after_opt_a 0.01% : 0.001782s : 2: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000149s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.00% : 0.000132s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000790s : 1: symbol_engine_optimizer 84.85% : 13.023139s : 1: task_emit 0.01% : 0.000962s : 1: tuple_transform 4.73% : 0.726172s : 1: type_inference 0.01% : 0.001266s : 1: validate TotalTime = 14.4047, [21] [bootstrap]: 0.00157217 [type_inference]: 0.740308 [auto_monad]: 0.00186314 [graph_reusing]: 2.567e-05 [inline]: 0.0428445, [2] [rewriter_before_opt_a]: 0.00148591 [a1a2]: 0.0413181, [2] [Cycle 1]: 0.028202, [11] [expand_dump_flag]: 3.73301e-05 [switch_simplify]: 0.00111706 [loop_unroll]: 0.00068186 [a_1]: 0.0220559 [recompute_prepare]: 0.00016582 [updatestate_depend_eliminate]: 0.00036467 [updatestate_assign_eliminate]: 8.78701e-05 [updatestate_loads_eliminate]: 0.00020058 [parameter_eliminate]: 4.77e-06 [a_2]: 0.00322344 [parallel_inline_pass]: 0.00010215 [Cycle 2]: 0.00528181, [11] [expand_dump_flag]: 1.42003e-06 [switch_simplify]: 9.33e-05 [loop_unroll]: 9.319e-05 [a_1]: 0.00315228 [recompute_prepare]: 9.845e-05 [updatestate_depend_eliminate]: 7.043e-05 [updatestate_assign_eliminate]: 5.79e-05 [updatestate_loads_eliminate]: 6.097e-05 [parameter_eliminate]: 2.17999e-06 [a_2]: 0.00148323 [parallel_inline_pass]: 9.995e-05 [parallel-infer-symbol]: 0.00016996 [pre_auto_parallel]: 9.332e-05 [insert-virtual-dataset]: 0.00132669 [parallel-infer-symbol-second]: 2.03005e-06 [dataset_repeat_opt]: 6.291e-05 [pipeline_split]: 9.584e-05 [optimize]: 0.599585, [52] [py_interpret_to_execute]: 0.00013989 [rewriter_before_opt_a]: 0.00027559 [opt_a]: 0.585075, [3] [Cycle 1]: 0.502565, [46] [expand_dump_flag]: 1.67999e-06 [switch_simplify]: 0.0001077 [loop_unroll]: 9.945e-05 [a_1]: 0.00328273 [recompute_prepare]: 0.00010342 [updatestate_depend_eliminate]: 9.765e-05 [updatestate_assign_eliminate]: 6.179e-05 [updatestate_loads_eliminate]: 6.40299e-05 [parameter_eliminate]: 2.66999e-06 [a_2]: 0.00155034 [accelerated_algorithm]: 0.00026991 [shard]: 1.97999e-06 [meta_shard_fg_expand]: 4.91e-05 [shard_inline]: 0.00010733 [auto_parallel]: 8.141e-05 [parallel]: 0.0203471 [flash_sp]: 5.927e-05 [merge_comm]: 0.00012727 [allreduce_fusion]: 7.376e-05 [matmul_add_comm_reduction]: 9.941e-05 [allreduce_slice_to_reducescatter]: 4.20026e-07 [virtual_shard_identity]: 0.00012883 [virtual_dataset]: 0.00017665 [get_grad_eliminate_]: 0.00012179 [virtual_output]: 0.0001132 [merge_forward]: 7.616e-05 [cell_reuse_recompute_pass]: 3.46999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022091 [before_grad]: 0.00019839 [inplace_validation]: 0.00012512 [parallel_renormalize]: 0.0222285 [update_top_fg]: 1.03994e-06 [cast_eliminate]: 0.00014657 [meta_fg_expand]: 0.262586 [inplace_validation_after_expand]: 0.0015276 [flash_sp_send_recv_attached]: 0.00120489 [receive_attached]: 8.114e-05 [after_resolve]: 0.00203729 [a_after_grad]: 0.0038423 [special_op_eliminate]: 0.00183348 [renormalize]: 0.148232 [add_forward_monad_depend]: 0.00035679 [auto_monad_grad]: 0.00022063 [auto_monad_eliminator]: 0.00176409 [cse]: 0.00412987 [a_3]: 0.02408 [Cycle 2]: 0.0705121, [46] [expand_dump_flag]: 5.244e-05 [switch_simplify]: 0.00181011 [loop_unroll]: 0.00147295 [a_1]: 0.0311307 [recompute_prepare]: 0.00017586 [updatestate_depend_eliminate]: 0.00023226 [updatestate_assign_eliminate]: 0.00010421 [updatestate_loads_eliminate]: 0.00016343 [parameter_eliminate]: 3.92005e-06 [a_2]: 0.00434293 [accelerated_algorithm]: 0.00016959 [shard]: 2.01003e-06 [meta_shard_fg_expand]: 8.741e-05 [shard_inline]: 0.00014106 [auto_parallel]: 0.00011825 [parallel]: 9.45001e-06 [flash_sp]: 0.0001268 [merge_comm]: 0.00012637 [allreduce_fusion]: 9.643e-05 [matmul_add_comm_reduction]: 0.00011615 [allreduce_slice_to_reducescatter]: 3.39933e-07 [virtual_shard_identity]: 0.00014752 [virtual_dataset]: 0.00013868 [get_grad_eliminate_]: 0.00013397 [virtual_output]: 0.00013674 [merge_forward]: 9.238e-05 [cell_reuse_recompute_pass]: 2.27999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025593 [before_grad]: 0.00024031 [inplace_validation]: 8.66801e-05 [parallel_renormalize]: 8.00937e-08 [update_top_fg]: 6.50063e-07 [cast_eliminate]: 0.00015348 [meta_fg_expand]: 0.00028729 [inplace_validation_after_expand]: 0.00017654 [flash_sp_send_recv_attached]: 1.32993e-06 [receive_attached]: 1.05007e-06 [after_resolve]: 0.00016547 [a_after_grad]: 0.00023195 [special_op_eliminate]: 0.00013714 [renormalize]: 0.0187795 [add_forward_monad_depend]: 5.79993e-06 [auto_monad_grad]: 2.06998e-06 [auto_monad_eliminator]: 0.00028821 [cse]: 0.00716032 [a_3]: 0.0009672 [Cycle 3]: 0.011969, [46] [expand_dump_flag]: 2.07999e-06 [switch_simplify]: 0.00013536 [loop_unroll]: 0.00013196 [a_1]: 0.00433822 [recompute_prepare]: 0.00013675 [updatestate_depend_eliminate]: 0.00015444 [updatestate_assign_eliminate]: 9.65099e-05 [updatestate_loads_eliminate]: 9.46299e-05 [parameter_eliminate]: 4.77e-06 [a_2]: 0.00210539 [accelerated_algorithm]: 0.00016793 [shard]: 1.82004e-06 [meta_shard_fg_expand]: 5.745e-05 [shard_inline]: 0.00013603 [auto_parallel]: 0.0001171 [parallel]: 1.032e-05 [flash_sp]: 2.49001e-06 [merge_comm]: 0.00010862 [allreduce_fusion]: 9.589e-05 [matmul_add_comm_reduction]: 0.00012189 [allreduce_slice_to_reducescatter]: 4.30038e-07 [virtual_shard_identity]: 0.0001394 [virtual_dataset]: 0.00013314 [get_grad_eliminate_]: 0.00012832 [virtual_output]: 0.00013024 [merge_forward]: 9.461e-05 [cell_reuse_recompute_pass]: 3.68e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024976 [before_grad]: 0.00023207 [inplace_validation]: 9.07e-05 [parallel_renormalize]: 6.99656e-08 [update_top_fg]: 1.00001e-06 [cast_eliminate]: 0.00014845 [meta_fg_expand]: 0.00011047 [inplace_validation_after_expand]: 0.0001215 [flash_sp_send_recv_attached]: 1.66008e-06 [receive_attached]: 9.59961e-07 [after_resolve]: 0.00016977 [a_after_grad]: 0.00021846 [special_op_eliminate]: 0.00013101 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 2.13995e-06 [auto_monad_grad]: 2.21003e-06 [auto_monad_eliminator]: 0.00018217 [cse]: 0.00040766 [a_3]: 0.00094211 [py_interpret_to_execute_after_opt_a]: 0.00013974 [slice_cell_reuse_recomputed_activation]: 1.75007e-06 [rewriter_after_opt_a]: 0.00096737 [convert_after_rewriter]: 0.00011153 [order_py_execute_after_rewriter]: 8.06999e-05 [opt_b]: 0.00397182, [1] [Cycle 1]: 0.00396101, [7] [b_1]: 0.00308131 [b_2]: 0.00013837 [updatestate_depend_eliminate]: 0.00010033 [updatestate_assign_eliminate]: 8.741e-05 [updatestate_loads_eliminate]: 9.079e-05 [renormalize]: 4.69969e-07 [cse]: 0.00040593 [optimize_parallel_all_gather_comm]: 0.00014176 [overlap_param_gather]: 1.05996e-06 [cconv]: 6.956e-05 [loop_unroll]: 0.00098888 [opt_after_cconv]: 0.00156714, [1] [Cycle 1]: 0.00156031, [7] [c_1]: 0.00078584 [parameter_eliminate]: 2.34006e-06 [updatestate_depend_eliminate]: 0.00013218 [updatestate_assign_eliminate]: 9.528e-05 [updatestate_loads_eliminate]: 9.221e-05 [cse]: 0.00039741 [renormalize]: 4.89992e-07 [remove_dup_value]: 0.00061753 [tuple_transform]: 0.00094959, [1] [Cycle 1]: 0.00094111, [2] [d_1]: 0.00092379 [renormalize]: 3.50061e-07 [partial_unused_args_eliminate]: 2.45997e-06 [add_cache_embedding]: 0.00015729 [add_recomputation]: 0.00072787 [cse_after_recomputation]: 0.00030638, [1] [Cycle 1]: 0.00029883, [1] [cse]: 0.00028701 [environ_conv]: 8.866e-05 [swap_dp_allreduce_reducescatter]: 0.000129 [bias_add_comm_swap]: 1.85997e-06 [label_micro_interleaved_index]: 1.14995e-06 [label_fine_grained_interleaved_index]: 0.0005294 [merge_cast_opt]: 1.10001e-06 [slice_recompute_activation]: 0.00014635 [micro_interleaved_order_control]: 1.34006e-06 [assign_add_opt]: 0.00039027 [ForceFp32Comm]: 1.02003e-06 [remove_cast_before_assign_add]: 0.00010434 [full_micro_interleaved_order_control]: 1.61002e-06 [reorder_send_recv_between_fp_bp]: 1.06997e-06 [comm_op_add_attrs]: 0.00014743 [add_comm_op_reuse_tag]: 0.00014311 [interleave_split_concat_branches]: 7.20029e-07 [interleave_parallel_branches]: 6.10016e-07 [overlap_opt_shard_in_pipeline]: 9.40997e-06 [overlap_opt_shard_grad_in_pipeline]: 2.21992e-06 [control_data_broadcast_order]: 7.69971e-07 [grouped_pairwise_exchange_alltoall]: 8.65001e-06 [offloading_packed_experts]: 1.53005e-06 [overlap_recompute_and_grad_model_parallel]: 1.33005e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.39937e-07 [overlap_recompute_allgather_and_fa_grad]: 7.22101e-05 [overlap_grad_ring_attention]: 0.000143 [overlap_grad_flash_sp]: 0.00011788 [begin_end_overlap_inline]: 6.10016e-07 [split_matmul_comm_elemetwise]: 1.20001e-06 [split_layernorm_comm]: 1.22003e-06 [handle_group_info]: 3.48e-06 [symbol_engine_optimizer]: 0.0008375, [1] [Cycle 1]: 0.00083088, [6] [build]: 5.34101e-05 [elim_shapecalc]: 0.00014928 [elim_not_effective]: 0.00022048 [opt_reshape]: 0.00013057 [fold_const_symbol]: 0.00023633 [renormalize]: 4.10015e-07 [pipeline_parallel_scheduler]: 2.63005e-06 [auto_monad_reorder]: 0.00029564 [get_jit_bprop_graph]: 3.59956e-07 [rewriter_after_jit_bprop_graph]: 3.10014e-07 [eliminate_special_op_node]: 0.0013899 [distribtued_split]: 0.00035514 [validate]: 0.00028626 [task_emit]: 13.013 [execute]: 8.64e-06 Sums bootstrap : 0.001572s : 0.01% type_inference : 0.740308s : 5.14% auto_monad : 0.001863s : 0.01% graph_reusing : 0.000026s : 0.00% inline.rewriter_before_opt_a : 0.001486s : 0.01% inline.a1a2.expand_dump_flag : 0.000039s : 0.00% inline.a1a2.switch_simplify : 0.001210s : 0.01% inline.a1a2.loop_unroll : 0.000775s : 0.01% inline.a1a2.a_1 : 0.025208s : 0.18% inline.a1a2.recompute_prepare : 0.000264s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000435s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000146s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000262s : 0.00% inline.a1a2.parameter_eliminate : 0.000007s : 0.00% inline.a1a2.a_2 : 0.004707s : 0.03% inline.a1a2.parallel_inline_pass : 0.000202s : 0.00% parallel-infer-symbol : 0.000170s : 0.00% pre_auto_parallel : 0.000093s : 0.00% insert-virtual-dataset : 0.001327s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000063s : 0.00% pipeline_split : 0.000096s : 0.00% optimize.py_interpret_to_execute : 0.000140s : 0.00% optimize.rewriter_before_opt_a : 0.000276s : 0.00% optimize.opt_a.expand_dump_flag : 0.000056s : 0.00% optimize.opt_a.switch_simplify : 0.002053s : 0.01% optimize.opt_a.loop_unroll : 0.001704s : 0.01% optimize.opt_a.a_1 : 0.038752s : 0.27% optimize.opt_a.recompute_prepare : 0.000416s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000484s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000263s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000322s : 0.00% optimize.opt_a.parameter_eliminate : 0.000011s : 0.00% optimize.opt_a.a_2 : 0.007999s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000607s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000194s : 0.00% optimize.opt_a.shard_inline : 0.000384s : 0.00% optimize.opt_a.auto_parallel : 0.000317s : 0.00% optimize.opt_a.parallel : 0.020367s : 0.14% optimize.opt_a.flash_sp : 0.000189s : 0.00% optimize.opt_a.merge_comm : 0.000362s : 0.00% optimize.opt_a.allreduce_fusion : 0.000266s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000337s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000416s : 0.00% optimize.opt_a.virtual_dataset : 0.000448s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000384s : 0.00% optimize.opt_a.virtual_output : 0.000380s : 0.00% optimize.opt_a.merge_forward : 0.000263s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000727s : 0.01% optimize.opt_a.before_grad : 0.000671s : 0.00% optimize.opt_a.inplace_validation : 0.000303s : 0.00% optimize.opt_a.parallel_renormalize : 0.022229s : 0.15% optimize.opt_a.update_top_fg : 0.000003s : 0.00% optimize.opt_a.cast_eliminate : 0.000449s : 0.00% optimize.opt_a.meta_fg_expand : 0.262984s : 1.83% optimize.opt_a.inplace_validation_after_expand : 0.001826s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001208s : 0.01% optimize.opt_a.receive_attached : 0.000083s : 0.00% optimize.opt_a.after_resolve : 0.002373s : 0.02% optimize.opt_a.a_after_grad : 0.004293s : 0.03% optimize.opt_a.special_op_eliminate : 0.002102s : 0.01% optimize.opt_a.renormalize : 0.167012s : 1.16% optimize.opt_a.add_forward_monad_depend : 0.000365s : 0.00% optimize.opt_a.auto_monad_grad : 0.000225s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002234s : 0.02% optimize.opt_a.cse : 0.011698s : 0.08% optimize.opt_a.a_3 : 0.025989s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000140s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000967s : 0.01% optimize.convert_after_rewriter : 0.000112s : 0.00% optimize.order_py_execute_after_rewriter : 0.000081s : 0.00% optimize.opt_b.b_1 : 0.003081s : 0.02% optimize.opt_b.b_2 : 0.000138s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000100s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000087s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000091s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000406s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000142s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000070s : 0.00% optimize.loop_unroll : 0.000989s : 0.01% optimize.opt_after_cconv.c_1 : 0.000786s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000132s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000095s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000092s : 0.00% optimize.opt_after_cconv.cse : 0.000397s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000618s : 0.00% optimize.tuple_transform.d_1 : 0.000924s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000157s : 0.00% optimize.add_recomputation : 0.000728s : 0.01% optimize.cse_after_recomputation.cse : 0.000287s : 0.00% optimize.environ_conv : 0.000089s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000129s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000529s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000146s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000390s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000104s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000147s : 0.00% optimize.add_comm_op_reuse_tag : 0.000143s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000009s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000072s : 0.00% optimize.overlap_grad_ring_attention : 0.000143s : 0.00% optimize.overlap_grad_flash_sp : 0.000118s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000053s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000149s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000220s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000131s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000236s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000296s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001390s : 0.01% distribtued_split : 0.000355s : 0.00% validate : 0.000286s : 0.00% task_emit : 13.012996s : 90.41% execute : 0.000009s : 0.00% Time group info: ------[substitution.] 0.049418 4298 0.04% : 0.000021s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000021s : 9: substitution.addn_check_dump 0.10% : 0.000052s : 7: substitution.addn_zero_filter 0.03% : 0.000013s : 7: substitution.adjust_all_reduce_mul_add 0.58% : 0.000285s : 71: substitution.arithmetic_simplify 0.10% : 0.000050s : 10: substitution.cast_eliminate 0.11% : 0.000053s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000023s : 15: substitution.environ_get_add_eliminate 0.03% : 0.000016s : 12: substitution.environ_get_depend_swap 0.06% : 0.000030s : 27: substitution.environ_get_eliminate 0.07% : 0.000033s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000018s : 23: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.02% : 0.000010s : 10: substitution.float_tuple_getitem_switch 0.09% : 0.000046s : 107: substitution.fold_const_symbol 64.38% : 0.031813s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000083s : 126: substitution.graph_param_transform 0.01% : 0.000007s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 24.28% : 0.011997s : 331: substitution.inline 1.38% : 0.000681s : 112: substitution.inline_without_move 0.25% : 0.000122s : 309: substitution.j_node_and_user_rematch 0.29% : 0.000141s : 40: substitution.less_batch_normalization 0.09% : 0.000044s : 90: substitution.load_eliminater 0.10% : 0.000049s : 10: substitution.merge_addn 0.23% : 0.000113s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.03% : 0.000017s : 1: substitution.partial_defer_inline 0.13% : 0.000065s : 23: substitution.partial_eliminate 0.03% : 0.000017s : 26: substitution.reduce_all_const_elim 0.06% : 0.000029s : 15: substitution.reduce_eliminate 0.32% : 0.000160s : 309: substitution.remove_not_recompute_node 2.01% : 0.000991s : 508: substitution.replace_applicator 0.22% : 0.000108s : 251: substitution.replace_old_param 0.08% : 0.000037s : 11: substitution.reshape_eliminate 0.02% : 0.000011s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000009s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.17% : 0.000083s : 34: substitution.switch_simplify 0.05% : 0.000025s : 11: substitution.tile_eliminate 0.50% : 0.000248s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000133s : 107: substitution.tuple_list_get_item_const_eliminator 0.43% : 0.000212s : 107: substitution.tuple_list_get_item_depend_reorder 1.54% : 0.000762s : 308: substitution.tuple_list_get_item_eliminator 0.37% : 0.000183s : 107: substitution.tuple_list_get_set_item_eliminator 0.39% : 0.000191s : 210: substitution.updatestate_pure_node_eliminater 0.67% : 0.000333s : 265: substitution.updatestate_useless_node_eliminater 0.03% : 0.000014s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.739842 2 96.49% : 0.713874s : 1: type_inference.infer 3.51% : 0.025968s : 1: type_inference.specialize ------[replace.] 0.009710 775 0.42% : 0.000041s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000006s : 1: replace.arithmetic_simplify 0.49% : 0.000048s : 7: replace.depend_value_elim 0.46% : 0.000044s : 3: replace.environ_get_set_eliminate 28.00% : 0.002718s : 183: replace.getattr_setattr_resolve 30.22% : 0.002934s : 310: replace.inline 0.22% : 0.000022s : 1: replace.merge_addn 1.18% : 0.000114s : 7: replace.partial_eliminate 4.16% : 0.000404s : 25: replace.replace_applicator 4.22% : 0.000410s : 34: replace.switch_simplify 0.52% : 0.000050s : 6: replace.tuple_list_get_item_depend_reorder 29.69% : 0.002883s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.20% : 0.000019s : 1: replace.virtual_dataset_eliminate ------[match.] 0.041965 775 0.04% : 0.000017s : 5: match.ad_related_special_op_eliminate 0.02% : 0.000010s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000017s : 3: match.environ_get_set_eliminate 70.20% : 0.029459s : 183: match.getattr_setattr_resolve 28.06% : 0.011774s : 310: match.inline 0.05% : 0.000022s : 1: match.merge_addn 0.09% : 0.000037s : 7: match.partial_eliminate 0.24% : 0.000099s : 25: match.replace_applicator 0.15% : 0.000063s : 34: match.switch_simplify 0.07% : 0.000031s : 6: match.tuple_list_get_item_depend_reorder 0.98% : 0.000411s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000008s : 1: match.updatestate_useless_node_eliminater 0.03% : 0.000013s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020452131318 0.77% : 0.000157s : 1198: predicate.accumulaten_eliminater 0.28% : 0.000058s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000113s : 835: predicate.addn_check_dump 0.78% : 0.000159s : 1198: predicate.addn_zero_filter 0.78% : 0.000160s : 1198: predicate.adjust_all_reduce_mul_add 1.80% : 0.000369s : 2034: predicate.arithmetic_simplify 1.13% : 0.000230s : 1586: predicate.cast_eliminate 3.09% : 0.000632s : 3484: predicate.check_bprop_eliminate 0.56% : 0.000114s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.16% : 0.000238s : 1399: predicate.convert_tensor_eliminate 0.58% : 0.000118s : 838: predicate.depend_value_elim 0.83% : 0.000170s : 1202: predicate.dict_get_item_const_eliminator 0.86% : 0.000175s : 1202: predicate.dict_get_item_eliminator 0.84% : 0.000171s : 1202: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 126: predicate.elim_not_effective 0.11% : 0.000023s : 126: predicate.elim_shapecalc_of_broadcastargs 0.88% : 0.000180s : 1334: predicate.environ_add_const_eliminate 0.88% : 0.000181s : 1337: predicate.environ_get_add_eliminate 0.93% : 0.000189s : 1334: predicate.environ_get_depend_swap 1.44% : 0.000295s : 2172: predicate.environ_get_eliminate 0.89% : 0.000182s : 1337: predicate.environ_get_set_eliminate 1.13% : 0.000232s : 1717: predicate.exchange_switch_depend_value 1.44% : 0.000295s : 1717: predicate.float_depend_g_call 0.55% : 0.000113s : 835: predicate.float_environ_get_switch 0.65% : 0.000133s : 970: predicate.float_tuple_getitem_switch 0.05% : 0.000010s : 126: predicate.fold_const_symbol 0.29% : 0.000060s : 395: predicate.get_grad_eliminate 2.35% : 0.000481s : 1893: predicate.getattr_setattr_resolve 0.06% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000113s : 835: predicate.incorporate_call 0.55% : 0.000112s : 835: predicate.incorporate_call_switch 3.94% : 0.000805s : 4602: predicate.inline 2.33% : 0.000477s : 2203: predicate.inline_without_move 0.15% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.37% : 0.000076s : 388: predicate.less_batch_normalization 1.13% : 0.000232s : 1660: predicate.list_to_tuple_eliminator_ 1.91% : 0.000390s : 2874: predicate.load_eliminater 0.21% : 0.000042s : 135: predicate.loop_unroll_after_grad 2.37% : 0.000484s : 2640: predicate.loop_unroll_before_grad 0.97% : 0.000198s : 1478: predicate.make_slice_get_slice_eliminator 0.58% : 0.000118s : 837: predicate.merge_addn 2.99% : 0.000611s : 3380: predicate.micro_step_allgather_replace 3.09% : 0.000631s : 3380: predicate.mini_step_allgather_replace 0.78% : 0.000159s : 1199: predicate.minmaximum_grad 0.18% : 0.000037s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.11% : 0.000022s : 135: predicate.parallel_virtual_node 2.08% : 0.000426s : 1717: predicate.partial_defer_inline 1.12% : 0.000229s : 1541: predicate.partial_eliminate 0.79% : 0.000163s : 1198: predicate.print_const_string_wrapper 0.57% : 0.000116s : 824: predicate.reduce_all_const_elim 0.96% : 0.000197s : 1199: predicate.reduce_eliminate 0.14% : 0.000029s : 395: predicate.remove_not_recompute_node 1.97% : 0.000403s : 4829: predicate.replace_applicator 0.81% : 0.000165s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.79% : 0.000161s : 1199: predicate.reshape_eliminate 3.05% : 0.000625s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000023s : 135: predicate.row_tensor_eliminate 3.26% : 0.000666s : 3484: predicate.same_eliminate 0.24% : 0.000050s : 633: predicate.set_cell_output_no_recompute 0.31% : 0.000064s : 395: predicate.shard_identity_eliminate 2.12% : 0.000434s : 2338: predicate.special_op_eliminate 0.64% : 0.000131s : 837: predicate.specialize_transform 3.30% : 0.000676s : 3380: predicate.split_environ_get_set_with_tuple_value 1.59% : 0.000326s : 2203: predicate.stack_unstack_eliminate 1.95% : 0.000399s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.27% : 0.000259s : 1717: predicate.switch_defer_inline 4.30% : 0.000880s : 5201: predicate.switch_layer_defer_inline 4.45% : 0.000909s : 5262: predicate.switch_simplify 0.80% : 0.000164s : 1199: predicate.tile_eliminate 0.75% : 0.000154s : 1199: predicate.transpose_eliminate 1.10% : 0.000224s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.12% : 0.000229s : 1469: predicate.tuple_list_get_item_const_eliminator 0.98% : 0.000200s : 1469: predicate.tuple_list_get_item_depend_reorder 1.93% : 0.000394s : 2495: predicate.tuple_list_get_item_eliminator 1.04% : 0.000213s : 1469: predicate.tuple_list_get_set_item_eliminator 1.71% : 0.000349s : 2304: predicate.tuple_list_set_item_eliminator 1.09% : 0.000224s : 1660: predicate.tuple_to_list_eliminator_ 1.90% : 0.000389s : 2874: predicate.updatestate_pure_node_eliminater 2.58% : 0.000527s : 3710: predicate.updatestate_useless_node_eliminater 0.12% : 0.000024s : 135: predicate.value_based_eliminate 0.29% : 0.000059s : 397: predicate.virtual_dataset_eliminate 0.28% : 0.000058s : 395: predicate.virtual_output_eliminate 0.11% : 0.000022s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.057693 747 67.89% : 0.039166s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.56% : 0.001478s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.55% : 0.017049s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.402427 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.27% : 0.041323s : 1: a1a2 0.00% : 0.000177s : 1: add_cache_embedding 0.00% : 0.000150s : 1: add_comm_op_reuse_tag 0.00% : 0.000739s : 1: add_recomputation 0.00% : 0.000401s : 1: assign_add_opt 0.01% : 0.001884s : 1: auto_monad 0.00% : 0.000307s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.01% : 0.001616s : 1: bootstrap 0.00% : 0.000076s : 1: cconv 0.00% : 0.000155s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000119s : 1: convert_after_rewriter 0.00% : 0.000311s : 1: cse_after_recomputation 0.00% : 0.000071s : 1: dataset_repeat_opt 0.00% : 0.000368s : 1: distribtued_split 0.01% : 0.001404s : 1: eliminate_special_op_node 0.00% : 0.000097s : 1: environ_conv 0.00% : 0.000016s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000034s : 1: graph_reusing 0.00% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000006s : 1: handle_group_info 0.28% : 0.042856s : 1: inline 0.01% : 0.001346s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000538s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.01% : 0.001001s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.21% : 0.032299s : 61: opt.transform.a1a2 0.00% : 0.000178s : 1: opt.transform.loop_unroll_optimizer 0.58% : 0.089892s : 148: opt.transform.opt_a 0.01% : 0.000783s : 1: opt.transform.opt_after_cconv 0.02% : 0.003188s : 27: opt.transform.opt_b 0.24% : 0.037484s : 16: opt.transform.opt_resolve 0.01% : 0.000921s : 1: opt.transform.opt_trans_graph 0.01% : 0.000838s : 6: opt.transform.special_op_eliminate 0.00% : 0.000730s : 4: opt.transform.symbol_engine_opt 3.80% : 0.585082s : 1: opt_a 0.01% : 0.001572s : 1: opt_after_cconv 0.03% : 0.003976s : 1: opt_b 3.89% : 0.599597s : 1: optimize 0.00% : 0.000150s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000087s : 1: order_py_execute_after_rewriter 0.00% : 0.000123s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000149s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000013s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000078s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000180s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000105s : 1: pipeline_split 0.00% : 0.000103s : 1: pre_auto_parallel 0.00% : 0.000147s : 1: py_interpret_to_execute 0.00% : 0.000148s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000110s : 1: remove_cast_before_assign_add 0.00% : 0.000631s : 1: remove_dup_value 0.88% : 0.134775s : 3: renormalize.infer 0.35% : 0.054419s : 3: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000978s : 1: rewriter_after_opt_a 0.01% : 0.001783s : 2: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000153s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.00% : 0.000136s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000841s : 1: symbol_engine_optimizer 84.49% : 13.013037s : 1: task_emit 0.01% : 0.000955s : 1: tuple_transform 4.81% : 0.740345s : 1: type_inference 0.01% : 0.001311s : 1: validate TotalTime = 14.2767, [21] [bootstrap]: 0.00151572 [type_inference]: 0.726766 [auto_monad]: 0.00200785 [graph_reusing]: 2.477e-05 [inline]: 0.0424369, [2] [rewriter_before_opt_a]: 0.00154538 [a1a2]: 0.0408331, [2] [Cycle 1]: 0.0280579, [11] [expand_dump_flag]: 3.216e-05 [switch_simplify]: 0.00106107 [loop_unroll]: 0.00067694 [a_1]: 0.0219713 [recompute_prepare]: 0.00016247 [updatestate_depend_eliminate]: 0.00036057 [updatestate_assign_eliminate]: 0.00010719 [updatestate_loads_eliminate]: 0.0001992 [parameter_eliminate]: 5.09003e-06 [a_2]: 0.00321601 [parallel_inline_pass]: 0.00010085 [Cycle 2]: 0.00532375, [11] [expand_dump_flag]: 1.15996e-06 [switch_simplify]: 9.323e-05 [loop_unroll]: 9.144e-05 [a_1]: 0.00319836 [recompute_prepare]: 0.00010038 [updatestate_depend_eliminate]: 7.236e-05 [updatestate_assign_eliminate]: 5.93499e-05 [updatestate_loads_eliminate]: 6.27501e-05 [parameter_eliminate]: 2.22004e-06 [a_2]: 0.00147808 [parallel_inline_pass]: 9.89401e-05 [parallel-infer-symbol]: 0.00016936 [pre_auto_parallel]: 8.732e-05 [insert-virtual-dataset]: 0.00128687 [parallel-infer-symbol-second]: 1.80001e-06 [dataset_repeat_opt]: 0.00012627 [pipeline_split]: 8.64901e-05 [optimize]: 0.582038, [52] [py_interpret_to_execute]: 0.0001286 [rewriter_before_opt_a]: 0.00028315 [opt_a]: 0.567654, [3] [Cycle 1]: 0.486916, [46] [expand_dump_flag]: 1.59e-06 [switch_simplify]: 0.00011053 [loop_unroll]: 9.62899e-05 [a_1]: 0.00330856 [recompute_prepare]: 0.00010133 [updatestate_depend_eliminate]: 9.88101e-05 [updatestate_assign_eliminate]: 6.179e-05 [updatestate_loads_eliminate]: 6.357e-05 [parameter_eliminate]: 2.90002e-06 [a_2]: 0.00158086 [accelerated_algorithm]: 0.00022174 [shard]: 1.77999e-06 [meta_shard_fg_expand]: 4.89e-05 [shard_inline]: 0.00010404 [auto_parallel]: 7.152e-05 [parallel]: 0.0151397 [flash_sp]: 6.668e-05 [merge_comm]: 0.00012505 [allreduce_fusion]: 7.434e-05 [matmul_add_comm_reduction]: 9.87101e-05 [allreduce_slice_to_reducescatter]: 4.39934e-07 [virtual_shard_identity]: 0.00012752 [virtual_dataset]: 0.00016471 [get_grad_eliminate_]: 0.00011443 [virtual_output]: 0.00011254 [merge_forward]: 7.835e-05 [cell_reuse_recompute_pass]: 3.24997e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021952 [before_grad]: 0.00021124 [inplace_validation]: 0.00012696 [parallel_renormalize]: 0.0214736 [update_top_fg]: 6.10016e-07 [cast_eliminate]: 0.00014487 [meta_fg_expand]: 0.258799 [inplace_validation_after_expand]: 0.00157633 [flash_sp_send_recv_attached]: 0.00120676 [receive_attached]: 7.96301e-05 [after_resolve]: 0.00203907 [a_after_grad]: 0.00390785 [special_op_eliminate]: 0.00181981 [renormalize]: 0.141032 [add_forward_monad_depend]: 0.0003688 [auto_monad_grad]: 0.00021256 [auto_monad_eliminator]: 0.00181615 [cse]: 0.00416715 [a_3]: 0.0252182 [Cycle 2]: 0.0689149, [46] [expand_dump_flag]: 5.124e-05 [switch_simplify]: 0.00183585 [loop_unroll]: 0.00150179 [a_1]: 0.0308952 [recompute_prepare]: 0.00017493 [updatestate_depend_eliminate]: 0.00022722 [updatestate_assign_eliminate]: 0.00010159 [updatestate_loads_eliminate]: 0.00015787 [parameter_eliminate]: 3.10002e-06 [a_2]: 0.00441359 [accelerated_algorithm]: 0.00016241 [shard]: 2.15997e-06 [meta_shard_fg_expand]: 7.998e-05 [shard_inline]: 0.00014027 [auto_parallel]: 0.0001138 [parallel]: 1.00899e-05 [flash_sp]: 0.00012179 [merge_comm]: 0.00010953 [allreduce_fusion]: 9.23401e-05 [matmul_add_comm_reduction]: 0.00013896 [allreduce_slice_to_reducescatter]: 3.69968e-07 [virtual_shard_identity]: 0.0001418 [virtual_dataset]: 0.00013635 [get_grad_eliminate_]: 0.00013162 [virtual_output]: 0.00013866 [merge_forward]: 9.032e-05 [cell_reuse_recompute_pass]: 2.51003e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025428 [before_grad]: 0.00024725 [inplace_validation]: 8.50799e-05 [parallel_renormalize]: 8.00937e-08 [update_top_fg]: 5.79981e-07 [cast_eliminate]: 0.00015102 [meta_fg_expand]: 0.00027698 [inplace_validation_after_expand]: 0.00017691 [flash_sp_send_recv_attached]: 1.75997e-06 [receive_attached]: 1.59e-06 [after_resolve]: 0.00015789 [a_after_grad]: 0.00022721 [special_op_eliminate]: 0.0001348 [renormalize]: 0.0178518 [add_forward_monad_depend]: 4.83997e-06 [auto_monad_grad]: 1.99e-06 [auto_monad_eliminator]: 0.00028357 [cse]: 0.00668229 [a_3]: 0.0009902 [Cycle 3]: 0.0117999, [46] [expand_dump_flag]: 2.14006e-06 [switch_simplify]: 0.00013222 [loop_unroll]: 0.00012906 [a_1]: 0.0042803 [recompute_prepare]: 0.00013598 [updatestate_depend_eliminate]: 0.00014746 [updatestate_assign_eliminate]: 9.303e-05 [updatestate_loads_eliminate]: 8.965e-05 [parameter_eliminate]: 3.03006e-06 [a_2]: 0.00209811 [accelerated_algorithm]: 0.00015717 [shard]: 1.51002e-06 [meta_shard_fg_expand]: 5.18101e-05 [shard_inline]: 0.00015046 [auto_parallel]: 0.00011251 [parallel]: 9.47004e-06 [flash_sp]: 2.51003e-06 [merge_comm]: 0.00010468 [allreduce_fusion]: 9.35e-05 [matmul_add_comm_reduction]: 0.00011861 [allreduce_slice_to_reducescatter]: 4.59957e-07 [virtual_shard_identity]: 0.00013816 [virtual_dataset]: 0.00013254 [get_grad_eliminate_]: 0.00012631 [virtual_output]: 0.00012951 [merge_forward]: 9.14499e-05 [cell_reuse_recompute_pass]: 3.11004e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.0002508 [before_grad]: 0.00023369 [inplace_validation]: 8.751e-05 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 5.40051e-07 [cast_eliminate]: 0.00014536 [meta_fg_expand]: 0.00011272 [inplace_validation_after_expand]: 0.00011545 [flash_sp_send_recv_attached]: 1.64995e-06 [receive_attached]: 1.49e-06 [after_resolve]: 0.00014571 [a_after_grad]: 0.00021758 [special_op_eliminate]: 0.0001288 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 2.02004e-06 [auto_monad_grad]: 1.77999e-06 [auto_monad_eliminator]: 0.00016879 [cse]: 0.00039972 [a_3]: 0.00096587 [py_interpret_to_execute_after_opt_a]: 0.00013817 [slice_cell_reuse_recomputed_activation]: 1.71002e-06 [rewriter_after_opt_a]: 0.0009902 [convert_after_rewriter]: 0.0001089 [order_py_execute_after_rewriter]: 8.02e-05 [opt_b]: 0.00400615, [1] [Cycle 1]: 0.00399863, [7] [b_1]: 0.00314358 [b_2]: 0.00013533 [updatestate_depend_eliminate]: 9.70201e-05 [updatestate_assign_eliminate]: 8.64901e-05 [updatestate_loads_eliminate]: 8.871e-05 [renormalize]: 3.00002e-07 [cse]: 0.00039372 [optimize_parallel_all_gather_comm]: 0.00013336 [overlap_param_gather]: 9.59961e-07 [cconv]: 6.29401e-05 [loop_unroll]: 0.00091297 [opt_after_cconv]: 0.00159176, [1] [Cycle 1]: 0.00158538, [7] [c_1]: 0.00077941 [parameter_eliminate]: 1.60991e-06 [updatestate_depend_eliminate]: 0.00012796 [updatestate_assign_eliminate]: 9.247e-05 [updatestate_loads_eliminate]: 9.18e-05 [cse]: 0.00043955 [renormalize]: 5.20027e-07 [remove_dup_value]: 0.0005955 [tuple_transform]: 0.0009337, [1] [Cycle 1]: 0.00092721, [2] [d_1]: 0.00091116 [renormalize]: 2.60072e-07 [partial_unused_args_eliminate]: 2.29001e-06 [add_cache_embedding]: 0.0001489 [add_recomputation]: 0.00068832 [cse_after_recomputation]: 0.00030305, [1] [Cycle 1]: 0.00029564, [1] [cse]: 0.00028425 [environ_conv]: 8.596e-05 [swap_dp_allreduce_reducescatter]: 0.00012306 [bias_add_comm_swap]: 2.10002e-06 [label_micro_interleaved_index]: 1.11992e-06 [label_fine_grained_interleaved_index]: 0.0005104 [merge_cast_opt]: 9.49949e-07 [slice_recompute_activation]: 0.00015661 [micro_interleaved_order_control]: 1.50991e-06 [assign_add_opt]: 0.00037516 [ForceFp32Comm]: 9.59961e-07 [remove_cast_before_assign_add]: 0.0001034 [full_micro_interleaved_order_control]: 1.43005e-06 [reorder_send_recv_between_fp_bp]: 1.0801e-06 [comm_op_add_attrs]: 0.00014105 [add_comm_op_reuse_tag]: 0.00014444 [interleave_split_concat_branches]: 6.20028e-07 [interleave_parallel_branches]: 5.30039e-07 [overlap_opt_shard_in_pipeline]: 2.89901e-05 [overlap_opt_shard_grad_in_pipeline]: 2.14006e-06 [control_data_broadcast_order]: 7.49948e-07 [grouped_pairwise_exchange_alltoall]: 8.74e-06 [offloading_packed_experts]: 1.67999e-06 [overlap_recompute_and_grad_model_parallel]: 1.14006e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.7998e-07 [overlap_recompute_allgather_and_fa_grad]: 7.058e-05 [overlap_grad_ring_attention]: 0.00014172 [overlap_grad_flash_sp]: 0.00011746 [begin_end_overlap_inline]: 5.59958e-07 [split_matmul_comm_elemetwise]: 1.13004e-06 [split_layernorm_comm]: 1.64006e-06 [handle_group_info]: 3.36999e-06 [symbol_engine_optimizer]: 0.00080983, [1] [Cycle 1]: 0.00080363, [6] [build]: 5.236e-05 [elim_shapecalc]: 0.00014187 [elim_not_effective]: 0.00022673 [opt_reshape]: 0.0001296 [fold_const_symbol]: 0.0002138 [renormalize]: 3.7998e-07 [pipeline_parallel_scheduler]: 2.36009e-06 [auto_monad_reorder]: 0.00031757 [get_jit_bprop_graph]: 3.69968e-07 [rewriter_after_jit_bprop_graph]: 3.30037e-07 [eliminate_special_op_node]: 0.00136177 [distribtued_split]: 0.00038024 [validate]: 0.00027238 [task_emit]: 12.9164 [execute]: 8.68004e-06 Sums bootstrap : 0.001516s : 0.01% type_inference : 0.726766s : 5.09% auto_monad : 0.002008s : 0.01% graph_reusing : 0.000025s : 0.00% inline.rewriter_before_opt_a : 0.001545s : 0.01% inline.a1a2.expand_dump_flag : 0.000033s : 0.00% inline.a1a2.switch_simplify : 0.001154s : 0.01% inline.a1a2.loop_unroll : 0.000768s : 0.01% inline.a1a2.a_1 : 0.025170s : 0.18% inline.a1a2.recompute_prepare : 0.000263s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000433s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000167s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000262s : 0.00% inline.a1a2.parameter_eliminate : 0.000007s : 0.00% inline.a1a2.a_2 : 0.004694s : 0.03% inline.a1a2.parallel_inline_pass : 0.000200s : 0.00% parallel-infer-symbol : 0.000169s : 0.00% pre_auto_parallel : 0.000087s : 0.00% insert-virtual-dataset : 0.001287s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000126s : 0.00% pipeline_split : 0.000086s : 0.00% optimize.py_interpret_to_execute : 0.000129s : 0.00% optimize.rewriter_before_opt_a : 0.000283s : 0.00% optimize.opt_a.expand_dump_flag : 0.000055s : 0.00% optimize.opt_a.switch_simplify : 0.002079s : 0.01% optimize.opt_a.loop_unroll : 0.001727s : 0.01% optimize.opt_a.a_1 : 0.038484s : 0.27% optimize.opt_a.recompute_prepare : 0.000412s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000473s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000256s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000311s : 0.00% optimize.opt_a.parameter_eliminate : 0.000009s : 0.00% optimize.opt_a.a_2 : 0.008093s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000541s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000181s : 0.00% optimize.opt_a.shard_inline : 0.000395s : 0.00% optimize.opt_a.auto_parallel : 0.000298s : 0.00% optimize.opt_a.parallel : 0.015159s : 0.11% optimize.opt_a.flash_sp : 0.000191s : 0.00% optimize.opt_a.merge_comm : 0.000339s : 0.00% optimize.opt_a.allreduce_fusion : 0.000260s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000356s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000407s : 0.00% optimize.opt_a.virtual_dataset : 0.000434s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000372s : 0.00% optimize.opt_a.virtual_output : 0.000381s : 0.00% optimize.opt_a.merge_forward : 0.000260s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000725s : 0.01% optimize.opt_a.before_grad : 0.000692s : 0.00% optimize.opt_a.inplace_validation : 0.000300s : 0.00% optimize.opt_a.parallel_renormalize : 0.021474s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000441s : 0.00% optimize.opt_a.meta_fg_expand : 0.259189s : 1.82% optimize.opt_a.inplace_validation_after_expand : 0.001869s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001210s : 0.01% optimize.opt_a.receive_attached : 0.000083s : 0.00% optimize.opt_a.after_resolve : 0.002343s : 0.02% optimize.opt_a.a_after_grad : 0.004353s : 0.03% optimize.opt_a.special_op_eliminate : 0.002083s : 0.01% optimize.opt_a.renormalize : 0.158884s : 1.11% optimize.opt_a.add_forward_monad_depend : 0.000376s : 0.00% optimize.opt_a.auto_monad_grad : 0.000216s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002269s : 0.02% optimize.opt_a.cse : 0.011249s : 0.08% optimize.opt_a.a_3 : 0.027174s : 0.19% optimize.py_interpret_to_execute_after_opt_a : 0.000138s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000990s : 0.01% optimize.convert_after_rewriter : 0.000109s : 0.00% optimize.order_py_execute_after_rewriter : 0.000080s : 0.00% optimize.opt_b.b_1 : 0.003144s : 0.02% optimize.opt_b.b_2 : 0.000135s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000097s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000086s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000089s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000394s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000133s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000063s : 0.00% optimize.loop_unroll : 0.000913s : 0.01% optimize.opt_after_cconv.c_1 : 0.000779s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000128s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000092s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000092s : 0.00% optimize.opt_after_cconv.cse : 0.000440s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000595s : 0.00% optimize.tuple_transform.d_1 : 0.000911s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000149s : 0.00% optimize.add_recomputation : 0.000688s : 0.00% optimize.cse_after_recomputation.cse : 0.000284s : 0.00% optimize.environ_conv : 0.000086s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000123s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000510s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000157s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000375s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000103s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000141s : 0.00% optimize.add_comm_op_reuse_tag : 0.000144s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000029s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000071s : 0.00% optimize.overlap_grad_ring_attention : 0.000142s : 0.00% optimize.overlap_grad_flash_sp : 0.000117s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000052s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000142s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000227s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000130s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000214s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000318s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001362s : 0.01% distribtued_split : 0.000380s : 0.00% validate : 0.000272s : 0.00% task_emit : 12.916424s : 90.54% execute : 0.000009s : 0.00% Time group info: ------[substitution.] 0.047381 4298 0.04% : 0.000019s : 5: substitution.ad_related_special_op_eliminate 0.05% : 0.000022s : 9: substitution.addn_check_dump 0.10% : 0.000049s : 7: substitution.addn_zero_filter 0.03% : 0.000013s : 7: substitution.adjust_all_reduce_mul_add 0.61% : 0.000287s : 71: substitution.arithmetic_simplify 0.10% : 0.000048s : 10: substitution.cast_eliminate 0.11% : 0.000053s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000023s : 15: substitution.environ_get_add_eliminate 0.03% : 0.000016s : 12: substitution.environ_get_depend_swap 0.05% : 0.000025s : 27: substitution.environ_get_eliminate 0.07% : 0.000034s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000018s : 23: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.02% : 0.000010s : 10: substitution.float_tuple_getitem_switch 0.07% : 0.000031s : 107: substitution.fold_const_symbol 63.73% : 0.030197s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000083s : 126: substitution.graph_param_transform 0.02% : 0.000007s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 24.50% : 0.011609s : 331: substitution.inline 1.48% : 0.000700s : 112: substitution.inline_without_move 0.27% : 0.000126s : 309: substitution.j_node_and_user_rematch 0.25% : 0.000118s : 40: substitution.less_batch_normalization 0.10% : 0.000046s : 90: substitution.load_eliminater 0.10% : 0.000047s : 10: substitution.merge_addn 0.23% : 0.000108s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.07% : 0.000031s : 1: substitution.partial_defer_inline 0.13% : 0.000061s : 23: substitution.partial_eliminate 0.03% : 0.000015s : 26: substitution.reduce_all_const_elim 0.06% : 0.000030s : 15: substitution.reduce_eliminate 0.34% : 0.000160s : 309: substitution.remove_not_recompute_node 2.09% : 0.000988s : 508: substitution.replace_applicator 0.26% : 0.000122s : 251: substitution.replace_old_param 0.08% : 0.000037s : 11: substitution.reshape_eliminate 0.02% : 0.000011s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000009s : 4: substitution.specialize_transform 0.03% : 0.000015s : 12: substitution.split_environ_get_set_with_tuple_value 0.16% : 0.000078s : 34: substitution.switch_simplify 0.06% : 0.000027s : 11: substitution.tile_eliminate 0.59% : 0.000281s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000126s : 107: substitution.tuple_list_get_item_const_eliminator 0.42% : 0.000199s : 107: substitution.tuple_list_get_item_depend_reorder 1.60% : 0.000757s : 308: substitution.tuple_list_get_item_eliminator 0.36% : 0.000171s : 107: substitution.tuple_list_get_set_item_eliminator 0.40% : 0.000192s : 210: substitution.updatestate_pure_node_eliminater 0.69% : 0.000325s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000011s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.726312 2 96.57% : 0.701378s : 1: type_inference.infer 3.43% : 0.024935s : 1: type_inference.specialize ------[replace.] 0.009526 775 0.41% : 0.000039s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000006s : 1: replace.arithmetic_simplify 0.48% : 0.000046s : 7: replace.depend_value_elim 0.43% : 0.000041s : 3: replace.environ_get_set_eliminate 27.56% : 0.002625s : 183: replace.getattr_setattr_resolve 30.94% : 0.002947s : 310: replace.inline 0.22% : 0.000021s : 1: replace.merge_addn 1.20% : 0.000114s : 7: replace.partial_eliminate 4.16% : 0.000396s : 25: replace.replace_applicator 3.94% : 0.000375s : 34: replace.switch_simplify 0.55% : 0.000052s : 6: replace.tuple_list_get_item_depend_reorder 29.70% : 0.002829s : 191: replace.tuple_list_get_item_eliminator 0.17% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.19% : 0.000018s : 1: replace.virtual_dataset_eliminate ------[match.] 0.040172 775 0.04% : 0.000016s : 5: match.ad_related_special_op_eliminate 0.03% : 0.000010s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000016s : 3: match.environ_get_set_eliminate 69.85% : 0.028061s : 183: match.getattr_setattr_resolve 28.34% : 0.011385s : 310: match.inline 0.05% : 0.000020s : 1: match.merge_addn 0.09% : 0.000036s : 7: match.partial_eliminate 0.25% : 0.000100s : 25: match.replace_applicator 0.15% : 0.000059s : 34: match.switch_simplify 0.08% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 1.04% : 0.000419s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000008s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000009s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.021575131318 0.75% : 0.000161s : 1198: predicate.accumulaten_eliminater 0.26% : 0.000056s : 254: predicate.ad_related_special_op_eliminate 0.58% : 0.000124s : 835: predicate.addn_check_dump 0.74% : 0.000160s : 1198: predicate.addn_zero_filter 0.71% : 0.000154s : 1198: predicate.adjust_all_reduce_mul_add 1.76% : 0.000379s : 2034: predicate.arithmetic_simplify 1.09% : 0.000235s : 1586: predicate.cast_eliminate 3.52% : 0.000760s : 3484: predicate.check_bprop_eliminate 0.58% : 0.000125s : 835: predicate.compare_switch_simplify 0.07% : 0.000015s : 135: predicate.const_output_eliminate 0.16% : 0.000035s : 242: predicate.convert_tensor_all_eliminate 1.06% : 0.000228s : 1399: predicate.convert_tensor_eliminate 0.60% : 0.000128s : 838: predicate.depend_value_elim 0.79% : 0.000169s : 1202: predicate.dict_get_item_const_eliminator 0.95% : 0.000205s : 1202: predicate.dict_get_item_eliminator 0.78% : 0.000169s : 1202: predicate.dict_set_item_eliminator 0.04% : 0.000009s : 126: predicate.elim_not_effective 0.10% : 0.000022s : 126: predicate.elim_shapecalc_of_broadcastargs 0.82% : 0.000177s : 1334: predicate.environ_add_const_eliminate 0.82% : 0.000177s : 1337: predicate.environ_get_add_eliminate 0.82% : 0.000177s : 1334: predicate.environ_get_depend_swap 1.43% : 0.000309s : 2172: predicate.environ_get_eliminate 0.82% : 0.000176s : 1337: predicate.environ_get_set_eliminate 1.09% : 0.000234s : 1717: predicate.exchange_switch_depend_value 1.33% : 0.000288s : 1717: predicate.float_depend_g_call 0.58% : 0.000125s : 835: predicate.float_environ_get_switch 0.68% : 0.000147s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.27% : 0.000058s : 395: predicate.get_grad_eliminate 2.17% : 0.000468s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.59% : 0.000126s : 835: predicate.incorporate_call 0.58% : 0.000125s : 835: predicate.incorporate_call_switch 3.97% : 0.000855s : 4602: predicate.inline 2.23% : 0.000481s : 2203: predicate.inline_without_move 0.14% : 0.000029s : 395: predicate.j_node_and_user_rematch 0.31% : 0.000066s : 388: predicate.less_batch_normalization 1.06% : 0.000229s : 1660: predicate.list_to_tuple_eliminator_ 1.78% : 0.000385s : 2874: predicate.load_eliminater 0.18% : 0.000038s : 135: predicate.loop_unroll_after_grad 2.28% : 0.000492s : 2640: predicate.loop_unroll_before_grad 0.96% : 0.000208s : 1478: predicate.make_slice_get_slice_eliminator 0.59% : 0.000127s : 837: predicate.merge_addn 3.29% : 0.000711s : 3380: predicate.micro_step_allgather_replace 3.31% : 0.000714s : 3380: predicate.mini_step_allgather_replace 0.74% : 0.000160s : 1199: predicate.minmaximum_grad 0.17% : 0.000036s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.12% : 0.000025s : 135: predicate.parallel_virtual_node 1.97% : 0.000425s : 1717: predicate.partial_defer_inline 1.05% : 0.000226s : 1541: predicate.partial_eliminate 0.73% : 0.000157s : 1198: predicate.print_const_string_wrapper 0.59% : 0.000127s : 824: predicate.reduce_all_const_elim 0.92% : 0.000199s : 1199: predicate.reduce_eliminate 0.18% : 0.000039s : 395: predicate.remove_not_recompute_node 2.44% : 0.000527s : 4829: predicate.replace_applicator 0.78% : 0.000167s : 2203: predicate.replace_old_param 0.07% : 0.000015s : 135: predicate.reset_defer_inline 0.89% : 0.000192s : 1199: predicate.reshape_eliminate 3.34% : 0.000721s : 3380: predicate.row_tensor_add_zeros_like 0.10% : 0.000021s : 135: predicate.row_tensor_eliminate 3.52% : 0.000759s : 3484: predicate.same_eliminate 0.23% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.28% : 0.000060s : 395: predicate.shard_identity_eliminate 2.04% : 0.000439s : 2338: predicate.special_op_eliminate 0.65% : 0.000141s : 837: predicate.specialize_transform 3.68% : 0.000794s : 3380: predicate.split_environ_get_set_with_tuple_value 1.53% : 0.000331s : 2203: predicate.stack_unstack_eliminate 1.81% : 0.000391s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.35% : 0.000292s : 1717: predicate.switch_defer_inline 4.55% : 0.000982s : 5201: predicate.switch_layer_defer_inline 4.31% : 0.000930s : 5262: predicate.switch_simplify 0.80% : 0.000173s : 1199: predicate.tile_eliminate 0.73% : 0.000158s : 1199: predicate.transpose_eliminate 1.07% : 0.000230s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.03% : 0.000223s : 1469: predicate.tuple_list_get_item_const_eliminator 0.91% : 0.000196s : 1469: predicate.tuple_list_get_item_depend_reorder 1.88% : 0.000405s : 2495: predicate.tuple_list_get_item_eliminator 0.96% : 0.000206s : 1469: predicate.tuple_list_get_set_item_eliminator 1.66% : 0.000359s : 2304: predicate.tuple_list_set_item_eliminator 1.05% : 0.000227s : 1660: predicate.tuple_to_list_eliminator_ 1.79% : 0.000387s : 2874: predicate.updatestate_pure_node_eliminater 2.42% : 0.000523s : 3710: predicate.updatestate_useless_node_eliminater 0.11% : 0.000024s : 135: predicate.value_based_eliminate 0.28% : 0.000060s : 397: predicate.virtual_dataset_eliminate 0.26% : 0.000057s : 395: predicate.virtual_output_eliminate 0.11% : 0.000024s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.056499 747 68.23% : 0.038548s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.48% : 0.001403s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.29% : 0.016548s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.246603 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.27% : 0.040838s : 1: a1a2 0.00% : 0.000156s : 1: add_cache_embedding 0.00% : 0.000151s : 1: add_comm_op_reuse_tag 0.00% : 0.000699s : 1: add_recomputation 0.00% : 0.000384s : 1: assign_add_opt 0.01% : 0.002030s : 1: auto_monad 0.00% : 0.000330s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001550s : 1: bootstrap 0.00% : 0.000069s : 1: cconv 0.00% : 0.000148s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000116s : 1: convert_after_rewriter 0.00% : 0.000308s : 1: cse_after_recomputation 0.00% : 0.000135s : 1: dataset_repeat_opt 0.00% : 0.000394s : 1: distribtued_split 0.01% : 0.001375s : 1: eliminate_special_op_node 0.00% : 0.000094s : 1: environ_conv 0.00% : 0.000017s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000033s : 1: graph_reusing 0.00% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000006s : 1: handle_group_info 0.28% : 0.042447s : 1: inline 0.01% : 0.001307s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000518s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.01% : 0.000923s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.032186s : 61: opt.transform.a1a2 0.00% : 0.000173s : 1: opt.transform.loop_unroll_optimizer 0.60% : 0.090909s : 148: opt.transform.opt_a 0.01% : 0.000777s : 1: opt.transform.opt_after_cconv 0.02% : 0.003250s : 27: opt.transform.opt_b 0.23% : 0.035636s : 16: opt.transform.opt_resolve 0.01% : 0.000909s : 1: opt.transform.opt_trans_graph 0.01% : 0.000826s : 6: opt.transform.special_op_eliminate 0.00% : 0.000706s : 4: opt.transform.symbol_engine_opt 3.72% : 0.567660s : 1: opt_a 0.01% : 0.001598s : 1: opt_after_cconv 0.03% : 0.004010s : 1: opt_b 3.82% : 0.582049s : 1: optimize 0.00% : 0.000140s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000085s : 1: order_py_execute_after_rewriter 0.00% : 0.000122s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000147s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000033s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000076s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000180s : 1: parallel-infer-symbol 0.00% : 0.000008s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000096s : 1: pipeline_split 0.00% : 0.000096s : 1: pre_auto_parallel 0.00% : 0.000137s : 1: py_interpret_to_execute 0.00% : 0.000147s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000109s : 1: remove_cast_before_assign_add 0.00% : 0.000607s : 1: remove_dup_value 0.84% : 0.127671s : 3: renormalize.infer 0.35% : 0.052644s : 3: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001000s : 1: rewriter_after_opt_a 0.01% : 0.001849s : 2: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000164s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.00% : 0.000130s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000814s : 1: symbol_engine_optimizer 84.72% : 12.916484s : 1: task_emit 0.01% : 0.000938s : 1: tuple_transform 4.77% : 0.726802s : 1: type_inference 0.01% : 0.001286s : 1: validate TotalTime = 14.4627, [21] [bootstrap]: 0.00122776 [type_inference]: 0.727409 [auto_monad]: 0.00186775 [graph_reusing]: 2.454e-05 [inline]: 0.0436136, [2] [rewriter_before_opt_a]: 0.00145516 [a1a2]: 0.042117, [2] [Cycle 1]: 0.0286551, [11] [expand_dump_flag]: 3.71799e-05 [switch_simplify]: 0.00110886 [loop_unroll]: 0.00067835 [a_1]: 0.0221697 [recompute_prepare]: 0.00016164 [updatestate_depend_eliminate]: 0.00037632 [updatestate_assign_eliminate]: 9.677e-05 [updatestate_loads_eliminate]: 0.00020864 [parameter_eliminate]: 4.09002e-06 [a_2]: 0.00353899 [parallel_inline_pass]: 0.00010329 [Cycle 2]: 0.00562119, [11] [expand_dump_flag]: 1.52993e-06 [switch_simplify]: 9.289e-05 [loop_unroll]: 9.169e-05 [a_1]: 0.00329324 [recompute_prepare]: 9.882e-05 [updatestate_depend_eliminate]: 0.00021958 [updatestate_assign_eliminate]: 6.556e-05 [updatestate_loads_eliminate]: 6.491e-05 [parameter_eliminate]: 3.24997e-06 [a_2]: 0.00150719 [parallel_inline_pass]: 0.00010117 [parallel-infer-symbol]: 0.00017551 [pre_auto_parallel]: 9.514e-05 [insert-virtual-dataset]: 0.00128071 [parallel-infer-symbol-second]: 2.33005e-06 [dataset_repeat_opt]: 7.742e-05 [pipeline_split]: 0.00010036 [optimize]: 0.58842, [52] [py_interpret_to_execute]: 0.00013829 [rewriter_before_opt_a]: 0.00027592 [opt_a]: 0.574075, [3] [Cycle 1]: 0.491785, [46] [expand_dump_flag]: 1.66998e-06 [switch_simplify]: 0.0001094 [loop_unroll]: 9.59199e-05 [a_1]: 0.00332015 [recompute_prepare]: 0.00010234 [updatestate_depend_eliminate]: 0.00010517 [updatestate_assign_eliminate]: 6.282e-05 [updatestate_loads_eliminate]: 6.882e-05 [parameter_eliminate]: 2.53005e-06 [a_2]: 0.00160719 [accelerated_algorithm]: 0.00030837 [shard]: 1.50991e-06 [meta_shard_fg_expand]: 4.985e-05 [shard_inline]: 0.00010627 [auto_parallel]: 8.186e-05 [parallel]: 0.0143407 [flash_sp]: 5.66101e-05 [merge_comm]: 0.00012844 [allreduce_fusion]: 7.49199e-05 [matmul_add_comm_reduction]: 9.795e-05 [allreduce_slice_to_reducescatter]: 4.00003e-07 [virtual_shard_identity]: 0.00012379 [virtual_dataset]: 0.00016691 [get_grad_eliminate_]: 0.00011575 [virtual_output]: 0.00011492 [merge_forward]: 7.847e-05 [cell_reuse_recompute_pass]: 3.05998e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.0002436 [before_grad]: 0.0001979 [inplace_validation]: 0.00012588 [parallel_renormalize]: 0.0218669 [update_top_fg]: 7.29924e-07 [cast_eliminate]: 0.00014847 [meta_fg_expand]: 0.262177 [inplace_validation_after_expand]: 0.00152861 [flash_sp_send_recv_attached]: 0.00117623 [receive_attached]: 8.11e-05 [after_resolve]: 0.00198491 [a_after_grad]: 0.00381775 [special_op_eliminate]: 0.00180904 [renormalize]: 0.144014 [add_forward_monad_depend]: 0.00037076 [auto_monad_grad]: 0.00022228 [auto_monad_eliminator]: 0.00185123 [cse]: 0.00420008 [a_3]: 0.0240965 [Cycle 2]: 0.0704641, [46] [expand_dump_flag]: 5.15501e-05 [switch_simplify]: 0.00181949 [loop_unroll]: 0.00148219 [a_1]: 0.0312819 [recompute_prepare]: 0.00017524 [updatestate_depend_eliminate]: 0.00023089 [updatestate_assign_eliminate]: 0.00010123 [updatestate_loads_eliminate]: 0.00016234 [parameter_eliminate]: 3.24007e-06 [a_2]: 0.00428171 [accelerated_algorithm]: 0.00016178 [shard]: 1.92004e-06 [meta_shard_fg_expand]: 8.68801e-05 [shard_inline]: 0.00013849 [auto_parallel]: 0.00011294 [parallel]: 1.012e-05 [flash_sp]: 0.00012417 [merge_comm]: 0.00011028 [allreduce_fusion]: 9.30401e-05 [matmul_add_comm_reduction]: 0.00011357 [allreduce_slice_to_reducescatter]: 4.1991e-07 [virtual_shard_identity]: 0.00014107 [virtual_dataset]: 0.00013569 [get_grad_eliminate_]: 0.0001311 [virtual_output]: 0.00013396 [merge_forward]: 8.924e-05 [cell_reuse_recompute_pass]: 1.89e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024806 [before_grad]: 0.00026437 [inplace_validation]: 8.514e-05 [parallel_renormalize]: 7.99773e-08 [update_top_fg]: 6.00005e-07 [cast_eliminate]: 0.00015538 [meta_fg_expand]: 0.00028903 [inplace_validation_after_expand]: 0.00017778 [flash_sp_send_recv_attached]: 1.83994e-06 [receive_attached]: 1.13004e-06 [after_resolve]: 0.0001578 [a_after_grad]: 0.00022549 [special_op_eliminate]: 0.00013382 [renormalize]: 0.0187339 [add_forward_monad_depend]: 6.37001e-06 [auto_monad_grad]: 2.39001e-06 [auto_monad_eliminator]: 0.00028607 [cse]: 0.00712229 [a_3]: 0.00097606 [Cycle 3]: 0.0118043, [46] [expand_dump_flag]: 2.48e-06 [switch_simplify]: 0.00013224 [loop_unroll]: 0.00012896 [a_1]: 0.00428983 [recompute_prepare]: 0.00013462 [updatestate_depend_eliminate]: 0.00014819 [updatestate_assign_eliminate]: 9.55199e-05 [updatestate_loads_eliminate]: 9.258e-05 [parameter_eliminate]: 4.14997e-06 [a_2]: 0.00204001 [accelerated_algorithm]: 0.00018608 [shard]: 1.92004e-06 [meta_shard_fg_expand]: 5.67801e-05 [shard_inline]: 0.00013412 [auto_parallel]: 0.00011723 [parallel]: 9.74e-06 [flash_sp]: 2.52004e-06 [merge_comm]: 0.00010873 [allreduce_fusion]: 9.488e-05 [matmul_add_comm_reduction]: 0.0001188 [allreduce_slice_to_reducescatter]: 5.80098e-07 [virtual_shard_identity]: 0.00013684 [virtual_dataset]: 0.00013241 [get_grad_eliminate_]: 0.00013321 [virtual_output]: 0.00012885 [merge_forward]: 9.45e-05 [cell_reuse_recompute_pass]: 3.63996e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024718 [before_grad]: 0.00023223 [inplace_validation]: 8.942e-05 [parallel_renormalize]: 1.50059e-07 [update_top_fg]: 5.69969e-07 [cast_eliminate]: 0.00014874 [meta_fg_expand]: 0.00011459 [inplace_validation_after_expand]: 0.00011872 [flash_sp_send_recv_attached]: 1.77999e-06 [receive_attached]: 1.59e-06 [after_resolve]: 0.00014597 [a_after_grad]: 0.00021561 [special_op_eliminate]: 0.00012916 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 2.26998e-06 [auto_monad_grad]: 2.63995e-06 [auto_monad_eliminator]: 0.00017411 [cse]: 0.00040553 [a_3]: 0.0009506 [py_interpret_to_execute_after_opt_a]: 0.00014183 [slice_cell_reuse_recomputed_activation]: 1.40001e-06 [rewriter_after_opt_a]: 0.00094326 [convert_after_rewriter]: 0.00011322 [order_py_execute_after_rewriter]: 8.224e-05 [opt_b]: 0.00392871, [1] [Cycle 1]: 0.00392099, [7] [b_1]: 0.00305385 [b_2]: 0.00013497 [updatestate_depend_eliminate]: 0.0001007 [updatestate_assign_eliminate]: 8.741e-05 [updatestate_loads_eliminate]: 9.027e-05 [renormalize]: 4.00003e-07 [cse]: 0.00039967 [optimize_parallel_all_gather_comm]: 0.0001442 [overlap_param_gather]: 1.51002e-06 [cconv]: 6.83899e-05 [loop_unroll]: 0.00096813 [opt_after_cconv]: 0.00158367, [1] [Cycle 1]: 0.00157644, [7] [c_1]: 0.00077517 [parameter_eliminate]: 2.32994e-06 [updatestate_depend_eliminate]: 0.00013458 [updatestate_assign_eliminate]: 0.0001166 [updatestate_loads_eliminate]: 9.598e-05 [cse]: 0.00039546 [renormalize]: 4.4005e-07 [remove_dup_value]: 0.00061319 [tuple_transform]: 0.00093166, [1] [Cycle 1]: 0.00092429, [2] [d_1]: 0.00090828 [renormalize]: 4.10015e-07 [partial_unused_args_eliminate]: 2.23995e-06 [add_cache_embedding]: 0.00014957 [add_recomputation]: 0.00071065 [cse_after_recomputation]: 0.00030937, [1] [Cycle 1]: 0.00030164, [1] [cse]: 0.00028938 [environ_conv]: 9.083e-05 [swap_dp_allreduce_reducescatter]: 0.00013181 [bias_add_comm_swap]: 2.33995e-06 [label_micro_interleaved_index]: 1.31002e-06 [label_fine_grained_interleaved_index]: 0.00053063 [merge_cast_opt]: 1.16008e-06 [slice_recompute_activation]: 0.00014911 [micro_interleaved_order_control]: 1.30001e-06 [assign_add_opt]: 0.00037728 [ForceFp32Comm]: 1.00001e-06 [remove_cast_before_assign_add]: 0.00010696 [full_micro_interleaved_order_control]: 1.42993e-06 [reorder_send_recv_between_fp_bp]: 1.04995e-06 [comm_op_add_attrs]: 0.00014107 [add_comm_op_reuse_tag]: 0.00014361 [interleave_split_concat_branches]: 6.50063e-07 [interleave_parallel_branches]: 5.79981e-07 [overlap_opt_shard_in_pipeline]: 9.71009e-06 [overlap_opt_shard_grad_in_pipeline]: 2.11003e-06 [control_data_broadcast_order]: 8.10018e-07 [grouped_pairwise_exchange_alltoall]: 8.42009e-06 [offloading_packed_experts]: 1.37999e-06 [overlap_recompute_and_grad_model_parallel]: 1.25007e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.79981e-07 [overlap_recompute_allgather_and_fa_grad]: 5.43699e-05 [overlap_grad_ring_attention]: 0.00014773 [overlap_grad_flash_sp]: 0.00011798 [begin_end_overlap_inline]: 7.69971e-07 [split_matmul_comm_elemetwise]: 1.20001e-06 [split_layernorm_comm]: 1.07998e-06 [handle_group_info]: 3.24997e-06 [symbol_engine_optimizer]: 0.00080417, [1] [Cycle 1]: 0.00079743, [6] [build]: 5.413e-05 [elim_shapecalc]: 0.00014077 [elim_not_effective]: 0.00022378 [opt_reshape]: 0.00013023 [fold_const_symbol]: 0.00020962 [renormalize]: 3.30037e-07 [pipeline_parallel_scheduler]: 2.69001e-06 [auto_monad_reorder]: 0.00029715 [get_jit_bprop_graph]: 6.79982e-07 [rewriter_after_jit_bprop_graph]: 3.30037e-07 [eliminate_special_op_node]: 0.00142051 [distribtued_split]: 0.00035212 [validate]: 0.0002775 [task_emit]: 13.0947 [execute]: 8.20996e-06 Sums bootstrap : 0.001228s : 0.01% type_inference : 0.727409s : 5.03% auto_monad : 0.001868s : 0.01% graph_reusing : 0.000025s : 0.00% inline.rewriter_before_opt_a : 0.001455s : 0.01% inline.a1a2.expand_dump_flag : 0.000039s : 0.00% inline.a1a2.switch_simplify : 0.001202s : 0.01% inline.a1a2.loop_unroll : 0.000770s : 0.01% inline.a1a2.a_1 : 0.025463s : 0.18% inline.a1a2.recompute_prepare : 0.000260s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000596s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000162s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000274s : 0.00% inline.a1a2.parameter_eliminate : 0.000007s : 0.00% inline.a1a2.a_2 : 0.005046s : 0.03% inline.a1a2.parallel_inline_pass : 0.000204s : 0.00% parallel-infer-symbol : 0.000176s : 0.00% pre_auto_parallel : 0.000095s : 0.00% insert-virtual-dataset : 0.001281s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000077s : 0.00% pipeline_split : 0.000100s : 0.00% optimize.py_interpret_to_execute : 0.000138s : 0.00% optimize.rewriter_before_opt_a : 0.000276s : 0.00% optimize.opt_a.expand_dump_flag : 0.000056s : 0.00% optimize.opt_a.switch_simplify : 0.002061s : 0.01% optimize.opt_a.loop_unroll : 0.001707s : 0.01% optimize.opt_a.a_1 : 0.038892s : 0.27% optimize.opt_a.recompute_prepare : 0.000412s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000484s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000260s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000324s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.007929s : 0.05% optimize.opt_a.accelerated_algorithm : 0.000656s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000194s : 0.00% optimize.opt_a.shard_inline : 0.000379s : 0.00% optimize.opt_a.auto_parallel : 0.000312s : 0.00% optimize.opt_a.parallel : 0.014361s : 0.10% optimize.opt_a.flash_sp : 0.000183s : 0.00% optimize.opt_a.merge_comm : 0.000347s : 0.00% optimize.opt_a.allreduce_fusion : 0.000263s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000330s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000402s : 0.00% optimize.opt_a.virtual_dataset : 0.000435s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000380s : 0.00% optimize.opt_a.virtual_output : 0.000378s : 0.00% optimize.opt_a.merge_forward : 0.000262s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000739s : 0.01% optimize.opt_a.before_grad : 0.000694s : 0.00% optimize.opt_a.inplace_validation : 0.000300s : 0.00% optimize.opt_a.parallel_renormalize : 0.021867s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000453s : 0.00% optimize.opt_a.meta_fg_expand : 0.262580s : 1.82% optimize.opt_a.inplace_validation_after_expand : 0.001825s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001180s : 0.01% optimize.opt_a.receive_attached : 0.000084s : 0.00% optimize.opt_a.after_resolve : 0.002289s : 0.02% optimize.opt_a.a_after_grad : 0.004259s : 0.03% optimize.opt_a.special_op_eliminate : 0.002072s : 0.01% optimize.opt_a.renormalize : 0.162748s : 1.13% optimize.opt_a.add_forward_monad_depend : 0.000379s : 0.00% optimize.opt_a.auto_monad_grad : 0.000227s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002311s : 0.02% optimize.opt_a.cse : 0.011728s : 0.08% optimize.opt_a.a_3 : 0.026023s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000142s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000943s : 0.01% optimize.convert_after_rewriter : 0.000113s : 0.00% optimize.order_py_execute_after_rewriter : 0.000082s : 0.00% optimize.opt_b.b_1 : 0.003054s : 0.02% optimize.opt_b.b_2 : 0.000135s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000101s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000087s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000090s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000400s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000144s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000068s : 0.00% optimize.loop_unroll : 0.000968s : 0.01% optimize.opt_after_cconv.c_1 : 0.000775s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000135s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000117s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000096s : 0.00% optimize.opt_after_cconv.cse : 0.000395s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000613s : 0.00% optimize.tuple_transform.d_1 : 0.000908s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000150s : 0.00% optimize.add_recomputation : 0.000711s : 0.00% optimize.cse_after_recomputation.cse : 0.000289s : 0.00% optimize.environ_conv : 0.000091s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000132s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000531s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000149s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000377s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000107s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000141s : 0.00% optimize.add_comm_op_reuse_tag : 0.000144s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000010s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000008s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000054s : 0.00% optimize.overlap_grad_ring_attention : 0.000148s : 0.00% optimize.overlap_grad_flash_sp : 0.000118s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000054s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000141s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000224s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000130s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000210s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000297s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001421s : 0.01% distribtued_split : 0.000352s : 0.00% validate : 0.000278s : 0.00% task_emit : 13.094730s : 90.61% execute : 0.000008s : 0.00% Time group info: ------[substitution.] 0.049243 4298 0.04% : 0.000021s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000021s : 9: substitution.addn_check_dump 0.11% : 0.000052s : 7: substitution.addn_zero_filter 0.03% : 0.000014s : 7: substitution.adjust_all_reduce_mul_add 0.62% : 0.000305s : 71: substitution.arithmetic_simplify 0.11% : 0.000053s : 10: substitution.cast_eliminate 0.11% : 0.000054s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000025s : 15: substitution.environ_get_add_eliminate 0.03% : 0.000017s : 12: substitution.environ_get_depend_swap 0.06% : 0.000028s : 27: substitution.environ_get_eliminate 0.07% : 0.000034s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000020s : 23: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.03% : 0.000013s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000029s : 107: substitution.fold_const_symbol 64.42% : 0.031721s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000082s : 126: substitution.graph_param_transform 0.02% : 0.000008s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 24.02% : 0.011830s : 331: substitution.inline 1.36% : 0.000667s : 112: substitution.inline_without_move 0.25% : 0.000123s : 309: substitution.j_node_and_user_rematch 0.36% : 0.000177s : 40: substitution.less_batch_normalization 0.09% : 0.000046s : 90: substitution.load_eliminater 0.10% : 0.000049s : 10: substitution.merge_addn 0.24% : 0.000116s : 101: substitution.minmaximum_grad 0.00% : 0.000001s : 4: substitution.opt_reshape 0.03% : 0.000016s : 1: substitution.partial_defer_inline 0.14% : 0.000067s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.07% : 0.000032s : 15: substitution.reduce_eliminate 0.32% : 0.000158s : 309: substitution.remove_not_recompute_node 1.99% : 0.000981s : 508: substitution.replace_applicator 0.22% : 0.000108s : 251: substitution.replace_old_param 0.08% : 0.000038s : 11: substitution.reshape_eliminate 0.03% : 0.000013s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000010s : 4: substitution.specialize_transform 0.03% : 0.000016s : 12: substitution.split_environ_get_set_with_tuple_value 0.16% : 0.000080s : 34: substitution.switch_simplify 0.06% : 0.000028s : 11: substitution.tile_eliminate 0.52% : 0.000255s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000135s : 107: substitution.tuple_list_get_item_const_eliminator 0.43% : 0.000210s : 107: substitution.tuple_list_get_item_depend_reorder 1.58% : 0.000777s : 308: substitution.tuple_list_get_item_eliminator 0.38% : 0.000187s : 107: substitution.tuple_list_get_set_item_eliminator 0.40% : 0.000197s : 210: substitution.updatestate_pure_node_eliminater 0.72% : 0.000356s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000011s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.726950 2 96.45% : 0.701115s : 1: type_inference.infer 3.55% : 0.025834s : 1: type_inference.specialize ------[replace.] 0.009784 775 0.42% : 0.000041s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000007s : 1: replace.arithmetic_simplify 0.56% : 0.000054s : 7: replace.depend_value_elim 0.44% : 0.000043s : 3: replace.environ_get_set_eliminate 28.37% : 0.002776s : 183: replace.getattr_setattr_resolve 30.12% : 0.002947s : 310: replace.inline 0.22% : 0.000021s : 1: replace.merge_addn 1.18% : 0.000116s : 7: replace.partial_eliminate 4.09% : 0.000400s : 25: replace.replace_applicator 4.00% : 0.000392s : 34: replace.switch_simplify 0.53% : 0.000052s : 6: replace.tuple_list_get_item_depend_reorder 29.62% : 0.002898s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.21% : 0.000021s : 1: replace.virtual_dataset_eliminate ------[match.] 0.041840 775 0.04% : 0.000017s : 5: match.ad_related_special_op_eliminate 0.03% : 0.000012s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000016s : 3: match.environ_get_set_eliminate 70.55% : 0.029519s : 183: match.getattr_setattr_resolve 27.72% : 0.011597s : 310: match.inline 0.05% : 0.000022s : 1: match.merge_addn 0.09% : 0.000039s : 7: match.partial_eliminate 0.24% : 0.000099s : 25: match.replace_applicator 0.14% : 0.000059s : 34: match.switch_simplify 0.07% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 0.98% : 0.000409s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000009s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020616131318 0.76% : 0.000157s : 1198: predicate.accumulaten_eliminater 0.29% : 0.000059s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000113s : 835: predicate.addn_check_dump 0.82% : 0.000168s : 1198: predicate.addn_zero_filter 0.75% : 0.000155s : 1198: predicate.adjust_all_reduce_mul_add 1.75% : 0.000361s : 2034: predicate.arithmetic_simplify 1.10% : 0.000228s : 1586: predicate.cast_eliminate 3.11% : 0.000641s : 3484: predicate.check_bprop_eliminate 0.57% : 0.000117s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.15% : 0.000238s : 1399: predicate.convert_tensor_eliminate 0.58% : 0.000119s : 838: predicate.depend_value_elim 0.86% : 0.000178s : 1202: predicate.dict_get_item_const_eliminator 0.94% : 0.000194s : 1202: predicate.dict_get_item_eliminator 0.97% : 0.000200s : 1202: predicate.dict_set_item_eliminator 0.05% : 0.000010s : 126: predicate.elim_not_effective 0.11% : 0.000022s : 126: predicate.elim_shapecalc_of_broadcastargs 0.84% : 0.000173s : 1334: predicate.environ_add_const_eliminate 0.85% : 0.000175s : 1337: predicate.environ_get_add_eliminate 0.85% : 0.000175s : 1334: predicate.environ_get_depend_swap 1.45% : 0.000299s : 2172: predicate.environ_get_eliminate 0.83% : 0.000172s : 1337: predicate.environ_get_set_eliminate 1.16% : 0.000239s : 1717: predicate.exchange_switch_depend_value 1.50% : 0.000310s : 1717: predicate.float_depend_g_call 0.56% : 0.000116s : 835: predicate.float_environ_get_switch 0.65% : 0.000134s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.29% : 0.000059s : 395: predicate.get_grad_eliminate 2.42% : 0.000499s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.56% : 0.000116s : 835: predicate.incorporate_call 0.54% : 0.000112s : 835: predicate.incorporate_call_switch 3.98% : 0.000820s : 4602: predicate.inline 2.28% : 0.000470s : 2203: predicate.inline_without_move 0.15% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.47% : 0.000097s : 388: predicate.less_batch_normalization 1.11% : 0.000229s : 1660: predicate.list_to_tuple_eliminator_ 1.95% : 0.000402s : 2874: predicate.load_eliminater 0.20% : 0.000042s : 135: predicate.loop_unroll_after_grad 2.33% : 0.000481s : 2640: predicate.loop_unroll_before_grad 0.98% : 0.000201s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000116s : 837: predicate.merge_addn 3.09% : 0.000638s : 3380: predicate.micro_step_allgather_replace 2.99% : 0.000617s : 3380: predicate.mini_step_allgather_replace 0.86% : 0.000177s : 1199: predicate.minmaximum_grad 0.18% : 0.000037s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 2.11% : 0.000434s : 1717: predicate.partial_defer_inline 1.13% : 0.000233s : 1541: predicate.partial_eliminate 0.79% : 0.000162s : 1198: predicate.print_const_string_wrapper 0.56% : 0.000116s : 824: predicate.reduce_all_const_elim 0.95% : 0.000196s : 1199: predicate.reduce_eliminate 0.15% : 0.000030s : 395: predicate.remove_not_recompute_node 1.96% : 0.000405s : 4829: predicate.replace_applicator 0.94% : 0.000194s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.84% : 0.000174s : 1199: predicate.reshape_eliminate 3.02% : 0.000622s : 3380: predicate.row_tensor_add_zeros_like 0.10% : 0.000022s : 135: predicate.row_tensor_eliminate 3.19% : 0.000658s : 3484: predicate.same_eliminate 0.24% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.29% : 0.000060s : 395: predicate.shard_identity_eliminate 2.08% : 0.000428s : 2338: predicate.special_op_eliminate 0.64% : 0.000131s : 837: predicate.specialize_transform 3.33% : 0.000687s : 3380: predicate.split_environ_get_set_with_tuple_value 1.57% : 0.000325s : 2203: predicate.stack_unstack_eliminate 1.85% : 0.000381s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.33% : 0.000274s : 1717: predicate.switch_defer_inline 4.33% : 0.000893s : 5201: predicate.switch_layer_defer_inline 4.36% : 0.000900s : 5262: predicate.switch_simplify 0.77% : 0.000159s : 1199: predicate.tile_eliminate 0.75% : 0.000155s : 1199: predicate.transpose_eliminate 1.06% : 0.000219s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.08% : 0.000223s : 1469: predicate.tuple_list_get_item_const_eliminator 0.96% : 0.000199s : 1469: predicate.tuple_list_get_item_depend_reorder 1.92% : 0.000396s : 2495: predicate.tuple_list_get_item_eliminator 1.04% : 0.000214s : 1469: predicate.tuple_list_get_set_item_eliminator 1.75% : 0.000360s : 2304: predicate.tuple_list_set_item_eliminator 1.08% : 0.000223s : 1660: predicate.tuple_to_list_eliminator_ 1.88% : 0.000388s : 2874: predicate.updatestate_pure_node_eliminater 2.51% : 0.000517s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000020s : 135: predicate.value_based_eliminate 0.29% : 0.000060s : 397: predicate.virtual_dataset_eliminate 0.28% : 0.000057s : 395: predicate.virtual_output_eliminate 0.10% : 0.000022s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.056979 747 67.74% : 0.038600s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.50% : 0.001427s : 22: func_graph_cloner_run.FuncGraphClonerNode 29.75% : 0.016952s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.445858 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.27% : 0.042122s : 1: a1a2 0.00% : 0.000157s : 1: add_cache_embedding 0.00% : 0.000150s : 1: add_comm_op_reuse_tag 0.00% : 0.000722s : 1: add_recomputation 0.00% : 0.000386s : 1: assign_add_opt 0.01% : 0.001890s : 1: auto_monad 0.00% : 0.000310s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.001269s : 1: bootstrap 0.00% : 0.000075s : 1: cconv 0.00% : 0.000148s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000120s : 1: convert_after_rewriter 0.00% : 0.000314s : 1: cse_after_recomputation 0.00% : 0.000086s : 1: dataset_repeat_opt 0.00% : 0.000365s : 1: distribtued_split 0.01% : 0.001435s : 1: eliminate_special_op_node 0.00% : 0.000099s : 1: environ_conv 0.00% : 0.000018s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000033s : 1: graph_reusing 0.00% : 0.000011s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000006s : 1: handle_group_info 0.28% : 0.043627s : 1: inline 0.01% : 0.001301s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000540s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000979s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.21% : 0.032868s : 61: opt.transform.a1a2 0.00% : 0.000176s : 1: opt.transform.loop_unroll_optimizer 0.58% : 0.089917s : 148: opt.transform.opt_a 0.01% : 0.000773s : 1: opt.transform.opt_after_cconv 0.02% : 0.003157s : 27: opt.transform.opt_b 0.24% : 0.037358s : 16: opt.transform.opt_resolve 0.01% : 0.000906s : 1: opt.transform.opt_trans_graph 0.01% : 0.000851s : 6: opt.transform.special_op_eliminate 0.00% : 0.000698s : 4: opt.transform.symbol_engine_opt 3.72% : 0.574081s : 1: opt_a 0.01% : 0.001590s : 1: opt_after_cconv 0.03% : 0.003933s : 1: opt_b 3.81% : 0.588431s : 1: optimize 0.00% : 0.000151s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000088s : 1: order_py_execute_after_rewriter 0.00% : 0.000123s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000154s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000014s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000060s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000186s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000111s : 1: pipeline_split 0.00% : 0.000104s : 1: pre_auto_parallel 0.00% : 0.000146s : 1: py_interpret_to_execute 0.00% : 0.000151s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000113s : 1: remove_cast_before_assign_add 0.00% : 0.000625s : 1: remove_dup_value 0.84% : 0.130306s : 3: renormalize.infer 0.35% : 0.054266s : 3: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000954s : 1: rewriter_after_opt_a 0.01% : 0.001753s : 2: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000156s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.00% : 0.000139s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000808s : 1: symbol_engine_optimizer 84.78% : 13.094765s : 1: task_emit 0.01% : 0.000936s : 1: tuple_transform 4.71% : 0.727442s : 1: type_inference 0.01% : 0.001285s : 1: validate TotalTime = 14.3639, [21] [bootstrap]: 0.00155441 [type_inference]: 0.747248 [auto_monad]: 0.0019822 [graph_reusing]: 2.691e-05 [inline]: 0.0429994, [2] [rewriter_before_opt_a]: 0.00155831 [a1a2]: 0.041365, [2] [Cycle 1]: 0.0283012, [11] [expand_dump_flag]: 3.61e-05 [switch_simplify]: 0.00107839 [loop_unroll]: 0.00067601 [a_1]: 0.0221169 [recompute_prepare]: 0.00016033 [updatestate_depend_eliminate]: 0.00036372 [updatestate_assign_eliminate]: 0.00012044 [updatestate_loads_eliminate]: 0.0002087 [parameter_eliminate]: 7.40995e-06 [a_2]: 0.00322217 [parallel_inline_pass]: 0.00010148 [Cycle 2]: 0.00527207, [11] [expand_dump_flag]: 2.10991e-06 [switch_simplify]: 9.351e-05 [loop_unroll]: 9.17399e-05 [a_1]: 0.00313306 [recompute_prepare]: 9.857e-05 [updatestate_depend_eliminate]: 7.236e-05 [updatestate_assign_eliminate]: 6.004e-05 [updatestate_loads_eliminate]: 6.28199e-05 [parameter_eliminate]: 3.60003e-06 [a_2]: 0.00148522 [parallel_inline_pass]: 9.952e-05 [parallel-infer-symbol]: 0.00018484 [pre_auto_parallel]: 0.00010974 [insert-virtual-dataset]: 0.00137077 [parallel-infer-symbol-second]: 2.46998e-06 [dataset_repeat_opt]: 0.00012418 [pipeline_split]: 9.97101e-05 [optimize]: 0.590364, [52] [py_interpret_to_execute]: 0.00013395 [rewriter_before_opt_a]: 0.00028192 [opt_a]: 0.57558, [3] [Cycle 1]: 0.495885, [46] [expand_dump_flag]: 1.94996e-06 [switch_simplify]: 0.00011198 [loop_unroll]: 9.665e-05 [a_1]: 0.0033163 [recompute_prepare]: 0.00010248 [updatestate_depend_eliminate]: 0.0001022 [updatestate_assign_eliminate]: 6.576e-05 [updatestate_loads_eliminate]: 6.74999e-05 [parameter_eliminate]: 3.43996e-06 [a_2]: 0.00153813 [accelerated_algorithm]: 0.00023443 [shard]: 2.72994e-06 [meta_shard_fg_expand]: 4.95e-05 [shard_inline]: 0.00010612 [auto_parallel]: 7.37399e-05 [parallel]: 0.0158882 [flash_sp]: 5.73901e-05 [merge_comm]: 0.00013099 [allreduce_fusion]: 7.513e-05 [matmul_add_comm_reduction]: 9.91899e-05 [allreduce_slice_to_reducescatter]: 6.19912e-07 [virtual_shard_identity]: 0.00012357 [virtual_dataset]: 0.00016505 [get_grad_eliminate_]: 0.00015095 [virtual_output]: 0.00011229 [merge_forward]: 7.899e-05 [cell_reuse_recompute_pass]: 2.79001e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020771 [before_grad]: 0.00020189 [inplace_validation]: 0.00012922 [parallel_renormalize]: 0.0220561 [update_top_fg]: 8.89995e-07 [cast_eliminate]: 0.00014821 [meta_fg_expand]: 0.261442 [inplace_validation_after_expand]: 0.00150452 [flash_sp_send_recv_attached]: 0.00117372 [receive_attached]: 9.209e-05 [after_resolve]: 0.00194864 [a_after_grad]: 0.00382703 [special_op_eliminate]: 0.00181568 [renormalize]: 0.146703 [add_forward_monad_depend]: 0.00036495 [auto_monad_grad]: 0.00021495 [auto_monad_eliminator]: 0.00184168 [cse]: 0.00415127 [a_3]: 0.02474 [Cycle 2]: 0.0678889, [46] [expand_dump_flag]: 5.076e-05 [switch_simplify]: 0.00181127 [loop_unroll]: 0.00152303 [a_1]: 0.0307353 [recompute_prepare]: 0.00017119 [updatestate_depend_eliminate]: 0.00022408 [updatestate_assign_eliminate]: 0.00010224 [updatestate_loads_eliminate]: 0.00017916 [parameter_eliminate]: 4.18001e-06 [a_2]: 0.00429477 [accelerated_algorithm]: 0.00016237 [shard]: 2.51003e-06 [meta_shard_fg_expand]: 7.189e-05 [shard_inline]: 0.00013934 [auto_parallel]: 0.00011582 [parallel]: 1.462e-05 [flash_sp]: 0.00011865 [merge_comm]: 0.00010835 [allreduce_fusion]: 9.266e-05 [matmul_add_comm_reduction]: 0.00011117 [allreduce_slice_to_reducescatter]: 4.80097e-07 [virtual_shard_identity]: 0.00014121 [virtual_dataset]: 0.00013546 [get_grad_eliminate_]: 0.00013161 [virtual_output]: 0.00013418 [merge_forward]: 9.01e-05 [cell_reuse_recompute_pass]: 2.35008e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024847 [before_grad]: 0.00023646 [inplace_validation]: 8.31099e-05 [parallel_renormalize]: 7.99773e-08 [update_top_fg]: 7.29924e-07 [cast_eliminate]: 0.00015212 [meta_fg_expand]: 0.00027266 [inplace_validation_after_expand]: 0.00017715 [flash_sp_send_recv_attached]: 2.07999e-06 [receive_attached]: 1.75997e-06 [after_resolve]: 0.0001581 [a_after_grad]: 0.00022647 [special_op_eliminate]: 0.00013474 [renormalize]: 0.0173455 [add_forward_monad_depend]: 5.15999e-06 [auto_monad_grad]: 2.52004e-06 [auto_monad_eliminator]: 0.00028378 [cse]: 0.00651902 [a_3]: 0.00095047 [Cycle 3]: 0.0117825, [46] [expand_dump_flag]: 2.62004e-06 [switch_simplify]: 0.00013 [loop_unroll]: 0.00012638 [a_1]: 0.00422828 [recompute_prepare]: 0.00013381 [updatestate_depend_eliminate]: 0.00015004 [updatestate_assign_eliminate]: 9.47501e-05 [updatestate_loads_eliminate]: 9.16501e-05 [parameter_eliminate]: 2.94007e-06 [a_2]: 0.00212488 [accelerated_algorithm]: 0.0001568 [shard]: 1.67999e-06 [meta_shard_fg_expand]: 5.061e-05 [shard_inline]: 0.00013246 [auto_parallel]: 0.00011217 [parallel]: 1.014e-05 [flash_sp]: 2.26009e-06 [merge_comm]: 0.00010601 [allreduce_fusion]: 9.58199e-05 [matmul_add_comm_reduction]: 0.00011911 [allreduce_slice_to_reducescatter]: 4.50062e-07 [virtual_shard_identity]: 0.0001378 [virtual_dataset]: 0.00013082 [get_grad_eliminate_]: 0.00012616 [virtual_output]: 0.00012886 [merge_forward]: 9.146e-05 [cell_reuse_recompute_pass]: 2.50991e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024662 [before_grad]: 0.00023106 [inplace_validation]: 8.92101e-05 [parallel_renormalize]: 7.99773e-08 [update_top_fg]: 4.00003e-07 [cast_eliminate]: 0.00014731 [meta_fg_expand]: 0.00011456 [inplace_validation_after_expand]: 0.00011426 [flash_sp_send_recv_attached]: 1.69e-06 [receive_attached]: 1.43005e-06 [after_resolve]: 0.0001454 [a_after_grad]: 0.00021586 [special_op_eliminate]: 0.00012939 [renormalize]: 6.00703e-08 [add_forward_monad_depend]: 2.21992e-06 [auto_monad_grad]: 1.63005e-06 [auto_monad_eliminator]: 0.00017004 [cse]: 0.00040604 [a_3]: 0.00097518 [py_interpret_to_execute_after_opt_a]: 0.0001399 [slice_cell_reuse_recomputed_activation]: 2.78e-06 [rewriter_after_opt_a]: 0.00101729 [convert_after_rewriter]: 0.00011852 [order_py_execute_after_rewriter]: 8.781e-05 [opt_b]: 0.00395248, [1] [Cycle 1]: 0.00394446, [7] [b_1]: 0.00307077 [b_2]: 0.00013619 [updatestate_depend_eliminate]: 9.778e-05 [updatestate_assign_eliminate]: 8.825e-05 [updatestate_loads_eliminate]: 9.065e-05 [renormalize]: 5.50062e-07 [cse]: 0.00040336 [optimize_parallel_all_gather_comm]: 0.00013714 [overlap_param_gather]: 1.37999e-06 [cconv]: 7.04001e-05 [loop_unroll]: 0.00102556 [opt_after_cconv]: 0.00161932, [1] [Cycle 1]: 0.00161167, [7] [c_1]: 0.00083211 [parameter_eliminate]: 2.75997e-06 [updatestate_depend_eliminate]: 0.00013181 [updatestate_assign_eliminate]: 9.46401e-05 [updatestate_loads_eliminate]: 9.223e-05 [cse]: 0.00039841 [renormalize]: 5.40051e-07 [remove_dup_value]: 0.00058 [tuple_transform]: 0.00093281, [1] [Cycle 1]: 0.00092505, [2] [d_1]: 0.00090812 [renormalize]: 4.60073e-07 [partial_unused_args_eliminate]: 2.93995e-06 [add_cache_embedding]: 0.00015266 [add_recomputation]: 0.0006916 [cse_after_recomputation]: 0.00031244, [1] [Cycle 1]: 0.00030374, [1] [cse]: 0.00028962 [environ_conv]: 0.00014289 [swap_dp_allreduce_reducescatter]: 0.00013024 [bias_add_comm_swap]: 2.91993e-06 [label_micro_interleaved_index]: 1.74006e-06 [label_fine_grained_interleaved_index]: 0.00051659 [merge_cast_opt]: 1.69e-06 [slice_recompute_activation]: 0.00014635 [micro_interleaved_order_control]: 2.00002e-06 [assign_add_opt]: 0.00038988 [ForceFp32Comm]: 1.36998e-06 [remove_cast_before_assign_add]: 0.00010829 [full_micro_interleaved_order_control]: 2.84996e-06 [reorder_send_recv_between_fp_bp]: 1.71002e-06 [comm_op_add_attrs]: 0.00014933 [add_comm_op_reuse_tag]: 0.00015186 [interleave_split_concat_branches]: 9.59961e-07 [interleave_parallel_branches]: 1.02003e-06 [overlap_opt_shard_in_pipeline]: 3.296e-05 [overlap_opt_shard_grad_in_pipeline]: 3.52995e-06 [control_data_broadcast_order]: 1.39e-06 [grouped_pairwise_exchange_alltoall]: 1.13799e-05 [offloading_packed_experts]: 2.30002e-06 [overlap_recompute_and_grad_model_parallel]: 2.22004e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.89994e-07 [overlap_recompute_allgather_and_fa_grad]: 0.00010536 [overlap_grad_ring_attention]: 0.00016111 [overlap_grad_flash_sp]: 0.00012128 [begin_end_overlap_inline]: 7.79983e-07 [split_matmul_comm_elemetwise]: 1.86998e-06 [split_layernorm_comm]: 2.26009e-06 [handle_group_info]: 5.77001e-06 [symbol_engine_optimizer]: 0.00079756, [1] [Cycle 1]: 0.00079047, [6] [build]: 5.044e-05 [elim_shapecalc]: 0.00014251 [elim_not_effective]: 0.00021819 [opt_reshape]: 0.0001302 [fold_const_symbol]: 0.00020988 [renormalize]: 5.39934e-07 [pipeline_parallel_scheduler]: 3.71004e-06 [auto_monad_reorder]: 0.00034651 [get_jit_bprop_graph]: 5.49946e-07 [rewriter_after_jit_bprop_graph]: 3.89991e-07 [eliminate_special_op_node]: 0.0014256 [distribtued_split]: 0.00038929 [validate]: 0.00029021 [task_emit]: 12.9739 [execute]: 1.25499e-05 Sums bootstrap : 0.001554s : 0.01% type_inference : 0.747248s : 5.21% auto_monad : 0.001982s : 0.01% graph_reusing : 0.000027s : 0.00% inline.rewriter_before_opt_a : 0.001558s : 0.01% inline.a1a2.expand_dump_flag : 0.000038s : 0.00% inline.a1a2.switch_simplify : 0.001172s : 0.01% inline.a1a2.loop_unroll : 0.000768s : 0.01% inline.a1a2.a_1 : 0.025250s : 0.18% inline.a1a2.recompute_prepare : 0.000259s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000436s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000180s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000272s : 0.00% inline.a1a2.parameter_eliminate : 0.000011s : 0.00% inline.a1a2.a_2 : 0.004707s : 0.03% inline.a1a2.parallel_inline_pass : 0.000201s : 0.00% parallel-infer-symbol : 0.000185s : 0.00% pre_auto_parallel : 0.000110s : 0.00% insert-virtual-dataset : 0.001371s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000124s : 0.00% pipeline_split : 0.000100s : 0.00% optimize.py_interpret_to_execute : 0.000134s : 0.00% optimize.rewriter_before_opt_a : 0.000282s : 0.00% optimize.opt_a.expand_dump_flag : 0.000055s : 0.00% optimize.opt_a.switch_simplify : 0.002053s : 0.01% optimize.opt_a.loop_unroll : 0.001746s : 0.01% optimize.opt_a.a_1 : 0.038280s : 0.27% optimize.opt_a.recompute_prepare : 0.000407s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000476s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000263s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000338s : 0.00% optimize.opt_a.parameter_eliminate : 0.000011s : 0.00% optimize.opt_a.a_2 : 0.007958s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000554s : 0.00% optimize.opt_a.shard : 0.000007s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000172s : 0.00% optimize.opt_a.shard_inline : 0.000378s : 0.00% optimize.opt_a.auto_parallel : 0.000302s : 0.00% optimize.opt_a.parallel : 0.015913s : 0.11% optimize.opt_a.flash_sp : 0.000178s : 0.00% optimize.opt_a.merge_comm : 0.000345s : 0.00% optimize.opt_a.allreduce_fusion : 0.000264s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000329s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000403s : 0.00% optimize.opt_a.virtual_dataset : 0.000431s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000409s : 0.00% optimize.opt_a.virtual_output : 0.000375s : 0.00% optimize.opt_a.merge_forward : 0.000261s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000703s : 0.00% optimize.opt_a.before_grad : 0.000669s : 0.00% optimize.opt_a.inplace_validation : 0.000302s : 0.00% optimize.opt_a.parallel_renormalize : 0.022056s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000448s : 0.00% optimize.opt_a.meta_fg_expand : 0.261829s : 1.82% optimize.opt_a.inplace_validation_after_expand : 0.001796s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001177s : 0.01% optimize.opt_a.receive_attached : 0.000095s : 0.00% optimize.opt_a.after_resolve : 0.002252s : 0.02% optimize.opt_a.a_after_grad : 0.004269s : 0.03% optimize.opt_a.special_op_eliminate : 0.002080s : 0.01% optimize.opt_a.renormalize : 0.164049s : 1.14% optimize.opt_a.add_forward_monad_depend : 0.000372s : 0.00% optimize.opt_a.auto_monad_grad : 0.000219s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002295s : 0.02% optimize.opt_a.cse : 0.011076s : 0.08% optimize.opt_a.a_3 : 0.026666s : 0.19% optimize.py_interpret_to_execute_after_opt_a : 0.000140s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001017s : 0.01% optimize.convert_after_rewriter : 0.000119s : 0.00% optimize.order_py_execute_after_rewriter : 0.000088s : 0.00% optimize.opt_b.b_1 : 0.003071s : 0.02% optimize.opt_b.b_2 : 0.000136s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000098s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000088s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000091s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000403s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000137s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000070s : 0.00% optimize.loop_unroll : 0.001026s : 0.01% optimize.opt_after_cconv.c_1 : 0.000832s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000132s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000095s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000092s : 0.00% optimize.opt_after_cconv.cse : 0.000398s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000580s : 0.00% optimize.tuple_transform.d_1 : 0.000908s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000153s : 0.00% optimize.add_recomputation : 0.000692s : 0.00% optimize.cse_after_recomputation.cse : 0.000290s : 0.00% optimize.environ_conv : 0.000143s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000130s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000517s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000146s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000390s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000108s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000149s : 0.00% optimize.add_comm_op_reuse_tag : 0.000152s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000033s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000105s : 0.00% optimize.overlap_grad_ring_attention : 0.000161s : 0.00% optimize.overlap_grad_flash_sp : 0.000121s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000006s : 0.00% optimize.symbol_engine_optimizer.build : 0.000050s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000143s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000218s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000130s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000210s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000347s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001426s : 0.01% distribtued_split : 0.000389s : 0.00% validate : 0.000290s : 0.00% task_emit : 12.973900s : 90.40% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.048155 4298 0.04% : 0.000021s : 5: substitution.ad_related_special_op_eliminate 0.05% : 0.000022s : 9: substitution.addn_check_dump 0.13% : 0.000063s : 7: substitution.addn_zero_filter 0.03% : 0.000015s : 7: substitution.adjust_all_reduce_mul_add 0.66% : 0.000316s : 71: substitution.arithmetic_simplify 0.11% : 0.000054s : 10: substitution.cast_eliminate 0.12% : 0.000056s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000025s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000019s : 12: substitution.environ_get_depend_swap 0.06% : 0.000030s : 27: substitution.environ_get_eliminate 0.07% : 0.000035s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000020s : 23: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.02% : 0.000012s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000030s : 107: substitution.fold_const_symbol 64.01% : 0.030826s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000083s : 126: substitution.graph_param_transform 0.02% : 0.000008s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 24.12% : 0.011613s : 331: substitution.inline 1.37% : 0.000660s : 112: substitution.inline_without_move 0.26% : 0.000123s : 309: substitution.j_node_and_user_rematch 0.27% : 0.000129s : 40: substitution.less_batch_normalization 0.09% : 0.000046s : 90: substitution.load_eliminater 0.11% : 0.000052s : 10: substitution.merge_addn 0.23% : 0.000111s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.07% : 0.000036s : 1: substitution.partial_defer_inline 0.15% : 0.000074s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.07% : 0.000032s : 15: substitution.reduce_eliminate 0.33% : 0.000157s : 309: substitution.remove_not_recompute_node 2.05% : 0.000987s : 508: substitution.replace_applicator 0.22% : 0.000107s : 251: substitution.replace_old_param 0.08% : 0.000038s : 11: substitution.reshape_eliminate 0.03% : 0.000013s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000010s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.17% : 0.000083s : 34: substitution.switch_simplify 0.06% : 0.000029s : 11: substitution.tile_eliminate 0.53% : 0.000253s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000132s : 107: substitution.tuple_list_get_item_const_eliminator 0.43% : 0.000209s : 107: substitution.tuple_list_get_item_depend_reorder 1.60% : 0.000773s : 308: substitution.tuple_list_get_item_eliminator 0.38% : 0.000182s : 107: substitution.tuple_list_get_set_item_eliminator 0.51% : 0.000244s : 210: substitution.updatestate_pure_node_eliminater 0.69% : 0.000334s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000011s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.746780 2 96.52% : 0.720767s : 1: type_inference.infer 3.48% : 0.026013s : 1: type_inference.specialize ------[replace.] 0.009570 775 0.43% : 0.000041s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000006s : 1: replace.arithmetic_simplify 0.47% : 0.000045s : 7: replace.depend_value_elim 0.41% : 0.000040s : 3: replace.environ_get_set_eliminate 29.77% : 0.002849s : 183: replace.getattr_setattr_resolve 30.56% : 0.002925s : 310: replace.inline 0.23% : 0.000022s : 1: replace.merge_addn 1.21% : 0.000116s : 7: replace.partial_eliminate 4.06% : 0.000388s : 25: replace.replace_applicator 3.86% : 0.000370s : 34: replace.switch_simplify 0.56% : 0.000054s : 6: replace.tuple_list_get_item_depend_reorder 27.99% : 0.002679s : 191: replace.tuple_list_get_item_eliminator 0.17% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.21% : 0.000020s : 1: replace.virtual_dataset_eliminate ------[match.] 0.040870 775 0.04% : 0.000017s : 5: match.ad_related_special_op_eliminate 0.03% : 0.000013s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000017s : 3: match.environ_get_set_eliminate 70.28% : 0.028722s : 183: match.getattr_setattr_resolve 27.90% : 0.011403s : 310: match.inline 0.06% : 0.000025s : 1: match.merge_addn 0.10% : 0.000041s : 7: match.partial_eliminate 0.24% : 0.000099s : 25: match.replace_applicator 0.16% : 0.000064s : 34: match.switch_simplify 0.08% : 0.000031s : 6: match.tuple_list_get_item_depend_reorder 1.02% : 0.000418s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.021085131318 0.74% : 0.000156s : 1198: predicate.accumulaten_eliminater 0.27% : 0.000056s : 254: predicate.ad_related_special_op_eliminate 0.54% : 0.000114s : 835: predicate.addn_check_dump 0.75% : 0.000158s : 1198: predicate.addn_zero_filter 0.93% : 0.000196s : 1198: predicate.adjust_all_reduce_mul_add 1.72% : 0.000362s : 2034: predicate.arithmetic_simplify 1.10% : 0.000232s : 1586: predicate.cast_eliminate 3.23% : 0.000682s : 3484: predicate.check_bprop_eliminate 0.54% : 0.000114s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000035s : 242: predicate.convert_tensor_all_eliminate 1.12% : 0.000236s : 1399: predicate.convert_tensor_eliminate 0.91% : 0.000191s : 838: predicate.depend_value_elim 0.82% : 0.000173s : 1202: predicate.dict_get_item_const_eliminator 0.82% : 0.000173s : 1202: predicate.dict_get_item_eliminator 1.03% : 0.000218s : 1202: predicate.dict_set_item_eliminator 0.04% : 0.000009s : 126: predicate.elim_not_effective 0.10% : 0.000021s : 126: predicate.elim_shapecalc_of_broadcastargs 0.85% : 0.000179s : 1334: predicate.environ_add_const_eliminate 0.84% : 0.000177s : 1337: predicate.environ_get_add_eliminate 0.90% : 0.000191s : 1334: predicate.environ_get_depend_swap 1.45% : 0.000306s : 2172: predicate.environ_get_eliminate 0.89% : 0.000188s : 1337: predicate.environ_get_set_eliminate 1.10% : 0.000232s : 1717: predicate.exchange_switch_depend_value 1.39% : 0.000293s : 1717: predicate.float_depend_g_call 0.54% : 0.000114s : 835: predicate.float_environ_get_switch 0.63% : 0.000133s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000059s : 395: predicate.get_grad_eliminate 2.27% : 0.000479s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.54% : 0.000113s : 835: predicate.incorporate_call 0.53% : 0.000111s : 835: predicate.incorporate_call_switch 3.83% : 0.000808s : 4602: predicate.inline 2.25% : 0.000474s : 2203: predicate.inline_without_move 0.14% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.32% : 0.000067s : 388: predicate.less_batch_normalization 1.08% : 0.000228s : 1660: predicate.list_to_tuple_eliminator_ 1.84% : 0.000388s : 2874: predicate.load_eliminater 0.18% : 0.000039s : 135: predicate.loop_unroll_after_grad 2.37% : 0.000499s : 2640: predicate.loop_unroll_before_grad 1.22% : 0.000256s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000117s : 837: predicate.merge_addn 3.08% : 0.000650s : 3380: predicate.micro_step_allgather_replace 3.09% : 0.000651s : 3380: predicate.mini_step_allgather_replace 0.75% : 0.000159s : 1199: predicate.minmaximum_grad 0.17% : 0.000036s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000022s : 135: predicate.parallel_virtual_node 2.01% : 0.000423s : 1717: predicate.partial_defer_inline 1.08% : 0.000228s : 1541: predicate.partial_eliminate 0.77% : 0.000162s : 1198: predicate.print_const_string_wrapper 0.55% : 0.000116s : 824: predicate.reduce_all_const_elim 0.98% : 0.000206s : 1199: predicate.reduce_eliminate 0.14% : 0.000029s : 395: predicate.remove_not_recompute_node 1.90% : 0.000401s : 4829: predicate.replace_applicator 0.79% : 0.000166s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.97% : 0.000205s : 1199: predicate.reshape_eliminate 3.18% : 0.000671s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000022s : 135: predicate.row_tensor_eliminate 3.30% : 0.000696s : 3484: predicate.same_eliminate 0.23% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.29% : 0.000061s : 395: predicate.shard_identity_eliminate 2.06% : 0.000435s : 2338: predicate.special_op_eliminate 0.62% : 0.000130s : 837: predicate.specialize_transform 3.44% : 0.000726s : 3380: predicate.split_environ_get_set_with_tuple_value 1.57% : 0.000330s : 2203: predicate.stack_unstack_eliminate 1.87% : 0.000394s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.29% : 0.000272s : 1717: predicate.switch_defer_inline 4.47% : 0.000942s : 5201: predicate.switch_layer_defer_inline 4.29% : 0.000905s : 5262: predicate.switch_simplify 0.74% : 0.000157s : 1199: predicate.tile_eliminate 0.76% : 0.000160s : 1199: predicate.transpose_eliminate 1.26% : 0.000266s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.16% : 0.000244s : 1469: predicate.tuple_list_get_item_const_eliminator 0.93% : 0.000195s : 1469: predicate.tuple_list_get_item_depend_reorder 1.97% : 0.000415s : 2495: predicate.tuple_list_get_item_eliminator 0.99% : 0.000208s : 1469: predicate.tuple_list_get_set_item_eliminator 1.64% : 0.000346s : 2304: predicate.tuple_list_set_item_eliminator 1.19% : 0.000250s : 1660: predicate.tuple_to_list_eliminator_ 1.87% : 0.000394s : 2874: predicate.updatestate_pure_node_eliminater 2.45% : 0.000516s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000021s : 135: predicate.value_based_eliminate 0.28% : 0.000060s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000057s : 395: predicate.virtual_output_eliminate 0.10% : 0.000021s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.058931 747 69.18% : 0.040769s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.46% : 0.001452s : 22: func_graph_cloner_run.FuncGraphClonerNode 28.35% : 0.016709s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.348342 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.27% : 0.041371s : 1: a1a2 0.00% : 0.000160s : 1: add_cache_embedding 0.00% : 0.000159s : 1: add_comm_op_reuse_tag 0.00% : 0.000705s : 1: add_recomputation 0.00% : 0.000400s : 1: assign_add_opt 0.01% : 0.002007s : 1: auto_monad 0.00% : 0.000362s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.001605s : 1: bootstrap 0.00% : 0.000076s : 1: cconv 0.00% : 0.000157s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000126s : 1: convert_after_rewriter 0.00% : 0.000318s : 1: cse_after_recomputation 0.00% : 0.000133s : 1: dataset_repeat_opt 0.00% : 0.000405s : 1: distribtued_split 0.01% : 0.001442s : 1: eliminate_special_op_node 0.00% : 0.000153s : 1: environ_conv 0.00% : 0.000022s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000035s : 1: graph_reusing 0.00% : 0.000015s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000009s : 1: handle_group_info 0.28% : 0.043012s : 1: inline 0.01% : 0.001395s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000526s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.001037s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.032283s : 61: opt.transform.a1a2 0.00% : 0.000172s : 1: opt.transform.loop_unroll_optimizer 0.59% : 0.089830s : 148: opt.transform.opt_a 0.01% : 0.000829s : 1: opt.transform.opt_after_cconv 0.02% : 0.003174s : 27: opt.transform.opt_b 0.24% : 0.036608s : 16: opt.transform.opt_resolve 0.01% : 0.000905s : 1: opt.transform.opt_trans_graph 0.01% : 0.000836s : 6: opt.transform.special_op_eliminate 0.00% : 0.000695s : 4: opt.transform.symbol_engine_opt 3.75% : 0.575586s : 1: opt_a 0.01% : 0.001625s : 1: opt_after_cconv 0.03% : 0.003957s : 1: opt_b 3.85% : 0.590377s : 1: optimize 0.00% : 0.000145s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000093s : 1: order_py_execute_after_rewriter 0.00% : 0.000126s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000167s : 1: overlap_grad_ring_attention 0.00% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000038s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000111s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000198s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000109s : 1: pipeline_split 0.00% : 0.000120s : 1: pre_auto_parallel 0.00% : 0.000142s : 1: py_interpret_to_execute 0.00% : 0.000149s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000115s : 1: remove_cast_before_assign_add 0.00% : 0.000594s : 1: remove_dup_value 0.87% : 0.133377s : 3: renormalize.infer 0.34% : 0.052686s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001030s : 1: rewriter_after_opt_a 0.01% : 0.001865s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000153s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000137s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000802s : 1: symbol_engine_optimizer 84.53% : 12.973950s : 1: task_emit 0.01% : 0.000937s : 1: tuple_transform 4.87% : 0.747286s : 1: type_inference 0.01% : 0.001369s : 1: validate TotalTime = 14.5714, [21] [bootstrap]: 0.00130212 [type_inference]: 0.746543 [auto_monad]: 0.00374995 [graph_reusing]: 2.165e-05 [inline]: 0.0468994, [2] [rewriter_before_opt_a]: 0.00206267 [a1a2]: 0.0447583, [2] [Cycle 1]: 0.030308, [11] [expand_dump_flag]: 5.904e-05 [switch_simplify]: 0.00146388 [loop_unroll]: 0.0006369 [a_1]: 0.0231277 [recompute_prepare]: 0.00015612 [updatestate_depend_eliminate]: 0.00038447 [updatestate_assign_eliminate]: 0.00010762 [updatestate_loads_eliminate]: 0.0002283 [parameter_eliminate]: 6.37001e-06 [a_2]: 0.00381953 [parallel_inline_pass]: 0.00010511 [Cycle 2]: 0.00536997, [11] [expand_dump_flag]: 1.57999e-06 [switch_simplify]: 9.342e-05 [loop_unroll]: 9.251e-05 [a_1]: 0.00309812 [recompute_prepare]: 9.819e-05 [updatestate_depend_eliminate]: 0.00018984 [updatestate_assign_eliminate]: 6.253e-05 [updatestate_loads_eliminate]: 6.33299e-05 [parameter_eliminate]: 3.51004e-06 [a_2]: 0.00148788 [parallel_inline_pass]: 0.00010205 [parallel-infer-symbol]: 0.00020149 [pre_auto_parallel]: 9.198e-05 [insert-virtual-dataset]: 0.0015294 [parallel-infer-symbol-second]: 2.75997e-06 [dataset_repeat_opt]: 0.00015381 [pipeline_split]: 0.00010183 [optimize]: 0.609986, [52] [py_interpret_to_execute]: 0.00014236 [rewriter_before_opt_a]: 0.00028017 [opt_a]: 0.595448, [3] [Cycle 1]: 0.515082, [46] [expand_dump_flag]: 1.82993e-06 [switch_simplify]: 0.0001082 [loop_unroll]: 9.669e-05 [a_1]: 0.00328129 [recompute_prepare]: 0.00010047 [updatestate_depend_eliminate]: 9.53999e-05 [updatestate_assign_eliminate]: 6.208e-05 [updatestate_loads_eliminate]: 6.507e-05 [parameter_eliminate]: 3.41993e-06 [a_2]: 0.00154989 [accelerated_algorithm]: 0.00026347 [shard]: 2.48e-06 [meta_shard_fg_expand]: 4.294e-05 [shard_inline]: 0.00011978 [auto_parallel]: 8.564e-05 [parallel]: 0.0165089 [flash_sp]: 7.48399e-05 [merge_comm]: 0.00013058 [allreduce_fusion]: 7.51299e-05 [matmul_add_comm_reduction]: 0.00010097 [allreduce_slice_to_reducescatter]: 4.89992e-07 [virtual_shard_identity]: 0.00012696 [virtual_dataset]: 0.00015818 [get_grad_eliminate_]: 0.00011271 [virtual_output]: 0.00011687 [merge_forward]: 7.322e-05 [cell_reuse_recompute_pass]: 2.91003e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022 [before_grad]: 0.00020264 [inplace_validation]: 0.00012702 [parallel_renormalize]: 0.029066 [update_top_fg]: 9.89996e-07 [cast_eliminate]: 0.00013974 [meta_fg_expand]: 0.269897 [inplace_validation_after_expand]: 0.00149903 [flash_sp_send_recv_attached]: 0.00116984 [receive_attached]: 8.892e-05 [after_resolve]: 0.0019081 [a_after_grad]: 0.00384742 [special_op_eliminate]: 0.00175211 [renormalize]: 0.149759 [add_forward_monad_depend]: 0.00036935 [auto_monad_grad]: 0.00021704 [auto_monad_eliminator]: 0.00183624 [cse]: 0.00421501 [a_3]: 0.0248361 [Cycle 2]: 0.0686464, [46] [expand_dump_flag]: 4.878e-05 [switch_simplify]: 0.00182001 [loop_unroll]: 0.00157717 [a_1]: 0.0308141 [recompute_prepare]: 0.0001739 [updatestate_depend_eliminate]: 0.00022097 [updatestate_assign_eliminate]: 0.00010855 [updatestate_loads_eliminate]: 0.00016548 [parameter_eliminate]: 3.61993e-06 [a_2]: 0.00429408 [accelerated_algorithm]: 0.00016753 [shard]: 2.13005e-06 [meta_shard_fg_expand]: 7.315e-05 [shard_inline]: 0.00013966 [auto_parallel]: 0.00011195 [parallel]: 1.189e-05 [flash_sp]: 0.0001205 [merge_comm]: 0.00010926 [allreduce_fusion]: 9.248e-05 [matmul_add_comm_reduction]: 0.00010842 [allreduce_slice_to_reducescatter]: 4.89992e-07 [virtual_shard_identity]: 0.00014174 [virtual_dataset]: 0.00013493 [get_grad_eliminate_]: 0.00013199 [virtual_output]: 0.0001345 [merge_forward]: 9.041e-05 [cell_reuse_recompute_pass]: 2.01003e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024789 [before_grad]: 0.00023585 [inplace_validation]: 8.272e-05 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 7.79983e-07 [cast_eliminate]: 0.00015174 [meta_fg_expand]: 0.00029484 [inplace_validation_after_expand]: 0.00017207 [flash_sp_send_recv_attached]: 1.53994e-06 [receive_attached]: 1.37999e-06 [after_resolve]: 0.00015837 [a_after_grad]: 0.00022782 [special_op_eliminate]: 0.00016204 [renormalize]: 0.0178441 [add_forward_monad_depend]: 6.12997e-06 [auto_monad_grad]: 2.00991e-06 [auto_monad_eliminator]: 0.00027611 [cse]: 0.00659891 [a_3]: 0.00094469 [Cycle 3]: 0.0116985, [46] [expand_dump_flag]: 2.05997e-06 [switch_simplify]: 0.00013064 [loop_unroll]: 0.00012688 [a_1]: 0.00429956 [recompute_prepare]: 0.00013295 [updatestate_depend_eliminate]: 0.000137 [updatestate_assign_eliminate]: 9.30601e-05 [updatestate_loads_eliminate]: 8.842e-05 [parameter_eliminate]: 3.19991e-06 [a_2]: 0.00205935 [accelerated_algorithm]: 0.00015831 [shard]: 1.37999e-06 [meta_shard_fg_expand]: 4.75601e-05 [shard_inline]: 0.00013105 [auto_parallel]: 0.00011031 [parallel]: 9.15991e-06 [flash_sp]: 2.64996e-06 [merge_comm]: 0.00010268 [allreduce_fusion]: 9.31601e-05 [matmul_add_comm_reduction]: 0.00011587 [allreduce_slice_to_reducescatter]: 4.69969e-07 [virtual_shard_identity]: 0.00013616 [virtual_dataset]: 0.00013023 [get_grad_eliminate_]: 0.00012563 [virtual_output]: 0.00012834 [merge_forward]: 9.024e-05 [cell_reuse_recompute_pass]: 3.11993e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024334 [before_grad]: 0.00022979 [inplace_validation]: 8.739e-05 [parallel_renormalize]: 8.9989e-08 [update_top_fg]: 5.89993e-07 [cast_eliminate]: 0.00014658 [meta_fg_expand]: 0.00010905 [inplace_validation_after_expand]: 0.00011477 [flash_sp_send_recv_attached]: 1.70001e-06 [receive_attached]: 1.26997e-06 [after_resolve]: 0.00014546 [a_after_grad]: 0.00021693 [special_op_eliminate]: 0.0001592 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 2.35008e-06 [auto_monad_grad]: 1.59e-06 [auto_monad_eliminator]: 0.00016955 [cse]: 0.00039195 [a_3]: 0.00092091 [py_interpret_to_execute_after_opt_a]: 0.00012908 [slice_cell_reuse_recomputed_activation]: 2.37999e-06 [rewriter_after_opt_a]: 0.00104318 [convert_after_rewriter]: 0.00010855 [order_py_execute_after_rewriter]: 8.144e-05 [opt_b]: 0.00391921, [1] [Cycle 1]: 0.00391073, [7] [b_1]: 0.00305898 [b_2]: 0.00013589 [updatestate_depend_eliminate]: 9.519e-05 [updatestate_assign_eliminate]: 8.568e-05 [updatestate_loads_eliminate]: 8.785e-05 [renormalize]: 4.20026e-07 [cse]: 0.00038866 [optimize_parallel_all_gather_comm]: 0.00013175 [overlap_param_gather]: 1.11992e-06 [cconv]: 6.537e-05 [loop_unroll]: 0.00104193 [opt_after_cconv]: 0.00152026, [1] [Cycle 1]: 0.00151315, [7] [c_1]: 0.00076883 [parameter_eliminate]: 2.55008e-06 [updatestate_depend_eliminate]: 0.00012216 [updatestate_assign_eliminate]: 9.071e-05 [updatestate_loads_eliminate]: 8.95499e-05 [cse]: 0.0003819 [renormalize]: 4.29922e-07 [remove_dup_value]: 0.00058649 [tuple_transform]: 0.00093552, [1] [Cycle 1]: 0.00092788, [2] [d_1]: 0.00090985 [renormalize]: 3.59956e-07 [partial_unused_args_eliminate]: 3.20002e-06 [add_cache_embedding]: 0.00015465 [add_recomputation]: 0.00076623 [cse_after_recomputation]: 0.00030702, [1] [Cycle 1]: 0.00029865, [1] [cse]: 0.00028472 [environ_conv]: 8.731e-05 [swap_dp_allreduce_reducescatter]: 0.00012374 [bias_add_comm_swap]: 3.16009e-06 [label_micro_interleaved_index]: 1.76998e-06 [label_fine_grained_interleaved_index]: 0.00051444 [merge_cast_opt]: 1.53994e-06 [slice_recompute_activation]: 0.00014141 [micro_interleaved_order_control]: 1.59e-06 [assign_add_opt]: 0.00038195 [ForceFp32Comm]: 1.24995e-06 [remove_cast_before_assign_add]: 0.00010398 [full_micro_interleaved_order_control]: 2.74996e-06 [reorder_send_recv_between_fp_bp]: 2.70002e-06 [comm_op_add_attrs]: 0.00015017 [add_comm_op_reuse_tag]: 0.00014776 [interleave_split_concat_branches]: 9.30042e-07 [interleave_parallel_branches]: 9.50065e-07 [overlap_opt_shard_in_pipeline]: 3.466e-05 [overlap_opt_shard_grad_in_pipeline]: 3.66999e-06 [control_data_broadcast_order]: 1.12993e-06 [grouped_pairwise_exchange_alltoall]: 1.023e-05 [offloading_packed_experts]: 2.64996e-06 [overlap_recompute_and_grad_model_parallel]: 2.09e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.10018e-07 [overlap_recompute_allgather_and_fa_grad]: 9.11e-05 [overlap_grad_ring_attention]: 0.00014441 [overlap_grad_flash_sp]: 0.00012007 [begin_end_overlap_inline]: 8.89995e-07 [split_matmul_comm_elemetwise]: 1.94996e-06 [split_layernorm_comm]: 2.15997e-06 [handle_group_info]: 4.64008e-06 [symbol_engine_optimizer]: 0.00080015, [1] [Cycle 1]: 0.0007928, [6] [build]: 4.88999e-05 [elim_shapecalc]: 0.00014176 [elim_not_effective]: 0.0002208 [opt_reshape]: 0.00013044 [fold_const_symbol]: 0.00021175 [renormalize]: 4.10015e-07 [pipeline_parallel_scheduler]: 3.73996e-06 [auto_monad_reorder]: 0.00039114 [get_jit_bprop_graph]: 5.89993e-07 [rewriter_after_jit_bprop_graph]: 5.00004e-07 [eliminate_special_op_node]: 0.00139147 [distribtued_split]: 0.00037612 [validate]: 0.00029936 [task_emit]: 13.1568 [execute]: 1.338e-05 Sums bootstrap : 0.001302s : 0.01% type_inference : 0.746543s : 5.13% auto_monad : 0.003750s : 0.03% graph_reusing : 0.000022s : 0.00% inline.rewriter_before_opt_a : 0.002063s : 0.01% inline.a1a2.expand_dump_flag : 0.000061s : 0.00% inline.a1a2.switch_simplify : 0.001557s : 0.01% inline.a1a2.loop_unroll : 0.000729s : 0.01% inline.a1a2.a_1 : 0.026226s : 0.18% inline.a1a2.recompute_prepare : 0.000254s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000574s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000170s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000292s : 0.00% inline.a1a2.parameter_eliminate : 0.000010s : 0.00% inline.a1a2.a_2 : 0.005307s : 0.04% inline.a1a2.parallel_inline_pass : 0.000207s : 0.00% parallel-infer-symbol : 0.000201s : 0.00% pre_auto_parallel : 0.000092s : 0.00% insert-virtual-dataset : 0.001529s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000154s : 0.00% pipeline_split : 0.000102s : 0.00% optimize.py_interpret_to_execute : 0.000142s : 0.00% optimize.rewriter_before_opt_a : 0.000280s : 0.00% optimize.opt_a.expand_dump_flag : 0.000053s : 0.00% optimize.opt_a.switch_simplify : 0.002059s : 0.01% optimize.opt_a.loop_unroll : 0.001801s : 0.01% optimize.opt_a.a_1 : 0.038395s : 0.26% optimize.opt_a.recompute_prepare : 0.000407s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000453s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000264s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000319s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.007903s : 0.05% optimize.opt_a.accelerated_algorithm : 0.000589s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000164s : 0.00% optimize.opt_a.shard_inline : 0.000390s : 0.00% optimize.opt_a.auto_parallel : 0.000308s : 0.00% optimize.opt_a.parallel : 0.016530s : 0.11% optimize.opt_a.flash_sp : 0.000198s : 0.00% optimize.opt_a.merge_comm : 0.000343s : 0.00% optimize.opt_a.allreduce_fusion : 0.000261s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000325s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000405s : 0.00% optimize.opt_a.virtual_dataset : 0.000423s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000370s : 0.00% optimize.opt_a.virtual_output : 0.000380s : 0.00% optimize.opt_a.merge_forward : 0.000254s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000711s : 0.00% optimize.opt_a.before_grad : 0.000668s : 0.00% optimize.opt_a.inplace_validation : 0.000297s : 0.00% optimize.opt_a.parallel_renormalize : 0.029066s : 0.20% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000438s : 0.00% optimize.opt_a.meta_fg_expand : 0.270301s : 1.86% optimize.opt_a.inplace_validation_after_expand : 0.001786s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001173s : 0.01% optimize.opt_a.receive_attached : 0.000092s : 0.00% optimize.opt_a.after_resolve : 0.002212s : 0.02% optimize.opt_a.a_after_grad : 0.004292s : 0.03% optimize.opt_a.special_op_eliminate : 0.002073s : 0.01% optimize.opt_a.renormalize : 0.167603s : 1.15% optimize.opt_a.add_forward_monad_depend : 0.000378s : 0.00% optimize.opt_a.auto_monad_grad : 0.000221s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002282s : 0.02% optimize.opt_a.cse : 0.011206s : 0.08% optimize.opt_a.a_3 : 0.026702s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000129s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.001043s : 0.01% optimize.convert_after_rewriter : 0.000109s : 0.00% optimize.order_py_execute_after_rewriter : 0.000081s : 0.00% optimize.opt_b.b_1 : 0.003059s : 0.02% optimize.opt_b.b_2 : 0.000136s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000095s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000086s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000088s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000389s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000132s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000065s : 0.00% optimize.loop_unroll : 0.001042s : 0.01% optimize.opt_after_cconv.c_1 : 0.000769s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000122s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000091s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000090s : 0.00% optimize.opt_after_cconv.cse : 0.000382s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000586s : 0.00% optimize.tuple_transform.d_1 : 0.000910s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000155s : 0.00% optimize.add_recomputation : 0.000766s : 0.01% optimize.cse_after_recomputation.cse : 0.000285s : 0.00% optimize.environ_conv : 0.000087s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000124s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000514s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000141s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000382s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000104s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000150s : 0.00% optimize.add_comm_op_reuse_tag : 0.000148s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000035s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.00% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000091s : 0.00% optimize.overlap_grad_ring_attention : 0.000144s : 0.00% optimize.overlap_grad_flash_sp : 0.000120s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.00% optimize.symbol_engine_optimizer.build : 0.000049s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000142s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000221s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000130s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000212s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000391s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.001391s : 0.01% distribtued_split : 0.000376s : 0.00% validate : 0.000299s : 0.00% task_emit : 13.156802s : 90.37% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.049552 4298 0.04% : 0.000020s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000022s : 9: substitution.addn_check_dump 0.10% : 0.000051s : 7: substitution.addn_zero_filter 0.03% : 0.000015s : 7: substitution.adjust_all_reduce_mul_add 0.64% : 0.000316s : 71: substitution.arithmetic_simplify 0.11% : 0.000055s : 10: substitution.cast_eliminate 0.11% : 0.000054s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000024s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000019s : 12: substitution.environ_get_depend_swap 0.06% : 0.000028s : 27: substitution.environ_get_eliminate 0.07% : 0.000035s : 15: substitution.environ_get_set_eliminate 0.03% : 0.000017s : 23: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.03% : 0.000014s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000031s : 107: substitution.fold_const_symbol 62.72% : 0.031078s : 257: substitution.getattr_setattr_resolve 0.16% : 0.000081s : 126: substitution.graph_param_transform 0.02% : 0.000008s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 25.72% : 0.012747s : 331: substitution.inline 1.39% : 0.000691s : 112: substitution.inline_without_move 0.25% : 0.000122s : 309: substitution.j_node_and_user_rematch 0.28% : 0.000140s : 40: substitution.less_batch_normalization 0.09% : 0.000045s : 90: substitution.load_eliminater 0.11% : 0.000053s : 10: substitution.merge_addn 0.22% : 0.000110s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.08% : 0.000038s : 1: substitution.partial_defer_inline 0.15% : 0.000077s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.06% : 0.000031s : 15: substitution.reduce_eliminate 0.32% : 0.000160s : 309: substitution.remove_not_recompute_node 2.03% : 0.001005s : 508: substitution.replace_applicator 0.22% : 0.000109s : 251: substitution.replace_old_param 0.08% : 0.000037s : 11: substitution.reshape_eliminate 0.02% : 0.000012s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000011s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.19% : 0.000097s : 34: substitution.switch_simplify 0.06% : 0.000030s : 11: substitution.tile_eliminate 0.51% : 0.000255s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000135s : 107: substitution.tuple_list_get_item_const_eliminator 0.42% : 0.000207s : 107: substitution.tuple_list_get_item_depend_reorder 1.54% : 0.000762s : 308: substitution.tuple_list_get_item_eliminator 0.36% : 0.000176s : 107: substitution.tuple_list_get_set_item_eliminator 0.39% : 0.000193s : 210: substitution.updatestate_pure_node_eliminater 0.70% : 0.000346s : 265: substitution.updatestate_useless_node_eliminater 0.03% : 0.000013s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.745968 2 95.76% : 0.714362s : 1: type_inference.infer 4.24% : 0.031606s : 1: type_inference.specialize ------[replace.] 0.010122 775 0.40% : 0.000041s : 5: replace.ad_related_special_op_eliminate 0.06% : 0.000006s : 1: replace.arithmetic_simplify 0.65% : 0.000066s : 7: replace.depend_value_elim 0.40% : 0.000040s : 3: replace.environ_get_set_eliminate 28.59% : 0.002894s : 183: replace.getattr_setattr_resolve 30.17% : 0.003054s : 310: replace.inline 0.21% : 0.000021s : 1: replace.merge_addn 1.31% : 0.000132s : 7: replace.partial_eliminate 3.79% : 0.000384s : 25: replace.replace_applicator 5.00% : 0.000506s : 34: replace.switch_simplify 0.54% : 0.000054s : 6: replace.tuple_list_get_item_depend_reorder 28.52% : 0.002887s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.20% : 0.000020s : 1: replace.virtual_dataset_eliminate ------[match.] 0.042237 775 0.04% : 0.000017s : 5: match.ad_related_special_op_eliminate 0.04% : 0.000016s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000017s : 3: match.environ_get_set_eliminate 68.53% : 0.028945s : 183: match.getattr_setattr_resolve 29.68% : 0.012536s : 310: match.inline 0.06% : 0.000025s : 1: match.merge_addn 0.10% : 0.000044s : 7: match.partial_eliminate 0.24% : 0.000100s : 25: match.replace_applicator 0.18% : 0.000075s : 34: match.switch_simplify 0.09% : 0.000036s : 6: match.tuple_list_get_item_depend_reorder 0.96% : 0.000404s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000008s : 1: match.updatestate_useless_node_eliminater 0.03% : 0.000011s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020875131318 0.76% : 0.000158s : 1198: predicate.accumulaten_eliminater 0.25% : 0.000052s : 254: predicate.ad_related_special_op_eliminate 0.54% : 0.000114s : 835: predicate.addn_check_dump 0.76% : 0.000158s : 1198: predicate.addn_zero_filter 0.75% : 0.000156s : 1198: predicate.adjust_all_reduce_mul_add 1.74% : 0.000362s : 2034: predicate.arithmetic_simplify 1.10% : 0.000231s : 1586: predicate.cast_eliminate 3.30% : 0.000688s : 3484: predicate.check_bprop_eliminate 0.55% : 0.000114s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.14% : 0.000237s : 1399: predicate.convert_tensor_eliminate 0.56% : 0.000118s : 838: predicate.depend_value_elim 0.86% : 0.000180s : 1202: predicate.dict_get_item_const_eliminator 0.86% : 0.000179s : 1202: predicate.dict_get_item_eliminator 0.82% : 0.000170s : 1202: predicate.dict_set_item_eliminator 0.05% : 0.000010s : 126: predicate.elim_not_effective 0.10% : 0.000021s : 126: predicate.elim_shapecalc_of_broadcastargs 0.84% : 0.000176s : 1334: predicate.environ_add_const_eliminate 0.83% : 0.000174s : 1337: predicate.environ_get_add_eliminate 0.83% : 0.000174s : 1334: predicate.environ_get_depend_swap 1.44% : 0.000300s : 2172: predicate.environ_get_eliminate 0.84% : 0.000176s : 1337: predicate.environ_get_set_eliminate 1.14% : 0.000239s : 1717: predicate.exchange_switch_depend_value 1.44% : 0.000300s : 1717: predicate.float_depend_g_call 0.55% : 0.000114s : 835: predicate.float_environ_get_switch 0.64% : 0.000133s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000058s : 395: predicate.get_grad_eliminate 2.24% : 0.000467s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000114s : 835: predicate.incorporate_call 0.54% : 0.000112s : 835: predicate.incorporate_call_switch 3.91% : 0.000817s : 4602: predicate.inline 2.15% : 0.000450s : 2203: predicate.inline_without_move 0.14% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.33% : 0.000069s : 388: predicate.less_batch_normalization 1.13% : 0.000235s : 1660: predicate.list_to_tuple_eliminator_ 2.03% : 0.000424s : 2874: predicate.load_eliminater 0.17% : 0.000036s : 135: predicate.loop_unroll_after_grad 2.35% : 0.000490s : 2640: predicate.loop_unroll_before_grad 0.96% : 0.000200s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000117s : 837: predicate.merge_addn 3.18% : 0.000664s : 3380: predicate.micro_step_allgather_replace 3.20% : 0.000667s : 3380: predicate.mini_step_allgather_replace 0.77% : 0.000160s : 1199: predicate.minmaximum_grad 0.17% : 0.000036s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 1.98% : 0.000413s : 1717: predicate.partial_defer_inline 1.09% : 0.000227s : 1541: predicate.partial_eliminate 0.78% : 0.000163s : 1198: predicate.print_const_string_wrapper 0.55% : 0.000116s : 824: predicate.reduce_all_const_elim 0.96% : 0.000201s : 1199: predicate.reduce_eliminate 0.15% : 0.000030s : 395: predicate.remove_not_recompute_node 1.94% : 0.000405s : 4829: predicate.replace_applicator 0.80% : 0.000168s : 2203: predicate.replace_old_param 0.05% : 0.000011s : 135: predicate.reset_defer_inline 0.79% : 0.000166s : 1199: predicate.reshape_eliminate 3.24% : 0.000676s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000022s : 135: predicate.row_tensor_eliminate 3.36% : 0.000701s : 3484: predicate.same_eliminate 0.26% : 0.000053s : 633: predicate.set_cell_output_no_recompute 0.29% : 0.000060s : 395: predicate.shard_identity_eliminate 1.91% : 0.000399s : 2338: predicate.special_op_eliminate 0.63% : 0.000132s : 837: predicate.specialize_transform 3.75% : 0.000783s : 3380: predicate.split_environ_get_set_with_tuple_value 1.59% : 0.000331s : 2203: predicate.stack_unstack_eliminate 1.82% : 0.000379s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.22% : 0.000255s : 1717: predicate.switch_defer_inline 4.63% : 0.000966s : 5201: predicate.switch_layer_defer_inline 4.60% : 0.000960s : 5262: predicate.switch_simplify 0.79% : 0.000165s : 1199: predicate.tile_eliminate 0.76% : 0.000159s : 1199: predicate.transpose_eliminate 1.06% : 0.000222s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.02% : 0.000214s : 1469: predicate.tuple_list_get_item_const_eliminator 1.02% : 0.000214s : 1469: predicate.tuple_list_get_item_depend_reorder 2.03% : 0.000423s : 2495: predicate.tuple_list_get_item_eliminator 0.98% : 0.000204s : 1469: predicate.tuple_list_get_set_item_eliminator 1.64% : 0.000342s : 2304: predicate.tuple_list_set_item_eliminator 1.10% : 0.000229s : 1660: predicate.tuple_to_list_eliminator_ 1.86% : 0.000389s : 2874: predicate.updatestate_pure_node_eliminater 2.57% : 0.000537s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000020s : 135: predicate.value_based_eliminate 0.28% : 0.000058s : 397: predicate.virtual_dataset_eliminate 0.28% : 0.000057s : 395: predicate.virtual_output_eliminate 0.10% : 0.000021s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.060434 747 69.09% : 0.041751s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.69% : 0.001628s : 22: func_graph_cloner_run.FuncGraphClonerNode 28.22% : 0.017055s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.592239 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.29% : 0.044763s : 1: a1a2 0.00% : 0.000162s : 1: add_cache_embedding 0.00% : 0.000155s : 1: add_comm_op_reuse_tag 0.00% : 0.000779s : 1: add_recomputation 0.00% : 0.000392s : 1: assign_add_opt 0.02% : 0.003819s : 1: auto_monad 0.00% : 0.000407s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.001345s : 1: bootstrap 0.00% : 0.000072s : 1: cconv 0.00% : 0.000158s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000116s : 1: convert_after_rewriter 0.00% : 0.000312s : 1: cse_after_recomputation 0.00% : 0.000164s : 1: dataset_repeat_opt 0.00% : 0.000392s : 1: distribtued_split 0.01% : 0.001408s : 1: eliminate_special_op_node 0.00% : 0.000097s : 1: environ_conv 0.00% : 0.000024s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000034s : 1: graph_reusing 0.00% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000007s : 1: handle_group_info 0.30% : 0.046911s : 1: inline 0.01% : 0.001553s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000523s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.001053s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.22% : 0.034203s : 61: opt.transform.a1a2 0.00% : 0.000231s : 1: opt.transform.loop_unroll_optimizer 0.58% : 0.089974s : 148: opt.transform.opt_a 0.00% : 0.000766s : 1: opt.transform.opt_after_cconv 0.02% : 0.003162s : 27: opt.transform.opt_b 0.24% : 0.036771s : 16: opt.transform.opt_resolve 0.01% : 0.000907s : 1: opt.transform.opt_trans_graph 0.01% : 0.000828s : 6: opt.transform.special_op_eliminate 0.00% : 0.000698s : 4: opt.transform.symbol_engine_opt 3.82% : 0.595453s : 1: opt_a 0.01% : 0.001526s : 1: opt_after_cconv 0.03% : 0.003924s : 1: opt_b 3.91% : 0.609998s : 1: optimize 0.00% : 0.000139s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000087s : 1: order_py_execute_after_rewriter 0.00% : 0.000124s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000150s : 1: overlap_grad_ring_attention 0.00% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000039s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000097s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000214s : 1: parallel-infer-symbol 0.00% : 0.000010s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000113s : 1: pipeline_split 0.00% : 0.000102s : 1: pre_auto_parallel 0.00% : 0.000151s : 1: py_interpret_to_execute 0.00% : 0.000137s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000109s : 1: remove_cast_before_assign_add 0.00% : 0.000599s : 1: remove_dup_value 0.91% : 0.141902s : 3: renormalize.infer 0.35% : 0.054724s : 3: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001054s : 1: rewriter_after_opt_a 0.02% : 0.002368s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000148s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000131s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000804s : 1: symbol_engine_optimizer 84.38% : 13.156866s : 1: task_emit 0.01% : 0.000940s : 1: tuple_transform 4.79% : 0.746598s : 1: type_inference 0.01% : 0.001403s : 1: validate TotalTime = 14.2624, [21] [bootstrap]: 0.00132981 [type_inference]: 0.721161 [auto_monad]: 0.00210978 [graph_reusing]: 2.722e-05 [inline]: 0.0426438, [2] [rewriter_before_opt_a]: 0.00151589 [a1a2]: 0.0410553, [2] [Cycle 1]: 0.0280971, [11] [expand_dump_flag]: 3.282e-05 [switch_simplify]: 0.00106612 [loop_unroll]: 0.00072432 [a_1]: 0.0218937 [recompute_prepare]: 0.00016114 [updatestate_depend_eliminate]: 0.00034908 [updatestate_assign_eliminate]: 0.00011116 [updatestate_loads_eliminate]: 0.00020569 [parameter_eliminate]: 6.31995e-06 [a_2]: 0.00325128 [parallel_inline_pass]: 0.00010302 [Cycle 2]: 0.00535373, [11] [expand_dump_flag]: 1.26997e-06 [switch_simplify]: 9.72199e-05 [loop_unroll]: 9.426e-05 [a_1]: 0.00318684 [recompute_prepare]: 9.915e-05 [updatestate_depend_eliminate]: 7.402e-05 [updatestate_assign_eliminate]: 5.89601e-05 [updatestate_loads_eliminate]: 6.35501e-05 [parameter_eliminate]: 3.19001e-06 [a_2]: 0.00150606 [parallel_inline_pass]: 0.0001035 [parallel-infer-symbol]: 0.00018704 [pre_auto_parallel]: 0.00010426 [insert-virtual-dataset]: 0.00127905 [parallel-infer-symbol-second]: 2.49001e-06 [dataset_repeat_opt]: 0.00011606 [pipeline_split]: 9.588e-05 [optimize]: 0.5923, [52] [py_interpret_to_execute]: 0.00011877 [rewriter_before_opt_a]: 0.0002748 [opt_a]: 0.577814, [3] [Cycle 1]: 0.497811, [46] [expand_dump_flag]: 1.94006e-06 [switch_simplify]: 0.00011189 [loop_unroll]: 9.88999e-05 [a_1]: 0.00332687 [recompute_prepare]: 0.00010335 [updatestate_depend_eliminate]: 9.74899e-05 [updatestate_assign_eliminate]: 6.262e-05 [updatestate_loads_eliminate]: 6.683e-05 [parameter_eliminate]: 3.51004e-06 [a_2]: 0.0015944 [accelerated_algorithm]: 0.00023484 [shard]: 2.03995e-06 [meta_shard_fg_expand]: 4.86199e-05 [shard_inline]: 0.00010758 [auto_parallel]: 7.46801e-05 [parallel]: 0.0158179 [flash_sp]: 5.64e-05 [merge_comm]: 0.00012795 [allreduce_fusion]: 7.41299e-05 [matmul_add_comm_reduction]: 9.548e-05 [allreduce_slice_to_reducescatter]: 4.49945e-07 [virtual_shard_identity]: 0.00012481 [virtual_dataset]: 0.00015546 [get_grad_eliminate_]: 0.00011515 [virtual_output]: 0.00011335 [merge_forward]: 7.122e-05 [cell_reuse_recompute_pass]: 2.42994e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020497 [before_grad]: 0.0002094 [inplace_validation]: 0.00012989 [parallel_renormalize]: 0.0210736 [update_top_fg]: 6.39935e-07 [cast_eliminate]: 0.00014732 [meta_fg_expand]: 0.26159 [inplace_validation_after_expand]: 0.0015623 [flash_sp_send_recv_attached]: 0.0012043 [receive_attached]: 8.90701e-05 [after_resolve]: 0.00200646 [a_after_grad]: 0.00388056 [special_op_eliminate]: 0.00185404 [renormalize]: 0.149241 [add_forward_monad_depend]: 0.00035499 [auto_monad_grad]: 0.00020938 [auto_monad_eliminator]: 0.00179073 [cse]: 0.00414763 [a_3]: 0.0248901 [Cycle 2]: 0.0680967, [46] [expand_dump_flag]: 5.199e-05 [switch_simplify]: 0.00181796 [loop_unroll]: 0.00151667 [a_1]: 0.0308906 [recompute_prepare]: 0.00017069 [updatestate_depend_eliminate]: 0.00022154 [updatestate_assign_eliminate]: 0.00010318 [updatestate_loads_eliminate]: 0.00016214 [parameter_eliminate]: 3.80003e-06 [a_2]: 0.00436532 [accelerated_algorithm]: 0.00016524 [shard]: 2.44996e-06 [meta_shard_fg_expand]: 9.857e-05 [shard_inline]: 0.00014321 [auto_parallel]: 0.00011862 [parallel]: 1.402e-05 [flash_sp]: 0.00012043 [merge_comm]: 0.00010949 [allreduce_fusion]: 9.391e-05 [matmul_add_comm_reduction]: 0.00011006 [allreduce_slice_to_reducescatter]: 6.20028e-07 [virtual_shard_identity]: 0.0001437 [virtual_dataset]: 0.00013774 [get_grad_eliminate_]: 0.00013341 [virtual_output]: 0.00013554 [merge_forward]: 8.921e-05 [cell_reuse_recompute_pass]: 2.52004e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024989 [before_grad]: 0.00023944 [inplace_validation]: 8.574e-05 [parallel_renormalize]: 1.10012e-07 [update_top_fg]: 7.89994e-07 [cast_eliminate]: 0.00015435 [meta_fg_expand]: 0.00026463 [inplace_validation_after_expand]: 0.00018002 [flash_sp_send_recv_attached]: 2.11003e-06 [receive_attached]: 1.50001e-06 [after_resolve]: 0.00015915 [a_after_grad]: 0.0002304 [special_op_eliminate]: 0.00013699 [renormalize]: 0.0173804 [add_forward_monad_depend]: 4.52995e-06 [auto_monad_grad]: 2.25008e-06 [auto_monad_eliminator]: 0.00028699 [cse]: 0.00637491 [a_3]: 0.00100731 [Cycle 3]: 0.0118835, [46] [expand_dump_flag]: 1.95997e-06 [switch_simplify]: 0.00013408 [loop_unroll]: 0.00012958 [a_1]: 0.00439015 [recompute_prepare]: 0.00013558 [updatestate_depend_eliminate]: 0.00013808 [updatestate_assign_eliminate]: 9.011e-05 [updatestate_loads_eliminate]: 9.137e-05 [parameter_eliminate]: 2.86999e-06 [a_2]: 0.00207921 [accelerated_algorithm]: 0.00015835 [shard]: 1.27999e-06 [meta_shard_fg_expand]: 4.829e-05 [shard_inline]: 0.00015593 [auto_parallel]: 0.00011229 [parallel]: 1.02001e-05 [flash_sp]: 2.15997e-06 [merge_comm]: 0.00010455 [allreduce_fusion]: 9.505e-05 [matmul_add_comm_reduction]: 0.00011602 [allreduce_slice_to_reducescatter]: 7.89994e-07 [virtual_shard_identity]: 0.00013903 [virtual_dataset]: 0.00013423 [get_grad_eliminate_]: 0.0001283 [virtual_output]: 0.00013015 [merge_forward]: 9.08601e-05 [cell_reuse_recompute_pass]: 2.61003e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024488 [before_grad]: 0.00023337 [inplace_validation]: 8.79599e-05 [parallel_renormalize]: 7.99773e-08 [update_top_fg]: 5.59958e-07 [cast_eliminate]: 0.0001488 [meta_fg_expand]: 0.0001081 [inplace_validation_after_expand]: 0.00011406 [flash_sp_send_recv_attached]: 1.86998e-06 [receive_attached]: 1.32003e-06 [after_resolve]: 0.00014638 [a_after_grad]: 0.00021934 [special_op_eliminate]: 0.00013143 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 2.38011e-06 [auto_monad_grad]: 1.61992e-06 [auto_monad_eliminator]: 0.0001673 [cse]: 0.00039853 [a_3]: 0.00095845 [py_interpret_to_execute_after_opt_a]: 0.00013357 [slice_cell_reuse_recomputed_activation]: 2.33995e-06 [rewriter_after_opt_a]: 0.00099448 [convert_after_rewriter]: 0.00010878 [order_py_execute_after_rewriter]: 8.03299e-05 [opt_b]: 0.00397816, [1] [Cycle 1]: 0.00396986, [7] [b_1]: 0.00311136 [b_2]: 0.0001384 [updatestate_depend_eliminate]: 9.447e-05 [updatestate_assign_eliminate]: 8.482e-05 [updatestate_loads_eliminate]: 8.689e-05 [renormalize]: 4.60073e-07 [cse]: 0.00039522 [optimize_parallel_all_gather_comm]: 0.00013013 [overlap_param_gather]: 1.34006e-06 [cconv]: 6.553e-05 [loop_unroll]: 0.00089294 [opt_after_cconv]: 0.00163488, [1] [Cycle 1]: 0.00162748, [7] [c_1]: 0.00078568 [parameter_eliminate]: 2.56998e-06 [updatestate_depend_eliminate]: 0.00012709 [updatestate_assign_eliminate]: 9.049e-05 [updatestate_loads_eliminate]: 9.12501e-05 [cse]: 0.00047493 [renormalize]: 5.89993e-07 [remove_dup_value]: 0.00059199 [tuple_transform]: 0.00094987, [1] [Cycle 1]: 0.00094248, [2] [d_1]: 0.00092404 [renormalize]: 3.10014e-07 [partial_unused_args_eliminate]: 3.22005e-06 [add_cache_embedding]: 0.00014722 [add_recomputation]: 0.00068488 [cse_after_recomputation]: 0.00031003, [1] [Cycle 1]: 0.00030221, [1] [cse]: 0.00028825 [environ_conv]: 9.249e-05 [swap_dp_allreduce_reducescatter]: 0.00012597 [bias_add_comm_swap]: 2.68e-06 [label_micro_interleaved_index]: 1.71992e-06 [label_fine_grained_interleaved_index]: 0.00051272 [merge_cast_opt]: 1.414e-05 [slice_recompute_activation]: 0.00015158 [micro_interleaved_order_control]: 2.50002e-06 [assign_add_opt]: 0.00039378 [ForceFp32Comm]: 1.12003e-06 [remove_cast_before_assign_add]: 0.00010333 [full_micro_interleaved_order_control]: 2.11003e-06 [reorder_send_recv_between_fp_bp]: 1.85007e-06 [comm_op_add_attrs]: 0.00014421 [add_comm_op_reuse_tag]: 0.00014642 [interleave_split_concat_branches]: 1.07998e-06 [interleave_parallel_branches]: 9.10019e-07 [overlap_opt_shard_in_pipeline]: 3.111e-05 [overlap_opt_shard_grad_in_pipeline]: 3.54997e-06 [control_data_broadcast_order]: 1.12993e-06 [grouped_pairwise_exchange_alltoall]: 1.123e-05 [offloading_packed_experts]: 2.88e-06 [overlap_recompute_and_grad_model_parallel]: 2.30002e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.49948e-07 [overlap_recompute_allgather_and_fa_grad]: 9.48e-05 [overlap_grad_ring_attention]: 0.00015734 [overlap_grad_flash_sp]: 0.00011982 [begin_end_overlap_inline]: 1.12003e-06 [split_matmul_comm_elemetwise]: 2.05007e-06 [split_layernorm_comm]: 2.05997e-06 [handle_group_info]: 5.27001e-06 [symbol_engine_optimizer]: 0.0008051, [1] [Cycle 1]: 0.00079838, [6] [build]: 4.898e-05 [elim_shapecalc]: 0.00014315 [elim_not_effective]: 0.00022569 [opt_reshape]: 0.0001312 [fold_const_symbol]: 0.00021186 [renormalize]: 4.39934e-07 [pipeline_parallel_scheduler]: 3.53006e-06 [auto_monad_reorder]: 0.00035357 [get_jit_bprop_graph]: 4.69969e-07 [rewriter_after_jit_bprop_graph]: 4.30038e-07 [eliminate_special_op_node]: 0.00141477 [distribtued_split]: 0.00038459 [validate]: 0.00029762 [task_emit]: 12.8971 [execute]: 1.337e-05 Sums bootstrap : 0.001330s : 0.01% type_inference : 0.721161s : 5.06% auto_monad : 0.002110s : 0.01% graph_reusing : 0.000027s : 0.00% inline.rewriter_before_opt_a : 0.001516s : 0.01% inline.a1a2.expand_dump_flag : 0.000034s : 0.00% inline.a1a2.switch_simplify : 0.001163s : 0.01% inline.a1a2.loop_unroll : 0.000819s : 0.01% inline.a1a2.a_1 : 0.025081s : 0.18% inline.a1a2.recompute_prepare : 0.000260s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000423s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000170s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000269s : 0.00% inline.a1a2.parameter_eliminate : 0.000010s : 0.00% inline.a1a2.a_2 : 0.004757s : 0.03% inline.a1a2.parallel_inline_pass : 0.000207s : 0.00% parallel-infer-symbol : 0.000187s : 0.00% pre_auto_parallel : 0.000104s : 0.00% insert-virtual-dataset : 0.001279s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000116s : 0.00% pipeline_split : 0.000096s : 0.00% optimize.py_interpret_to_execute : 0.000119s : 0.00% optimize.rewriter_before_opt_a : 0.000275s : 0.00% optimize.opt_a.expand_dump_flag : 0.000056s : 0.00% optimize.opt_a.switch_simplify : 0.002064s : 0.01% optimize.opt_a.loop_unroll : 0.001745s : 0.01% optimize.opt_a.a_1 : 0.038608s : 0.27% optimize.opt_a.recompute_prepare : 0.000410s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000457s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000256s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000320s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.008039s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000558s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000195s : 0.00% optimize.opt_a.shard_inline : 0.000407s : 0.00% optimize.opt_a.auto_parallel : 0.000306s : 0.00% optimize.opt_a.parallel : 0.015842s : 0.11% optimize.opt_a.flash_sp : 0.000179s : 0.00% optimize.opt_a.merge_comm : 0.000342s : 0.00% optimize.opt_a.allreduce_fusion : 0.000263s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000322s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000408s : 0.00% optimize.opt_a.virtual_dataset : 0.000427s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000377s : 0.00% optimize.opt_a.virtual_output : 0.000379s : 0.00% optimize.opt_a.merge_forward : 0.000251s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000700s : 0.00% optimize.opt_a.before_grad : 0.000682s : 0.00% optimize.opt_a.inplace_validation : 0.000304s : 0.00% optimize.opt_a.parallel_renormalize : 0.021074s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000450s : 0.00% optimize.opt_a.meta_fg_expand : 0.261963s : 1.84% optimize.opt_a.inplace_validation_after_expand : 0.001856s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001208s : 0.01% optimize.opt_a.receive_attached : 0.000092s : 0.00% optimize.opt_a.after_resolve : 0.002312s : 0.02% optimize.opt_a.a_after_grad : 0.004330s : 0.03% optimize.opt_a.special_op_eliminate : 0.002122s : 0.01% optimize.opt_a.renormalize : 0.166622s : 1.17% optimize.opt_a.add_forward_monad_depend : 0.000362s : 0.00% optimize.opt_a.auto_monad_grad : 0.000213s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002245s : 0.02% optimize.opt_a.cse : 0.010921s : 0.08% optimize.opt_a.a_3 : 0.026856s : 0.19% optimize.py_interpret_to_execute_after_opt_a : 0.000134s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000994s : 0.01% optimize.convert_after_rewriter : 0.000109s : 0.00% optimize.order_py_execute_after_rewriter : 0.000080s : 0.00% optimize.opt_b.b_1 : 0.003111s : 0.02% optimize.opt_b.b_2 : 0.000138s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000094s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000085s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000087s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000395s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000130s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000066s : 0.00% optimize.loop_unroll : 0.000893s : 0.01% optimize.opt_after_cconv.c_1 : 0.000786s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000127s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000090s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000091s : 0.00% optimize.opt_after_cconv.cse : 0.000475s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000592s : 0.00% optimize.tuple_transform.d_1 : 0.000924s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000147s : 0.00% optimize.add_recomputation : 0.000685s : 0.00% optimize.cse_after_recomputation.cse : 0.000288s : 0.00% optimize.environ_conv : 0.000092s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000126s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000513s : 0.00% optimize.merge_cast_opt : 0.000014s : 0.00% optimize.slice_recompute_activation : 0.000152s : 0.00% optimize.micro_interleaved_order_control : 0.000003s : 0.00% optimize.assign_add_opt : 0.000394s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000103s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000144s : 0.00% optimize.add_comm_op_reuse_tag : 0.000146s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000031s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.00% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000095s : 0.00% optimize.overlap_grad_ring_attention : 0.000157s : 0.00% optimize.overlap_grad_flash_sp : 0.000120s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.00% optimize.symbol_engine_optimizer.build : 0.000049s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000143s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000226s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000131s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000212s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000354s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001415s : 0.01% distribtued_split : 0.000385s : 0.00% validate : 0.000298s : 0.00% task_emit : 12.897128s : 90.50% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.047937 4298 0.04% : 0.000021s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000021s : 9: substitution.addn_check_dump 0.11% : 0.000051s : 7: substitution.addn_zero_filter 0.03% : 0.000015s : 7: substitution.adjust_all_reduce_mul_add 0.64% : 0.000306s : 71: substitution.arithmetic_simplify 0.11% : 0.000055s : 10: substitution.cast_eliminate 0.11% : 0.000053s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.08% : 0.000039s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000019s : 12: substitution.environ_get_depend_swap 0.06% : 0.000028s : 27: substitution.environ_get_eliminate 0.07% : 0.000035s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000019s : 23: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.02% : 0.000010s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000030s : 107: substitution.fold_const_symbol 64.50% : 0.030920s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000082s : 126: substitution.graph_param_transform 0.02% : 0.000008s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.71% : 0.011366s : 331: substitution.inline 1.39% : 0.000665s : 112: substitution.inline_without_move 0.25% : 0.000122s : 309: substitution.j_node_and_user_rematch 0.27% : 0.000128s : 40: substitution.less_batch_normalization 0.09% : 0.000044s : 90: substitution.load_eliminater 0.11% : 0.000051s : 10: substitution.merge_addn 0.23% : 0.000112s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.07% : 0.000032s : 1: substitution.partial_defer_inline 0.12% : 0.000058s : 23: substitution.partial_eliminate 0.03% : 0.000017s : 26: substitution.reduce_all_const_elim 0.07% : 0.000033s : 15: substitution.reduce_eliminate 0.33% : 0.000156s : 309: substitution.remove_not_recompute_node 2.04% : 0.000977s : 508: substitution.replace_applicator 0.22% : 0.000106s : 251: substitution.replace_old_param 0.08% : 0.000038s : 11: substitution.reshape_eliminate 0.03% : 0.000013s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000010s : 4: substitution.specialize_transform 0.03% : 0.000016s : 12: substitution.split_environ_get_set_with_tuple_value 0.20% : 0.000096s : 34: substitution.switch_simplify 0.07% : 0.000031s : 11: substitution.tile_eliminate 0.53% : 0.000254s : 101: substitution.tuple_list_convert_item_index_to_positive 0.28% : 0.000136s : 107: substitution.tuple_list_get_item_const_eliminator 0.43% : 0.000205s : 107: substitution.tuple_list_get_item_depend_reorder 1.66% : 0.000794s : 308: substitution.tuple_list_get_item_eliminator 0.37% : 0.000176s : 107: substitution.tuple_list_get_set_item_eliminator 0.40% : 0.000193s : 210: substitution.updatestate_pure_node_eliminater 0.70% : 0.000338s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000011s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.720700 2 96.52% : 0.695608s : 1: type_inference.infer 3.48% : 0.025092s : 1: type_inference.specialize ------[replace.] 0.009516 775 0.44% : 0.000042s : 5: replace.ad_related_special_op_eliminate 0.06% : 0.000006s : 1: replace.arithmetic_simplify 0.48% : 0.000046s : 7: replace.depend_value_elim 0.43% : 0.000041s : 3: replace.environ_get_set_eliminate 30.46% : 0.002899s : 183: replace.getattr_setattr_resolve 30.23% : 0.002877s : 310: replace.inline 0.22% : 0.000021s : 1: replace.merge_addn 1.17% : 0.000111s : 7: replace.partial_eliminate 3.97% : 0.000378s : 25: replace.replace_applicator 3.73% : 0.000355s : 34: replace.switch_simplify 0.51% : 0.000049s : 6: replace.tuple_list_get_item_depend_reorder 27.91% : 0.002656s : 191: replace.tuple_list_get_item_eliminator 0.18% : 0.000017s : 1: replace.updatestate_useless_node_eliminater 0.20% : 0.000019s : 1: replace.virtual_dataset_eliminate ------[match.] 0.040703 775 0.04% : 0.000017s : 5: match.ad_related_special_op_eliminate 0.03% : 0.000011s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000017s : 3: match.environ_get_set_eliminate 70.76% : 0.028803s : 183: match.getattr_setattr_resolve 27.39% : 0.011150s : 310: match.inline 0.06% : 0.000023s : 1: match.merge_addn 0.10% : 0.000039s : 7: match.partial_eliminate 0.24% : 0.000097s : 25: match.replace_applicator 0.19% : 0.000076s : 34: match.switch_simplify 0.07% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 1.03% : 0.000420s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000008s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000009s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020899131318 0.78% : 0.000162s : 1198: predicate.accumulaten_eliminater 0.27% : 0.000056s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000114s : 835: predicate.addn_check_dump 0.81% : 0.000169s : 1198: predicate.addn_zero_filter 0.76% : 0.000158s : 1198: predicate.adjust_all_reduce_mul_add 1.84% : 0.000385s : 2034: predicate.arithmetic_simplify 1.12% : 0.000233s : 1586: predicate.cast_eliminate 3.21% : 0.000671s : 3484: predicate.check_bprop_eliminate 0.55% : 0.000115s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000035s : 242: predicate.convert_tensor_all_eliminate 1.15% : 0.000240s : 1399: predicate.convert_tensor_eliminate 0.56% : 0.000118s : 838: predicate.depend_value_elim 0.84% : 0.000176s : 1202: predicate.dict_get_item_const_eliminator 0.83% : 0.000174s : 1202: predicate.dict_get_item_eliminator 0.81% : 0.000169s : 1202: predicate.dict_set_item_eliminator 0.04% : 0.000009s : 126: predicate.elim_not_effective 0.10% : 0.000021s : 126: predicate.elim_shapecalc_of_broadcastargs 0.88% : 0.000183s : 1334: predicate.environ_add_const_eliminate 0.86% : 0.000180s : 1337: predicate.environ_get_add_eliminate 0.85% : 0.000177s : 1334: predicate.environ_get_depend_swap 1.47% : 0.000307s : 2172: predicate.environ_get_eliminate 0.86% : 0.000180s : 1337: predicate.environ_get_set_eliminate 1.11% : 0.000231s : 1717: predicate.exchange_switch_depend_value 1.43% : 0.000298s : 1717: predicate.float_depend_g_call 0.55% : 0.000115s : 835: predicate.float_environ_get_switch 0.64% : 0.000133s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000058s : 395: predicate.get_grad_eliminate 2.35% : 0.000491s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.54% : 0.000113s : 835: predicate.incorporate_call 0.53% : 0.000111s : 835: predicate.incorporate_call_switch 3.91% : 0.000818s : 4602: predicate.inline 2.34% : 0.000488s : 2203: predicate.inline_without_move 0.14% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.32% : 0.000068s : 388: predicate.less_batch_normalization 1.08% : 0.000226s : 1660: predicate.list_to_tuple_eliminator_ 1.84% : 0.000385s : 2874: predicate.load_eliminater 0.18% : 0.000038s : 135: predicate.loop_unroll_after_grad 2.52% : 0.000527s : 2640: predicate.loop_unroll_before_grad 0.95% : 0.000198s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000117s : 837: predicate.merge_addn 3.12% : 0.000651s : 3380: predicate.micro_step_allgather_replace 3.13% : 0.000653s : 3380: predicate.mini_step_allgather_replace 0.78% : 0.000163s : 1199: predicate.minmaximum_grad 0.18% : 0.000037s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 1.97% : 0.000412s : 1717: predicate.partial_defer_inline 1.09% : 0.000228s : 1541: predicate.partial_eliminate 0.77% : 0.000162s : 1198: predicate.print_const_string_wrapper 0.79% : 0.000165s : 824: predicate.reduce_all_const_elim 1.03% : 0.000216s : 1199: predicate.reduce_eliminate 0.14% : 0.000028s : 395: predicate.remove_not_recompute_node 1.93% : 0.000403s : 4829: predicate.replace_applicator 0.79% : 0.000165s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.81% : 0.000169s : 1199: predicate.reshape_eliminate 3.15% : 0.000658s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000022s : 135: predicate.row_tensor_eliminate 3.30% : 0.000691s : 3484: predicate.same_eliminate 0.24% : 0.000050s : 633: predicate.set_cell_output_no_recompute 0.29% : 0.000061s : 395: predicate.shard_identity_eliminate 2.14% : 0.000448s : 2338: predicate.special_op_eliminate 0.63% : 0.000131s : 837: predicate.specialize_transform 3.50% : 0.000732s : 3380: predicate.split_environ_get_set_with_tuple_value 1.66% : 0.000346s : 2203: predicate.stack_unstack_eliminate 1.82% : 0.000380s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.24% : 0.000259s : 1717: predicate.switch_defer_inline 4.57% : 0.000955s : 5201: predicate.switch_layer_defer_inline 4.31% : 0.000901s : 5262: predicate.switch_simplify 0.78% : 0.000162s : 1199: predicate.tile_eliminate 0.76% : 0.000159s : 1199: predicate.transpose_eliminate 1.05% : 0.000220s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.06% : 0.000221s : 1469: predicate.tuple_list_get_item_const_eliminator 0.95% : 0.000198s : 1469: predicate.tuple_list_get_item_depend_reorder 1.93% : 0.000403s : 2495: predicate.tuple_list_get_item_eliminator 1.02% : 0.000214s : 1469: predicate.tuple_list_get_set_item_eliminator 1.72% : 0.000360s : 2304: predicate.tuple_list_set_item_eliminator 1.10% : 0.000231s : 1660: predicate.tuple_to_list_eliminator_ 1.85% : 0.000387s : 2874: predicate.updatestate_pure_node_eliminater 2.49% : 0.000520s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000020s : 135: predicate.value_based_eliminate 0.28% : 0.000059s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000057s : 395: predicate.virtual_output_eliminate 0.10% : 0.000021s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.058283 747 69.23% : 0.040351s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.46% : 0.001432s : 22: func_graph_cloner_run.FuncGraphClonerNode 28.31% : 0.016500s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.250985 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.27% : 0.041060s : 1: a1a2 0.00% : 0.000155s : 1: add_cache_embedding 0.00% : 0.000153s : 1: add_comm_op_reuse_tag 0.00% : 0.000697s : 1: add_recomputation 0.00% : 0.000403s : 1: assign_add_opt 0.01% : 0.002133s : 1: auto_monad 0.00% : 0.000368s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001374s : 1: bootstrap 0.00% : 0.000072s : 1: cconv 0.00% : 0.000151s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000116s : 1: convert_after_rewriter 0.00% : 0.000315s : 1: cse_after_recomputation 0.00% : 0.000125s : 1: dataset_repeat_opt 0.00% : 0.000401s : 1: distribtued_split 0.01% : 0.001430s : 1: eliminate_special_op_node 0.00% : 0.000101s : 1: environ_conv 0.00% : 0.000024s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000036s : 1: graph_reusing 0.00% : 0.000015s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000008s : 1: handle_group_info 0.28% : 0.042652s : 1: inline 0.01% : 0.001301s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000522s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000904s : 1: loop_unroll 0.00% : 0.000020s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.21% : 0.032219s : 61: opt.transform.a1a2 0.00% : 0.000173s : 1: opt.transform.loop_unroll_optimizer 0.59% : 0.090628s : 148: opt.transform.opt_a 0.01% : 0.000783s : 1: opt.transform.opt_after_cconv 0.02% : 0.003220s : 27: opt.transform.opt_b 0.24% : 0.036766s : 16: opt.transform.opt_resolve 0.01% : 0.000921s : 1: opt.transform.opt_trans_graph 0.01% : 0.000858s : 6: opt.transform.special_op_eliminate 0.00% : 0.000706s : 4: opt.transform.symbol_engine_opt 3.79% : 0.577820s : 1: opt_a 0.01% : 0.001641s : 1: opt_after_cconv 0.03% : 0.003983s : 1: opt_b 3.88% : 0.592312s : 1: optimize 0.00% : 0.000139s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000086s : 1: order_py_execute_after_rewriter 0.00% : 0.000124s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000164s : 1: overlap_grad_ring_attention 0.00% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000036s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000100s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000198s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000106s : 1: pipeline_split 0.00% : 0.000113s : 1: pre_auto_parallel 0.00% : 0.000127s : 1: py_interpret_to_execute 0.00% : 0.000142s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000109s : 1: remove_cast_before_assign_add 0.00% : 0.000606s : 1: remove_dup_value 0.88% : 0.133503s : 3: renormalize.infer 0.36% : 0.054155s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001005s : 1: rewriter_after_opt_a 0.01% : 0.001814s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000158s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000133s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000809s : 1: symbol_engine_optimizer 84.57% : 12.897172s : 1: task_emit 0.01% : 0.000955s : 1: tuple_transform 4.73% : 0.721194s : 1: type_inference 0.01% : 0.001339s : 1: validate .. =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html================== 1 passed, 18 warnings in 93.18s (0:01:33) =================== ================== 1 passed, 18 warnings in 92.48s (0:01:32) =================== . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") ./home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 92.11s (0:01:32) ================================================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 92.69s (0:01:32) =================== . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") . /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 92.90s (0:01:32) =================== =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 93.38s (0:01:33) =================== .. =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") =============================== warnings summary ===============================/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") ================== 1 passed, 18 warnings in 92.38s (0:01:32) ===================/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 93.11s (0:01:33) =================== [WARNING] DEVICE(162741,ffffaec72c10,python3.7):2025-02-07-15:53:03.947.592 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x27fc68b0 is not exist. [WARNING] DEVICE(162724,ffff91263c10,python3.7):2025-02-07-15:53:04.046.391 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x4fded060 is not exist. [WARNING] DEVICE(162678,ffffba1a1c10,python3.7):2025-02-07-15:53:04.054.718 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x33499f70 is not exist. [WARNING] DEVICE(162713,ffffb2ee9c10,python3.7):2025-02-07-15:53:04.060.890 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x47d1b770 is not exist. [WARNING] DEVICE(162691,ffff8c2f5c10,python3.7):2025-02-07-15:53:06.235.039 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x271ccd50 is not exist. [WARNING] DEVICE(162660,ffffa2124c10,python3.7):2025-02-07-15:53:06.242.046 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3b3d72d0 is not exist. [WARNING] DEVICE(162669,ffffaa555c10,python3.7):2025-02-07-15:53:06.313.497 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x251999b0 is not exist. [WARNING] DEVICE(162702,ffff9ecc0c10,python3.7):2025-02-07-15:53:06.343.790 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x1e6c4270 is not exist. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 120.40s (0:02:00) ================== ff8c39f2e51611efac92c4447d93fe45/pass/test_deterministic_test_deterministic_allreduce.log0000644000175400017540000117310214751343157031027 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/backend_ascend/debug, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collected 1 item test_deterministic.py ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 ============================= test session starts ============================== ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/backend_ascend/debug plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collecting ... ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/backend_ascend/debug plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collecting ... rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/backend_ascend/debug plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collecting ... rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/backend_ascend/debug plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/backend_ascend/debug plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collecting ... collecting ... ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/backend_ascend/debug plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collecting ... rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/backend_ascend/debug plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collecting ... rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/backend_ascend/debug plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collecting ...  collecting 1 item  collected 1 item  test_deterministic.py [WARNING] ME(59421:281473260715024,MainProcess):2025-02-07-15:55:20.246.368 [mindspore/context.py:1326] For 'context.set_context', the parameter 'deterministic' will be deprecated and removed in a future version. Please use the api mindspore.set_deterministic() instead.  collecting 1 item  collected 1 item  test_deterministic.py [WARNING] ME(59423:281473174141968,MainProcess):2025-02-07-15:55:20.328.295 [mindspore/context.py:1326] For 'context.set_context', the parameter 'deterministic' will be deprecated and removed in a future version. Please use the api mindspore.set_deterministic() instead.  collecting 1 item  collected 1 item  test_deterministic.py [WARNING] ME(59424:281473642343440,MainProcess):2025-02-07-15:55:20.365.608 [mindspore/context.py:1326] For 'context.set_context', the parameter 'deterministic' will be deprecated and removed in a future version. Please use the api mindspore.set_deterministic() instead.  collecting 1 item  collected 1 item  test_deterministic.py [WARNING] ME(59419:281473424288784,MainProcess):2025-02-07-15:55:20.435.294 [mindspore/context.py:1326] For 'context.set_context', the parameter 'deterministic' will be deprecated and removed in a future version. Please use the api mindspore.set_deterministic() instead.  collecting 1 item  collected 1 item  test_deterministic.py [WARNING] ME(59425:281473661516816,MainProcess):2025-02-07-15:55:20.448.624 [mindspore/context.py:1326] For 'context.set_context', the parameter 'deterministic' will be deprecated and removed in a future version. Please use the api mindspore.set_deterministic() instead.  collecting 1 item  collected 1 item  test_deterministic.py [WARNING] ME(59426:281473197317136,MainProcess):2025-02-07-15:55:20.457.053 [mindspore/context.py:1326] For 'context.set_context', the parameter 'deterministic' will be deprecated and removed in a future version. Please use the api mindspore.set_deterministic() instead.  collecting 1 item  collected 1 item  test_deterministic.py [WARNING] ME(59422:281473441430544,MainProcess):2025-02-07-15:55:20.471.504 [mindspore/context.py:1326] For 'context.set_context', the parameter 'deterministic' will be deprecated and removed in a future version. Please use the api mindspore.set_deterministic() instead.  collecting 1 item  collected 1 item  test_deterministic.py [WARNING] ME(59420:281473656433680,MainProcess):2025-02-07-15:55:20.480.247 [mindspore/context.py:1326] For 'context.set_context', the parameter 'deterministic' will be deprecated and removed in a future version. Please use the api mindspore.set_deterministic() instead. [WARNING] DISTRIBUTED(59425,ffffb19bbc10,python3.7):2025-02-07-15:55:24.962.256 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(59426,ffff95f09c10,python3.7):2025-02-07-15:55:24.995.835 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(59421,ffff99b7fc10,python3.7):2025-02-07-15:55:25.133.903 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(59423,ffff948efc10,python3.7):2025-02-07-15:55:25.189.929 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(59424,ffffb0772c10,python3.7):2025-02-07-15:55:25.209.682 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(59419,ffffa377ec10,python3.7):2025-02-07-15:55:25.264.550 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(59422,ffffa47d7c10,python3.7):2025-02-07-15:55:25.283.762 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(59420,ffffb14e2c10,python3.7):2025-02-07-15:55:25.312.097 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(59421,fffe96ffd0f0,python3.7):2025-02-07-15:55:25.316.356 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(59422,fffeaa7fc0f0,python3.7):2025-02-07-15:55:25.316.362 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(59420,fffeaf7fe0f0,python3.7):2025-02-07-15:55:25.316.354 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(59424,fffeb67fc0f0,python3.7):2025-02-07-15:55:25.316.392 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(59423,fffe9a7fc0f0,python3.7):2025-02-07-15:55:25.316.391 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(59419,fffeb17fa0f0,python3.7):2025-02-07-15:55:25.316.381 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(59425,fffeaf7fe0f0,python3.7):2025-02-07-15:55:25.316.401 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(59426,fffe92ffd0f0,python3.7):2025-02-07-15:55:25.316.427 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(59420,fffeaeffd0f0,python3.7):2025-02-07-15:55:25.316.561 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 1 [WARNING] DEVICE(59422,fffea9ffb0f0,python3.7):2025-02-07-15:55:25.316.580 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 1 [WARNING] DEVICE(59421,fffe967fc0f0,python3.7):2025-02-07-15:55:25.316.580 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 1 [WARNING] DEVICE(59419,fffe8effd0f0,python3.7):2025-02-07-15:55:25.316.574 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 1 [WARNING] DEVICE(59424,fffeb5ffb0f0,python3.7):2025-02-07-15:55:25.316.590 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 1 [WARNING] DEVICE(59423,fffe99ffb0f0,python3.7):2025-02-07-15:55:25.316.594 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 1 [WARNING] DEVICE(59425,fffeaeffd0f0,python3.7):2025-02-07-15:55:25.316.617 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 1 [WARNING] DEVICE(59426,fffe927fc0f0,python3.7):2025-02-07-15:55:25.316.702 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 1 [WARNING] DEVICE(59419,fffe8effd0f0,python3.7):2025-02-07-15:55:25.619.455 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(59419,fffeb17fa0f0,python3.7):2025-02-07-15:55:25.619.653 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(59424,fffeb5ffb0f0,python3.7):2025-02-07-15:55:25.719.867 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DEVICE(59421,fffe967fc0f0,python3.7):2025-02-07-15:55:25.720.049 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(59424,fffeb67fc0f0,python3.7):2025-02-07-15:55:25.721.969 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(59421,fffe96ffd0f0,python3.7):2025-02-07-15:55:25.721.961 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(59422,fffea9ffb0f0,python3.7):2025-02-07-15:55:25.737.422 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(59422,fffeaa7fc0f0,python3.7):2025-02-07-15:55:25.738.109 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(59425,fffeaeffd0f0,python3.7):2025-02-07-15:55:25.763.115 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DEVICE(59423,fffe99ffb0f0,python3.7):2025-02-07-15:55:25.763.096 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DEVICE(59420,fffeaeffd0f0,python3.7):2025-02-07-15:55:25.763.101 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(59425,fffeaf7fe0f0,python3.7):2025-02-07-15:55:25.763.284 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(59420,fffeaf7fe0f0,python3.7):2025-02-07-15:55:25.764.457 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(59423,fffe9a7fc0f0,python3.7):2025-02-07-15:55:25.764.453 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(59426,fffe927fc0f0,python3.7):2025-02-07-15:55:25.792.206 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(59426,fffe92ffd0f0,python3.7):2025-02-07-15:55:25.792.869 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group TotalTime = 6.13527, [21] [bootstrap]: 0.00120379 [type_inference]: 0.0143825 [auto_monad]: 9.84999e-05 [graph_reusing]: 2.21981e-06 [inline]: 1.97999e-06 [parallel-infer-symbol]: 2.39024e-06 [pre_auto_parallel]: 3.13404e-05 [insert-virtual-dataset]: 3.09013e-06 [parallel-infer-symbol-second]: 4.20026e-07 [dataset_repeat_opt]: 1.34017e-06 [pipeline_split]: 1.51014e-06 [optimize]: 0.0105787, [52] [py_interpret_to_execute]: 1.36998e-05 [rewriter_before_opt_a]: 2.96799e-05 [opt_a]: 0.00912604, [2] [Cycle 1]: 0.00119707, [43] [expand_dump_flag]: 3.95998e-06 [switch_simplify]: 2.46298e-05 [loop_unroll]: 1.01198e-05 [a_1]: 0.00025153 [recompute_prepare]: 5.93998e-06 [updatestate_depend_eliminate]: 5.83008e-06 [updatestate_assign_eliminate]: 3.70014e-06 [updatestate_loads_eliminate]: 4.40981e-06 [parameter_eliminate]: 4.01959e-06 [a_2]: 8.16402e-05 [accelerated_algorithm]: 5.63031e-06 [shard]: 2.65008e-06 [meta_shard_fg_expand]: 3.13018e-06 [shard_inline]: 5.53997e-06 [auto_parallel]: 1.02199e-05 [parallel]: 8.2301e-06 [flash_sp]: 1.792e-05 [merge_comm]: 5.81006e-06 [allreduce_fusion]: 3.57023e-06 [matmul_add_comm_reduction]: 8.31019e-06 [allreduce_slice_to_reducescatter]: 6.9011e-07 [virtual_shard_identity]: 6.37025e-06 [virtual_dataset]: 5.13019e-06 [get_grad_eliminate_]: 7.09994e-06 [virtual_output]: 5.20004e-06 [merge_forward]: 1.057e-05 [cell_reuse_recompute_pass]: 1.4198e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.15801e-05 [before_grad]: 8.67993e-06 [inplace_validation]: 3.65032e-06 [meta_fg_expand]: 3.64985e-06 [inplace_validation_after_expand]: 3.83006e-06 [flash_sp_send_recv_attached]: 3.74997e-06 [receive_attached]: 2.44007e-06 [after_resolve]: 8.38982e-06 [a_after_grad]: 8.1202e-06 [special_op_eliminate]: 5.14975e-06 [renormalize]: 0.00033609 [add_forward_monad_depend]: 3.41982e-06 [auto_monad_grad]: 1.83005e-06 [auto_monad_eliminator]: 1.13402e-05 [cse]: 2.51699e-05 [a_3]: 3.677e-05 [Cycle 2]: 0.00051413, [43] [expand_dump_flag]: 9.99775e-07 [switch_simplify]: 5.99027e-06 [loop_unroll]: 5.05988e-06 [a_1]: 0.00010213 [recompute_prepare]: 4.57e-06 [updatestate_depend_eliminate]: 3.45008e-06 [updatestate_assign_eliminate]: 2.40002e-06 [updatestate_loads_eliminate]: 2.16998e-06 [parameter_eliminate]: 1.01002e-06 [a_2]: 6.10701e-05 [accelerated_algorithm]: 5.27967e-06 [shard]: 1.07009e-06 [meta_shard_fg_expand]: 1.70013e-06 [shard_inline]: 5.09992e-06 [auto_parallel]: 8.29995e-06 [parallel]: 4.29014e-06 [flash_sp]: 5.47012e-06 [merge_comm]: 3.91994e-06 [allreduce_fusion]: 3.47011e-06 [matmul_add_comm_reduction]: 5.1898e-06 [allreduce_slice_to_reducescatter]: 2.89641e-07 [virtual_shard_identity]: 6.10994e-06 [virtual_dataset]: 5.1898e-06 [get_grad_eliminate_]: 4.94998e-06 [virtual_output]: 4.92018e-06 [merge_forward]: 3.08966e-06 [cell_reuse_recompute_pass]: 1.75973e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.04401e-05 [before_grad]: 7.89994e-06 [inplace_validation]: 2.26032e-06 [meta_fg_expand]: 2.92994e-06 [inplace_validation_after_expand]: 2.99979e-06 [flash_sp_send_recv_attached]: 9.4017e-07 [receive_attached]: 6.3004e-07 [after_resolve]: 7.09994e-06 [a_after_grad]: 7.51996e-06 [special_op_eliminate]: 4.76977e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.59959e-07 [auto_monad_grad]: 1.32993e-06 [auto_monad_eliminator]: 5.07012e-06 [cse]: 1.03703e-05 [a_3]: 2.89697e-05 [py_interpret_to_execute_after_opt_a]: 7.09016e-06 [slice_cell_reuse_recomputed_activation]: 2.29012e-06 [rewriter_after_opt_a]: 7.41999e-05 [convert_after_rewriter]: 5.70994e-06 [order_py_execute_after_rewriter]: 4.65009e-06 [opt_b]: 0.00014553, [1] [Cycle 1]: 0.00013992, [7] [b_1]: 9.08002e-05 [b_2]: 6.27991e-06 [updatestate_depend_eliminate]: 2.63005e-06 [updatestate_assign_eliminate]: 2.06009e-06 [updatestate_loads_eliminate]: 1.89012e-06 [renormalize]: 2.59839e-07 [cse]: 8.98028e-06 [optimize_parallel_all_gather_comm]: 5.56e-06 [overlap_param_gather]: 3.27965e-06 [cconv]: 2.26302e-05 [loop_unroll]: 0.00049942 [opt_after_cconv]: 8.485e-05, [1] [Cycle 1]: 7.91e-05, [7] [c_1]: 2.428e-05 [parameter_eliminate]: 2.57976e-06 [updatestate_depend_eliminate]: 5.53997e-06 [updatestate_assign_eliminate]: 2.2701e-06 [updatestate_loads_eliminate]: 2.00002e-06 [cse]: 1.35703e-05 [renormalize]: 4.50294e-07 [remove_dup_value]: 9.53022e-06 [tuple_transform]: 4.85699e-05, [1] [Cycle 1]: 4.43002e-05, [2] [d_1]: 3.538e-05 [renormalize]: 1.80211e-07 [partial_unused_args_eliminate]: 1.97999e-06 [add_cache_embedding]: 1.99201e-05 [add_recomputation]: 5.22598e-05 [cse_after_recomputation]: 1.85701e-05, [1] [Cycle 1]: 1.40504e-05, [1] [cse]: 9.22987e-06 [environ_conv]: 1.71796e-05 [swap_dp_allreduce_reducescatter]: 5.39003e-06 [bias_add_comm_swap]: 2.06009e-06 [label_micro_interleaved_index]: 2.38977e-06 [label_fine_grained_interleaved_index]: 1.83983e-06 [merge_cast_opt]: 1.24983e-06 [slice_recompute_activation]: 1.97999e-06 [micro_interleaved_order_control]: 1.8198e-06 [assign_add_opt]: 9.66992e-06 [ForceFp32Comm]: 1.32993e-06 [remove_cast_before_assign_add]: 9.49949e-07 [full_micro_interleaved_order_control]: 2.08989e-06 [reorder_send_recv_between_fp_bp]: 2.31992e-06 [comm_op_add_attrs]: 9.80217e-07 [add_comm_op_reuse_tag]: 1.07987e-06 [interleave_split_concat_branches]: 8.49832e-07 [interleave_parallel_branches]: 9.39704e-07 [overlap_opt_shard_in_pipeline]: 9.61032e-06 [overlap_opt_shard_grad_in_pipeline]: 2.69013e-06 [control_data_broadcast_order]: 1.07009e-06 [grouped_pairwise_exchange_alltoall]: 1.34995e-06 [offloading_packed_experts]: 1.63959e-06 [overlap_recompute_and_grad_model_parallel]: 1.90968e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.89762e-07 [overlap_recompute_allgather_and_fa_grad]: 1.15996e-06 [overlap_grad_ring_attention]: 1.77976e-06 [overlap_grad_flash_sp]: 1.25398e-05 [begin_end_overlap_inline]: 7.79983e-07 [split_matmul_comm_elemetwise]: 1.81003e-06 [split_layernorm_comm]: 2.2999e-06 [handle_group_info]: 9.79751e-07 [symbol_engine_optimizer]: 6.476e-05, [1] [Cycle 1]: 6.042e-05, [6] [build]: 2.65008e-06 [elim_shapecalc]: 8.38004e-06 [elim_not_effective]: 1.074e-05 [opt_reshape]: 5.60982e-06 [fold_const_symbol]: 8.67015e-06 [renormalize]: 2.5006e-07 [pipeline_parallel_scheduler]: 1.60979e-06 [auto_monad_reorder]: 2.13101e-05 [get_jit_bprop_graph]: 4.20026e-07 [rewriter_after_jit_bprop_graph]: 4.60073e-07 [eliminate_special_op_node]: 0.00047494 [distribtued_split]: 1.39e-06 [validate]: 4.05801e-05 [task_emit]: 6.10812 [execute]: 1.14799e-05 Sums bootstrap : 0.001204s : 0.02% type_inference : 0.014382s : 0.23% auto_monad : 0.000098s : 0.00% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000031s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.00% optimize.rewriter_before_opt_a : 0.000030s : 0.00% optimize.opt_a.expand_dump_flag : 0.000005s : 0.00% optimize.opt_a.switch_simplify : 0.000031s : 0.00% optimize.opt_a.loop_unroll : 0.000015s : 0.00% optimize.opt_a.a_1 : 0.000354s : 0.01% optimize.opt_a.recompute_prepare : 0.000011s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000009s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000007s : 0.00% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000143s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000011s : 0.00% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.00% optimize.opt_a.shard_inline : 0.000011s : 0.00% optimize.opt_a.auto_parallel : 0.000019s : 0.00% optimize.opt_a.parallel : 0.000013s : 0.00% optimize.opt_a.flash_sp : 0.000023s : 0.00% optimize.opt_a.merge_comm : 0.000010s : 0.00% optimize.opt_a.allreduce_fusion : 0.000007s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000013s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000012s : 0.00% optimize.opt_a.virtual_dataset : 0.000010s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000012s : 0.00% optimize.opt_a.virtual_output : 0.000010s : 0.00% optimize.opt_a.merge_forward : 0.000014s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000022s : 0.00% optimize.opt_a.before_grad : 0.000017s : 0.00% optimize.opt_a.inplace_validation : 0.000006s : 0.00% optimize.opt_a.meta_fg_expand : 0.000007s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000007s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000015s : 0.00% optimize.opt_a.a_after_grad : 0.000016s : 0.00% optimize.opt_a.special_op_eliminate : 0.000010s : 0.00% optimize.opt_a.renormalize : 0.000336s : 0.01% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000016s : 0.00% optimize.opt_a.cse : 0.000036s : 0.00% optimize.opt_a.a_3 : 0.000066s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000007s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000074s : 0.00% optimize.convert_after_rewriter : 0.000006s : 0.00% optimize.order_py_execute_after_rewriter : 0.000005s : 0.00% optimize.opt_b.b_1 : 0.000091s : 0.00% optimize.opt_b.b_2 : 0.000006s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000003s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000009s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000006s : 0.00% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000023s : 0.00% optimize.loop_unroll : 0.000499s : 0.01% optimize.opt_after_cconv.c_1 : 0.000024s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000006s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000014s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.00% optimize.tuple_transform.d_1 : 0.000035s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000020s : 0.00% optimize.add_recomputation : 0.000052s : 0.00% optimize.cse_after_recomputation.cse : 0.000009s : 0.00% optimize.environ_conv : 0.000017s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000005s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000010s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000010s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000008s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000011s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000006s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000009s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000021s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000475s : 0.01% distribtued_split : 0.000001s : 0.00% validate : 0.000041s : 0.00% task_emit : 6.108121s : 99.69% execute : 0.000011s : 0.00% Time group info: ------[substitution.] 0.000089 20 2.03% : 0.000002s : 2: substitution.elim_not_effective 1.82% : 0.000002s : 2: substitution.fold_const_symbol 6.20% : 0.000006s : 3: substitution.graph_param_transform 63.59% : 0.000056s : 1: substitution.inline 3.86% : 0.000003s : 4: substitution.j_node_and_user_rematch 14.15% : 0.000013s : 2: substitution.reduce_all_const_elim 5.23% : 0.000005s : 4: substitution.remove_not_recompute_node 3.11% : 0.000003s : 2: substitution.replace_old_param ------[type_inference.] 0.014350 2 97.50% : 0.013991s : 1: type_inference.infer 2.50% : 0.000359s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000056 1 100.00% : 0.000056s : 1: match.inline ------[predicate.] 0.000140 740 0.89% : 0.000001s : 7: predicate.accumulaten_eliminater 1.00% : 0.000001s : 3: predicate.ad_related_special_op_eliminate 0.69% : 0.000001s : 6: predicate.addn_check_dump 0.87% : 0.000001s : 7: predicate.addn_zero_filter 0.71% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.24% : 0.000003s : 13: predicate.arithmetic_simplify 0.75% : 0.000001s : 7: predicate.cast_eliminate 0.82% : 0.000001s : 6: predicate.check_bprop_eliminate 0.71% : 0.000001s : 6: predicate.compare_switch_simplify 0.22% : 0.000000s : 3: predicate.const_output_eliminate 0.42% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.67% : 0.000002s : 7: predicate.convert_tensor_eliminate 0.66% : 0.000001s : 6: predicate.depend_value_elim 0.80% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.92% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.91% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.40% : 0.000001s : 3: predicate.elim_not_effective 0.64% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.08% : 0.000002s : 10: predicate.environ_add_const_eliminate 1.02% : 0.000001s : 10: predicate.environ_get_add_eliminate 1.02% : 0.000001s : 10: predicate.environ_get_depend_swap 1.85% : 0.000003s : 16: predicate.environ_get_eliminate 1.11% : 0.000002s : 10: predicate.environ_get_set_eliminate 0.87% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.67% : 0.000002s : 8: predicate.float_depend_g_call 0.67% : 0.000001s : 6: predicate.float_environ_get_switch 1.04% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.27% : 0.000000s : 3: predicate.fold_const_symbol 0.82% : 0.000001s : 6: predicate.get_grad_eliminate 0.31% : 0.000000s : 3: predicate.graph_param_transform 0.72% : 0.000001s : 6: predicate.incorporate_call 0.66% : 0.000001s : 6: predicate.incorporate_call_switch 6.15% : 0.000009s : 33: predicate.inline 1.06% : 0.000001s : 6: predicate.inline_without_move 0.46% : 0.000001s : 6: predicate.j_node_and_user_rematch 1.04% : 0.000001s : 6: predicate.less_batch_normalization 1.56% : 0.000002s : 13: predicate.list_to_tuple_eliminator_ 2.15% : 0.000003s : 20: predicate.load_eliminater 1.31% : 0.000002s : 3: predicate.loop_unroll_after_grad 1.48% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.77% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.70% : 0.000001s : 6: predicate.merge_addn 0.74% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.97% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.71% : 0.000001s : 7: predicate.minmaximum_grad 0.95% : 0.000001s : 3: predicate.mutable_eliminate 0.47% : 0.000001s : 3: predicate.opt_reshape 0.51% : 0.000001s : 3: predicate.parallel_virtual_node 1.42% : 0.000002s : 8: predicate.partial_defer_inline 1.39% : 0.000002s : 10: predicate.partial_eliminate 0.82% : 0.000001s : 7: predicate.print_const_string_wrapper 0.87% : 0.000001s : 6: predicate.reduce_all_const_elim 1.02% : 0.000001s : 7: predicate.reduce_eliminate 0.76% : 0.000001s : 6: predicate.remove_not_recompute_node 1.37% : 0.000002s : 13: predicate.replace_applicator 0.56% : 0.000001s : 6: predicate.replace_old_param 0.25% : 0.000000s : 3: predicate.reset_defer_inline 0.76% : 0.000001s : 7: predicate.reshape_eliminate 0.92% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 3: predicate.row_tensor_eliminate 0.97% : 0.000001s : 6: predicate.same_eliminate 0.56% : 0.000001s : 6: predicate.set_cell_output_no_recompute 1.02% : 0.000001s : 6: predicate.shard_identity_eliminate 1.32% : 0.000002s : 9: predicate.special_op_eliminate 0.94% : 0.000001s : 6: predicate.specialize_transform 0.97% : 0.000001s : 6: predicate.split_environ_get_set_with_tuple_value 1.04% : 0.000001s : 6: predicate.stack_unstack_eliminate 2.43% : 0.000003s : 20: predicate.stopgrad_eliminater 0.39% : 0.000001s : 3: predicate.switch_call_monad_eliminater 0.99% : 0.000001s : 8: predicate.switch_defer_inline 1.68% : 0.000002s : 14: predicate.switch_layer_defer_inline 4.71% : 0.000007s : 24: predicate.switch_simplify 0.76% : 0.000001s : 7: predicate.tile_eliminate 0.77% : 0.000001s : 7: predicate.transpose_eliminate 1.59% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.66% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.31% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.65% : 0.000004s : 19: predicate.tuple_list_get_item_eliminator 1.42% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.32% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.57% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 2.11% : 0.000003s : 20: predicate.updatestate_pure_node_eliminater 3.10% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.44% : 0.000001s : 3: predicate.value_based_eliminate 0.87% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.83% : 0.000001s : 6: predicate.virtual_output_eliminate 0.49% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000200 4 7.59% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.41% : 0.000185s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 6.146973 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000024s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000057s : 1: add_recomputation 0.00% : 0.000013s : 1: assign_add_opt 0.00% : 0.000110s : 1: auto_monad 0.00% : 0.000027s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.02% : 0.001256s : 1: bootstrap 0.00% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000009s : 1: convert_after_rewriter 0.00% : 0.000021s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.01% : 0.000488s : 1: eliminate_special_op_node 0.00% : 0.000021s : 1: environ_conv 0.00% : 0.000021s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000508s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000011s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000695s : 80: opt.transform.opt_a 0.00% : 0.000023s : 1: opt.transform.opt_after_cconv 0.00% : 0.000079s : 27: opt.transform.opt_b 0.00% : 0.000034s : 1: opt.transform.opt_trans_graph 0.00% : 0.000021s : 3: opt.transform.special_op_eliminate 0.00% : 0.000030s : 4: opt.transform.symbol_engine_opt 0.15% : 0.009130s : 1: opt_a 0.00% : 0.000089s : 1: opt_after_cconv 0.00% : 0.000149s : 1: opt_b 0.17% : 0.010586s : 1: optimize 0.00% : 0.000009s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000008s : 1: order_py_execute_after_rewriter 0.00% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000013s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000038s : 1: pre_auto_parallel 0.00% : 0.000020s : 1: py_interpret_to_execute 0.00% : 0.000011s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000013s : 1: remove_dup_value 0.00% : 0.000187s : 1: renormalize.infer 0.00% : 0.000143s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000080s : 1: rewriter_after_opt_a 0.00% : 0.000034s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000008s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000009s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000067s : 1: symbol_engine_optimizer 99.37% : 6.108157s : 1: task_emit 0.00% : 0.000052s : 1: tuple_transform 0.23% : 0.014405s : 1: type_inference 0.00% : 0.000083s : 1: validate TotalTime = 6.17516, [21] [bootstrap]: 0.00126764 [type_inference]: 0.0141103 [auto_monad]: 9.28203e-05 [graph_reusing]: 2.71015e-06 [inline]: 2.04006e-06 [parallel-infer-symbol]: 2.52994e-06 [pre_auto_parallel]: 4.94099e-05 [insert-virtual-dataset]: 3.11993e-06 [parallel-infer-symbol-second]: 3.90224e-07 [dataset_repeat_opt]: 1.68988e-06 [pipeline_split]: 1.50967e-06 [optimize]: 0.0103751, [52] [py_interpret_to_execute]: 1.28099e-05 [rewriter_before_opt_a]: 3.67598e-05 [opt_a]: 0.00897217, [2] [Cycle 1]: 0.00116816, [43] [expand_dump_flag]: 3.6601e-06 [switch_simplify]: 2.47997e-05 [loop_unroll]: 9.30997e-06 [a_1]: 0.00022744 [recompute_prepare]: 5.60004e-06 [updatestate_depend_eliminate]: 6.09038e-06 [updatestate_assign_eliminate]: 3.43984e-06 [updatestate_loads_eliminate]: 2.82982e-06 [parameter_eliminate]: 4.00003e-06 [a_2]: 7.356e-05 [accelerated_algorithm]: 5.01005e-06 [shard]: 2.12016e-06 [meta_shard_fg_expand]: 2.9197e-06 [shard_inline]: 4.90993e-06 [auto_parallel]: 9.09995e-06 [parallel]: 1.49803e-05 [flash_sp]: 1.072e-05 [merge_comm]: 1.19903e-05 [allreduce_fusion]: 3.41004e-06 [matmul_add_comm_reduction]: 8.34977e-06 [allreduce_slice_to_reducescatter]: 5.69969e-07 [virtual_shard_identity]: 1.508e-05 [virtual_dataset]: 4.81028e-06 [get_grad_eliminate_]: 4.31994e-06 [virtual_output]: 4.46988e-06 [merge_forward]: 4.13973e-06 [cell_reuse_recompute_pass]: 1.57999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.67401e-05 [before_grad]: 8.38982e-06 [inplace_validation]: 3.64007e-06 [meta_fg_expand]: 3.16976e-06 [inplace_validation_after_expand]: 4.26965e-06 [flash_sp_send_recv_attached]: 3.70992e-06 [receive_attached]: 8.38004e-06 [after_resolve]: 7.96979e-06 [a_after_grad]: 7.35e-06 [special_op_eliminate]: 6.59982e-06 [renormalize]: 0.00033224 [add_forward_monad_depend]: 3.72017e-06 [auto_monad_grad]: 2.2999e-06 [auto_monad_eliminator]: 1.15e-05 [cse]: 2.643e-05 [a_3]: 3.28603e-05 [Cycle 2]: 0.00050279, [43] [expand_dump_flag]: 9.80217e-07 [switch_simplify]: 5.34998e-06 [loop_unroll]: 4.35999e-06 [a_1]: 9.043e-05 [recompute_prepare]: 4.01028e-06 [updatestate_depend_eliminate]: 3.0403e-06 [updatestate_assign_eliminate]: 2.52994e-06 [updatestate_loads_eliminate]: 2.14996e-06 [parameter_eliminate]: 1.08965e-06 [a_2]: 5.368e-05 [accelerated_algorithm]: 4.47966e-06 [shard]: 9.99775e-07 [meta_shard_fg_expand]: 1.53016e-06 [shard_inline]: 4.25009e-06 [auto_parallel]: 7.50041e-06 [parallel]: 3.64985e-06 [flash_sp]: 5.70994e-06 [merge_comm]: 3.33972e-06 [allreduce_fusion]: 2.77022e-06 [matmul_add_comm_reduction]: 4.63007e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 5.07012e-06 [virtual_dataset]: 4.27989e-06 [get_grad_eliminate_]: 4.12995e-06 [virtual_output]: 3.85009e-06 [merge_forward]: 2.2701e-06 [cell_reuse_recompute_pass]: 1.58977e-06 [cell_reuse_handle_not_recompute_node_pass]: 3.74899e-05 [before_grad]: 7.49016e-06 [inplace_validation]: 2.52994e-06 [meta_fg_expand]: 2.52016e-06 [inplace_validation_after_expand]: 2.80002e-06 [flash_sp_send_recv_attached]: 1.09989e-06 [receive_attached]: 6.60308e-07 [after_resolve]: 6.51972e-06 [a_after_grad]: 6.23008e-06 [special_op_eliminate]: 4.17e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.79983e-07 [auto_monad_grad]: 1.05007e-06 [auto_monad_eliminator]: 4.82006e-06 [cse]: 1.14799e-05 [a_3]: 2.50498e-05 [py_interpret_to_execute_after_opt_a]: 6.54021e-06 [slice_cell_reuse_recomputed_activation]: 2.61003e-06 [rewriter_after_opt_a]: 7.02399e-05 [convert_after_rewriter]: 5.92973e-06 [order_py_execute_after_rewriter]: 3.66988e-06 [opt_b]: 0.00013399, [1] [Cycle 1]: 0.00012814, [7] [b_1]: 7.90497e-05 [b_2]: 5.43008e-06 [updatestate_depend_eliminate]: 2.4098e-06 [updatestate_assign_eliminate]: 2.23005e-06 [updatestate_loads_eliminate]: 2.14018e-06 [renormalize]: 2.10013e-07 [cse]: 9.42033e-06 [optimize_parallel_all_gather_comm]: 5.37001e-06 [overlap_param_gather]: 2.48989e-06 [cconv]: 2.10302e-05 [loop_unroll]: 0.00049047 [opt_after_cconv]: 8.16602e-05, [1] [Cycle 1]: 7.60397e-05, [7] [c_1]: 2.19699e-05 [parameter_eliminate]: 2.02004e-06 [updatestate_depend_eliminate]: 5.30994e-06 [updatestate_assign_eliminate]: 2.33995e-06 [updatestate_loads_eliminate]: 1.97021e-06 [cse]: 1.335e-05 [renormalize]: 3.39933e-07 [remove_dup_value]: 1.074e-05 [tuple_transform]: 4.39803e-05, [1] [Cycle 1]: 3.98699e-05, [2] [d_1]: 3.17898e-05 [renormalize]: 1.39698e-07 [partial_unused_args_eliminate]: 2.1602e-06 [add_cache_embedding]: 1.00504e-05 [add_recomputation]: 4.71799e-05 [cse_after_recomputation]: 1.811e-05, [1] [Cycle 1]: 1.36802e-05, [1] [cse]: 8.91974e-06 [environ_conv]: 1.53598e-05 [swap_dp_allreduce_reducescatter]: 5.24009e-06 [bias_add_comm_swap]: 2.2701e-06 [label_micro_interleaved_index]: 2.23983e-06 [label_fine_grained_interleaved_index]: 2.14018e-06 [merge_cast_opt]: 1.24006e-06 [slice_recompute_activation]: 2.16998e-06 [micro_interleaved_order_control]: 1.83983e-06 [assign_add_opt]: 9.43989e-06 [ForceFp32Comm]: 1.46963e-06 [remove_cast_before_assign_add]: 1.09011e-06 [full_micro_interleaved_order_control]: 1.95997e-06 [reorder_send_recv_between_fp_bp]: 2.37999e-06 [comm_op_add_attrs]: 1.06031e-06 [add_comm_op_reuse_tag]: 1.07987e-06 [interleave_split_concat_branches]: 8.60076e-07 [interleave_parallel_branches]: 8.90344e-07 [overlap_opt_shard_in_pipeline]: 7.18003e-06 [overlap_opt_shard_grad_in_pipeline]: 2.29012e-06 [control_data_broadcast_order]: 1.22003e-06 [grouped_pairwise_exchange_alltoall]: 1.32015e-06 [offloading_packed_experts]: 1.2801e-06 [overlap_recompute_and_grad_model_parallel]: 2.07964e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.89762e-07 [overlap_recompute_allgather_and_fa_grad]: 1.15996e-06 [overlap_grad_ring_attention]: 1.7304e-06 [overlap_grad_flash_sp]: 2.03098e-05 [begin_end_overlap_inline]: 8.49832e-07 [split_matmul_comm_elemetwise]: 2.08011e-06 [split_layernorm_comm]: 1.97021e-06 [handle_group_info]: 1.38022e-06 [symbol_engine_optimizer]: 6.28997e-05, [1] [Cycle 1]: 5.86202e-05, [6] [build]: 2.2701e-06 [elim_shapecalc]: 7.15954e-06 [elim_not_effective]: 1.02101e-05 [opt_reshape]: 5.01005e-06 [fold_const_symbol]: 1.013e-05 [renormalize]: 2.59839e-07 [pipeline_parallel_scheduler]: 1.5297e-06 [auto_monad_reorder]: 2.21399e-05 [get_jit_bprop_graph]: 4.60073e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00046349 [distribtued_split]: 1.55997e-06 [validate]: 3.57297e-05 [task_emit]: 6.14842 [execute]: 1.18003e-05 Sums bootstrap : 0.001268s : 0.02% type_inference : 0.014110s : 0.23% auto_monad : 0.000093s : 0.00% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000049s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.00% optimize.rewriter_before_opt_a : 0.000037s : 0.00% optimize.opt_a.expand_dump_flag : 0.000005s : 0.00% optimize.opt_a.switch_simplify : 0.000030s : 0.00% optimize.opt_a.loop_unroll : 0.000014s : 0.00% optimize.opt_a.a_1 : 0.000318s : 0.01% optimize.opt_a.recompute_prepare : 0.000010s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000009s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000127s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000009s : 0.00% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000004s : 0.00% optimize.opt_a.shard_inline : 0.000009s : 0.00% optimize.opt_a.auto_parallel : 0.000017s : 0.00% optimize.opt_a.parallel : 0.000019s : 0.00% optimize.opt_a.flash_sp : 0.000016s : 0.00% optimize.opt_a.merge_comm : 0.000015s : 0.00% optimize.opt_a.allreduce_fusion : 0.000006s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000013s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.00% optimize.opt_a.virtual_dataset : 0.000009s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000008s : 0.00% optimize.opt_a.virtual_output : 0.000008s : 0.00% optimize.opt_a.merge_forward : 0.000006s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000054s : 0.00% optimize.opt_a.before_grad : 0.000016s : 0.00% optimize.opt_a.inplace_validation : 0.000006s : 0.00% optimize.opt_a.meta_fg_expand : 0.000006s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000007s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000009s : 0.00% optimize.opt_a.after_resolve : 0.000014s : 0.00% optimize.opt_a.a_after_grad : 0.000014s : 0.00% optimize.opt_a.special_op_eliminate : 0.000011s : 0.00% optimize.opt_a.renormalize : 0.000332s : 0.01% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000016s : 0.00% optimize.opt_a.cse : 0.000038s : 0.00% optimize.opt_a.a_3 : 0.000058s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000007s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000070s : 0.00% optimize.convert_after_rewriter : 0.000006s : 0.00% optimize.order_py_execute_after_rewriter : 0.000004s : 0.00% optimize.opt_b.b_1 : 0.000079s : 0.00% optimize.opt_b.b_2 : 0.000005s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000009s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000005s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000021s : 0.00% optimize.loop_unroll : 0.000490s : 0.01% optimize.opt_after_cconv.c_1 : 0.000022s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000013s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.00% optimize.tuple_transform.d_1 : 0.000032s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000010s : 0.00% optimize.add_recomputation : 0.000047s : 0.00% optimize.cse_after_recomputation.cse : 0.000009s : 0.00% optimize.environ_conv : 0.000015s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000005s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000009s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000007s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000020s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000002s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000007s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000010s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000005s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000010s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000022s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000463s : 0.01% distribtued_split : 0.000002s : 0.00% validate : 0.000036s : 0.00% task_emit : 6.148425s : 99.70% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.000124 20 1.32% : 0.000002s : 2: substitution.elim_not_effective 2.90% : 0.000004s : 2: substitution.fold_const_symbol 4.36% : 0.000005s : 3: substitution.graph_param_transform 45.07% : 0.000056s : 1: substitution.inline 2.85% : 0.000004s : 4: substitution.j_node_and_user_rematch 9.93% : 0.000012s : 2: substitution.reduce_all_const_elim 31.04% : 0.000038s : 4: substitution.remove_not_recompute_node 2.53% : 0.000003s : 2: substitution.replace_old_param ------[type_inference.] 0.014077 2 97.88% : 0.013779s : 1: type_inference.infer 2.12% : 0.000298s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000055 1 100.00% : 0.000055s : 1: match.inline ------[predicate.] 0.000119 740 0.80% : 0.000001s : 7: predicate.accumulaten_eliminater 0.99% : 0.000001s : 3: predicate.ad_related_special_op_eliminate 0.66% : 0.000001s : 6: predicate.addn_check_dump 0.75% : 0.000001s : 7: predicate.addn_zero_filter 0.72% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.05% : 0.000002s : 13: predicate.arithmetic_simplify 0.83% : 0.000001s : 7: predicate.cast_eliminate 0.81% : 0.000001s : 6: predicate.check_bprop_eliminate 0.70% : 0.000001s : 6: predicate.compare_switch_simplify 0.22% : 0.000000s : 3: predicate.const_output_eliminate 0.46% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.87% : 0.000002s : 7: predicate.convert_tensor_eliminate 0.72% : 0.000001s : 6: predicate.depend_value_elim 0.77% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.88% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.86% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.36% : 0.000000s : 3: predicate.elim_not_effective 0.58% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.04% : 0.000001s : 10: predicate.environ_add_const_eliminate 1.04% : 0.000001s : 10: predicate.environ_get_add_eliminate 1.01% : 0.000001s : 10: predicate.environ_get_depend_swap 1.76% : 0.000002s : 16: predicate.environ_get_eliminate 1.01% : 0.000001s : 10: predicate.environ_get_set_eliminate 0.88% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.57% : 0.000002s : 8: predicate.float_depend_g_call 0.65% : 0.000001s : 6: predicate.float_environ_get_switch 1.07% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.29% : 0.000000s : 3: predicate.fold_const_symbol 0.80% : 0.000001s : 6: predicate.get_grad_eliminate 0.47% : 0.000001s : 3: predicate.graph_param_transform 0.72% : 0.000001s : 6: predicate.incorporate_call 0.64% : 0.000001s : 6: predicate.incorporate_call_switch 6.06% : 0.000007s : 33: predicate.inline 0.99% : 0.000001s : 6: predicate.inline_without_move 0.50% : 0.000001s : 6: predicate.j_node_and_user_rematch 1.05% : 0.000001s : 6: predicate.less_batch_normalization 1.68% : 0.000002s : 13: predicate.list_to_tuple_eliminator_ 2.25% : 0.000003s : 20: predicate.load_eliminater 1.36% : 0.000002s : 3: predicate.loop_unroll_after_grad 1.57% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.89% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.72% : 0.000001s : 6: predicate.merge_addn 0.72% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.68% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.70% : 0.000001s : 7: predicate.minmaximum_grad 1.12% : 0.000001s : 3: predicate.mutable_eliminate 0.45% : 0.000001s : 3: predicate.opt_reshape 0.55% : 0.000001s : 3: predicate.parallel_virtual_node 1.34% : 0.000002s : 8: predicate.partial_defer_inline 1.31% : 0.000002s : 10: predicate.partial_eliminate 0.99% : 0.000001s : 7: predicate.print_const_string_wrapper 1.00% : 0.000001s : 6: predicate.reduce_all_const_elim 1.02% : 0.000001s : 7: predicate.reduce_eliminate 0.97% : 0.000001s : 6: predicate.remove_not_recompute_node 1.37% : 0.000002s : 13: predicate.replace_applicator 0.58% : 0.000001s : 6: predicate.replace_old_param 0.27% : 0.000000s : 3: predicate.reset_defer_inline 0.80% : 0.000001s : 7: predicate.reshape_eliminate 0.70% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.49% : 0.000001s : 3: predicate.row_tensor_eliminate 1.10% : 0.000001s : 6: predicate.same_eliminate 0.55% : 0.000001s : 6: predicate.set_cell_output_no_recompute 1.09% : 0.000001s : 6: predicate.shard_identity_eliminate 1.43% : 0.000002s : 9: predicate.special_op_eliminate 0.89% : 0.000001s : 6: predicate.specialize_transform 1.15% : 0.000001s : 6: predicate.split_environ_get_set_with_tuple_value 0.93% : 0.000001s : 6: predicate.stack_unstack_eliminate 2.38% : 0.000003s : 20: predicate.stopgrad_eliminater 0.40% : 0.000000s : 3: predicate.switch_call_monad_eliminater 1.04% : 0.000001s : 8: predicate.switch_defer_inline 1.68% : 0.000002s : 14: predicate.switch_layer_defer_inline 4.80% : 0.000006s : 24: predicate.switch_simplify 0.73% : 0.000001s : 7: predicate.tile_eliminate 0.74% : 0.000001s : 7: predicate.transpose_eliminate 1.57% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.47% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.36% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.70% : 0.000003s : 19: predicate.tuple_list_get_item_eliminator 1.35% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.27% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.54% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 2.16% : 0.000003s : 20: predicate.updatestate_pure_node_eliminater 3.06% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.45% : 0.000001s : 3: predicate.value_based_eliminate 0.83% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.76% : 0.000001s : 6: predicate.virtual_output_eliminate 0.48% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000179 4 8.23% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.77% : 0.000164s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 6.186608 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000057s : 1: add_recomputation 0.00% : 0.000013s : 1: assign_add_opt 0.00% : 0.000102s : 1: auto_monad 0.00% : 0.000028s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.02% : 0.001323s : 1: bootstrap 0.00% : 0.000025s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000009s : 1: convert_after_rewriter 0.00% : 0.000021s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.01% : 0.000476s : 1: eliminate_special_op_node 0.00% : 0.000019s : 1: environ_conv 0.00% : 0.000021s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000499s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000010s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000663s : 80: opt.transform.opt_a 0.00% : 0.000021s : 1: opt.transform.opt_after_cconv 0.00% : 0.000069s : 27: opt.transform.opt_b 0.00% : 0.000031s : 1: opt.transform.opt_trans_graph 0.00% : 0.000020s : 3: opt.transform.special_op_eliminate 0.00% : 0.000029s : 4: opt.transform.symbol_engine_opt 0.15% : 0.008976s : 1: opt_a 0.00% : 0.000085s : 1: opt_after_cconv 0.00% : 0.000137s : 1: opt_b 0.17% : 0.010383s : 1: optimize 0.00% : 0.000008s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000007s : 1: order_py_execute_after_rewriter 0.00% : 0.000024s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000010s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.00% : 0.000006s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000056s : 1: pre_auto_parallel 0.00% : 0.000017s : 1: py_interpret_to_execute 0.00% : 0.000010s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000014s : 1: remove_dup_value 0.00% : 0.000162s : 1: renormalize.infer 0.00% : 0.000165s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000075s : 1: rewriter_after_opt_a 0.00% : 0.000041s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000008s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000066s : 1: symbol_engine_optimizer 99.38% : 6.148462s : 1: task_emit 0.00% : 0.000047s : 1: tuple_transform 0.23% : 0.014130s : 1: type_inference 0.00% : 0.000077s : 1: validate TotalTime = 6.17321, [21] [bootstrap]: 0.00119667 [type_inference]: 0.0139639 [auto_monad]: 9.597e-05 [graph_reusing]: 2.37999e-06 [inline]: 1.61026e-06 [parallel-infer-symbol]: 2.57976e-06 [pre_auto_parallel]: 3.06801e-05 [insert-virtual-dataset]: 3.19025e-06 [parallel-infer-symbol-second]: 4.29805e-07 [dataset_repeat_opt]: 1.55997e-06 [pipeline_split]: 1.60001e-06 [optimize]: 0.0104131, [52] [py_interpret_to_execute]: 1.24103e-05 [rewriter_before_opt_a]: 2.89697e-05 [opt_a]: 0.00897762, [2] [Cycle 1]: 0.00111187, [43] [expand_dump_flag]: 3.91016e-06 [switch_simplify]: 2.367e-05 [loop_unroll]: 9.18983e-06 [a_1]: 0.00023434 [recompute_prepare]: 5.42961e-06 [updatestate_depend_eliminate]: 5.69038e-06 [updatestate_assign_eliminate]: 3.11993e-06 [updatestate_loads_eliminate]: 2.81027e-06 [parameter_eliminate]: 3.6899e-06 [a_2]: 7.63303e-05 [accelerated_algorithm]: 4.94998e-06 [shard]: 2.46987e-06 [meta_shard_fg_expand]: 3.32994e-06 [shard_inline]: 5.20982e-06 [auto_parallel]: 9.96003e-06 [parallel]: 8.29995e-06 [flash_sp]: 1.78902e-05 [merge_comm]: 6.13974e-06 [allreduce_fusion]: 3.6899e-06 [matmul_add_comm_reduction]: 9.26014e-06 [allreduce_slice_to_reducescatter]: 5.20144e-07 [virtual_shard_identity]: 5.74021e-06 [virtual_dataset]: 5.11017e-06 [get_grad_eliminate_]: 4.40003e-06 [virtual_output]: 4.36977e-06 [merge_forward]: 4.04008e-06 [cell_reuse_recompute_pass]: 1.47987e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.03e-05 [before_grad]: 8.19983e-06 [inplace_validation]: 3.26987e-06 [meta_fg_expand]: 3.01003e-06 [inplace_validation_after_expand]: 4.23985e-06 [flash_sp_send_recv_attached]: 3.76999e-06 [receive_attached]: 8.94023e-06 [after_resolve]: 1.00797e-05 [a_after_grad]: 7.12043e-06 [special_op_eliminate]: 4.42006e-06 [renormalize]: 0.00029065 [add_forward_monad_depend]: 3.37977e-06 [auto_monad_grad]: 1.91992e-06 [auto_monad_eliminator]: 1.21198e-05 [cse]: 2.37497e-05 [a_3]: 3.31001e-05 [Cycle 2]: 0.00047708, [43] [expand_dump_flag]: 9.4017e-07 [switch_simplify]: 5.32018e-06 [loop_unroll]: 4.53973e-06 [a_1]: 9.29199e-05 [recompute_prepare]: 3.87011e-06 [updatestate_depend_eliminate]: 3.3197e-06 [updatestate_assign_eliminate]: 2.23005e-06 [updatestate_loads_eliminate]: 2.19001e-06 [parameter_eliminate]: 1.32015e-06 [a_2]: 5.43799e-05 [accelerated_algorithm]: 5.45988e-06 [shard]: 1.03982e-06 [meta_shard_fg_expand]: 1.48965e-06 [shard_inline]: 4.84008e-06 [auto_parallel]: 7.90041e-06 [parallel]: 3.79002e-06 [flash_sp]: 5.26011e-06 [merge_comm]: 3.59025e-06 [allreduce_fusion]: 2.84985e-06 [matmul_add_comm_reduction]: 5.08036e-06 [allreduce_slice_to_reducescatter]: 2.90107e-07 [virtual_shard_identity]: 5.29038e-06 [virtual_dataset]: 4.63007e-06 [get_grad_eliminate_]: 4.38001e-06 [virtual_output]: 4.0899e-06 [merge_forward]: 2.47033e-06 [cell_reuse_recompute_pass]: 1.64984e-06 [cell_reuse_handle_not_recompute_node_pass]: 9.09017e-06 [before_grad]: 6.9798e-06 [inplace_validation]: 2.4098e-06 [meta_fg_expand]: 2.46009e-06 [inplace_validation_after_expand]: 2.75997e-06 [flash_sp_send_recv_attached]: 1.01002e-06 [receive_attached]: 6.50063e-07 [after_resolve]: 6.31995e-06 [a_after_grad]: 6.4401e-06 [special_op_eliminate]: 3.94974e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.69738e-07 [auto_monad_grad]: 9.29926e-07 [auto_monad_eliminator]: 4.85033e-06 [cse]: 1.013e-05 [a_3]: 2.52402e-05 [py_interpret_to_execute_after_opt_a]: 6.65989e-06 [slice_cell_reuse_recomputed_activation]: 2.19001e-06 [rewriter_after_opt_a]: 6.77002e-05 [convert_after_rewriter]: 1.50399e-05 [order_py_execute_after_rewriter]: 3.95998e-06 [opt_b]: 0.00018486, [1] [Cycle 1]: 0.00017905, [7] [b_1]: 8.05198e-05 [b_2]: 5.24009e-06 [updatestate_depend_eliminate]: 2.38977e-06 [updatestate_assign_eliminate]: 2.37999e-06 [updatestate_loads_eliminate]: 2.11038e-06 [renormalize]: 1.8999e-07 [cse]: 9.60007e-06 [optimize_parallel_all_gather_comm]: 5.72996e-06 [overlap_param_gather]: 2.65008e-06 [cconv]: 2.169e-05 [loop_unroll]: 0.0004776 [opt_after_cconv]: 8.19601e-05, [1] [Cycle 1]: 7.60402e-05, [7] [c_1]: 2.27899e-05 [parameter_eliminate]: 2.31992e-06 [updatestate_depend_eliminate]: 5.15999e-06 [updatestate_assign_eliminate]: 2.35997e-06 [updatestate_loads_eliminate]: 2.08011e-06 [cse]: 1.30199e-05 [renormalize]: 3.1013e-07 [remove_dup_value]: 9.87016e-06 [tuple_transform]: 4.55803e-05, [1] [Cycle 1]: 4.10201e-05, [2] [d_1]: 3.275e-05 [renormalize]: 2.10013e-07 [partial_unused_args_eliminate]: 2.12993e-06 [add_cache_embedding]: 9.85991e-06 [add_recomputation]: 5.189e-05 [cse_after_recomputation]: 1.81701e-05, [1] [Cycle 1]: 1.35601e-05, [1] [cse]: 8.90996e-06 [environ_conv]: 1.394e-05 [swap_dp_allreduce_reducescatter]: 5.58002e-06 [bias_add_comm_swap]: 2.31015e-06 [label_micro_interleaved_index]: 2.21981e-06 [label_fine_grained_interleaved_index]: 2.02982e-06 [merge_cast_opt]: 1.22003e-06 [slice_recompute_activation]: 1.84029e-06 [micro_interleaved_order_control]: 1.72993e-06 [assign_add_opt]: 8.46991e-06 [ForceFp32Comm]: 7.79983e-07 [remove_cast_before_assign_add]: 9.99775e-07 [full_micro_interleaved_order_control]: 2.08011e-06 [reorder_send_recv_between_fp_bp]: 2.34973e-06 [comm_op_add_attrs]: 1.02026e-06 [add_comm_op_reuse_tag]: 1.07009e-06 [interleave_split_concat_branches]: 7.89762e-07 [interleave_parallel_branches]: 9.20147e-07 [overlap_opt_shard_in_pipeline]: 6.61984e-06 [overlap_opt_shard_grad_in_pipeline]: 2.18023e-06 [control_data_broadcast_order]: 1.09989e-06 [grouped_pairwise_exchange_alltoall]: 1.53016e-06 [offloading_packed_experts]: 1.15018e-06 [overlap_recompute_and_grad_model_parallel]: 1.8701e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.00006e-07 [overlap_recompute_allgather_and_fa_grad]: 1.13016e-06 [overlap_grad_ring_attention]: 1.57999e-06 [overlap_grad_flash_sp]: 1.20001e-05 [begin_end_overlap_inline]: 7.59959e-07 [split_matmul_comm_elemetwise]: 2.02004e-06 [split_layernorm_comm]: 1.55019e-06 [handle_group_info]: 9.39704e-07 [symbol_engine_optimizer]: 6.26203e-05, [1] [Cycle 1]: 5.84899e-05, [6] [build]: 2.50014e-06 [elim_shapecalc]: 7.40029e-06 [elim_not_effective]: 1.16299e-05 [opt_reshape]: 5.29038e-06 [fold_const_symbol]: 8.30973e-06 [renormalize]: 2.59839e-07 [pipeline_parallel_scheduler]: 1.54972e-06 [auto_monad_reorder]: 2.11201e-05 [get_jit_bprop_graph]: 4.20026e-07 [rewriter_after_jit_bprop_graph]: 3.90224e-07 [eliminate_special_op_node]: 0.00044914 [distribtued_split]: 1.55019e-06 [validate]: 3.559e-05 [task_emit]: 6.14669 [execute]: 1.11102e-05 Sums bootstrap : 0.001197s : 0.02% type_inference : 0.013964s : 0.23% auto_monad : 0.000096s : 0.00% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000031s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.00% optimize.rewriter_before_opt_a : 0.000029s : 0.00% optimize.opt_a.expand_dump_flag : 0.000005s : 0.00% optimize.opt_a.switch_simplify : 0.000029s : 0.00% optimize.opt_a.loop_unroll : 0.000014s : 0.00% optimize.opt_a.a_1 : 0.000327s : 0.01% optimize.opt_a.recompute_prepare : 0.000009s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000009s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000131s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000010s : 0.00% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.00% optimize.opt_a.shard_inline : 0.000010s : 0.00% optimize.opt_a.auto_parallel : 0.000018s : 0.00% optimize.opt_a.parallel : 0.000012s : 0.00% optimize.opt_a.flash_sp : 0.000023s : 0.00% optimize.opt_a.merge_comm : 0.000010s : 0.00% optimize.opt_a.allreduce_fusion : 0.000007s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000014s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000011s : 0.00% optimize.opt_a.virtual_dataset : 0.000010s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000009s : 0.00% optimize.opt_a.virtual_output : 0.000008s : 0.00% optimize.opt_a.merge_forward : 0.000007s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000019s : 0.00% optimize.opt_a.before_grad : 0.000015s : 0.00% optimize.opt_a.inplace_validation : 0.000006s : 0.00% optimize.opt_a.meta_fg_expand : 0.000005s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000007s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000010s : 0.00% optimize.opt_a.after_resolve : 0.000016s : 0.00% optimize.opt_a.a_after_grad : 0.000014s : 0.00% optimize.opt_a.special_op_eliminate : 0.000008s : 0.00% optimize.opt_a.renormalize : 0.000291s : 0.00% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000017s : 0.00% optimize.opt_a.cse : 0.000034s : 0.00% optimize.opt_a.a_3 : 0.000058s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000007s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000068s : 0.00% optimize.convert_after_rewriter : 0.000015s : 0.00% optimize.order_py_execute_after_rewriter : 0.000004s : 0.00% optimize.opt_b.b_1 : 0.000081s : 0.00% optimize.opt_b.b_2 : 0.000005s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000010s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000006s : 0.00% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000022s : 0.00% optimize.loop_unroll : 0.000478s : 0.01% optimize.opt_after_cconv.c_1 : 0.000023s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000013s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.00% optimize.tuple_transform.d_1 : 0.000033s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000010s : 0.00% optimize.add_recomputation : 0.000052s : 0.00% optimize.cse_after_recomputation.cse : 0.000009s : 0.00% optimize.environ_conv : 0.000014s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000006s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000007s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000007s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000012s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000005s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000008s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000021s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000449s : 0.01% distribtued_split : 0.000002s : 0.00% validate : 0.000036s : 0.00% task_emit : 6.146694s : 99.71% execute : 0.000011s : 0.00% Time group info: ------[substitution.] 0.000091 20 3.47% : 0.000003s : 2: substitution.elim_not_effective 1.94% : 0.000002s : 2: substitution.fold_const_symbol 5.50% : 0.000005s : 3: substitution.graph_param_transform 59.29% : 0.000054s : 1: substitution.inline 3.60% : 0.000003s : 4: substitution.j_node_and_user_rematch 15.88% : 0.000014s : 2: substitution.reduce_all_const_elim 4.62% : 0.000004s : 4: substitution.remove_not_recompute_node 5.69% : 0.000005s : 2: substitution.replace_old_param ------[type_inference.] 0.013929 2 97.63% : 0.013600s : 1: type_inference.infer 2.37% : 0.000330s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000053 1 100.00% : 0.000053s : 1: match.inline ------[predicate.] 0.000119 740 0.85% : 0.000001s : 7: predicate.accumulaten_eliminater 1.10% : 0.000001s : 3: predicate.ad_related_special_op_eliminate 0.67% : 0.000001s : 6: predicate.addn_check_dump 0.78% : 0.000001s : 7: predicate.addn_zero_filter 0.67% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.01% : 0.000002s : 13: predicate.arithmetic_simplify 0.80% : 0.000001s : 7: predicate.cast_eliminate 0.79% : 0.000001s : 6: predicate.check_bprop_eliminate 0.71% : 0.000001s : 6: predicate.compare_switch_simplify 0.21% : 0.000000s : 3: predicate.const_output_eliminate 0.48% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.82% : 0.000002s : 7: predicate.convert_tensor_eliminate 0.70% : 0.000001s : 6: predicate.depend_value_elim 0.77% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.88% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.84% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.37% : 0.000000s : 3: predicate.elim_not_effective 0.59% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.32% : 0.000002s : 10: predicate.environ_add_const_eliminate 1.03% : 0.000001s : 10: predicate.environ_get_add_eliminate 1.05% : 0.000001s : 10: predicate.environ_get_depend_swap 1.88% : 0.000002s : 16: predicate.environ_get_eliminate 1.01% : 0.000001s : 10: predicate.environ_get_set_eliminate 0.90% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.75% : 0.000002s : 8: predicate.float_depend_g_call 0.68% : 0.000001s : 6: predicate.float_environ_get_switch 1.05% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.24% : 0.000000s : 3: predicate.fold_const_symbol 0.83% : 0.000001s : 6: predicate.get_grad_eliminate 0.40% : 0.000000s : 3: predicate.graph_param_transform 0.77% : 0.000001s : 6: predicate.incorporate_call 0.64% : 0.000001s : 6: predicate.incorporate_call_switch 6.29% : 0.000007s : 33: predicate.inline 0.92% : 0.000001s : 6: predicate.inline_without_move 0.42% : 0.000000s : 6: predicate.j_node_and_user_rematch 0.95% : 0.000001s : 6: predicate.less_batch_normalization 1.82% : 0.000002s : 13: predicate.list_to_tuple_eliminator_ 2.17% : 0.000003s : 20: predicate.load_eliminater 1.57% : 0.000002s : 3: predicate.loop_unroll_after_grad 1.49% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.82% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.73% : 0.000001s : 6: predicate.merge_addn 0.71% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.75% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.69% : 0.000001s : 7: predicate.minmaximum_grad 0.87% : 0.000001s : 3: predicate.mutable_eliminate 0.51% : 0.000001s : 3: predicate.opt_reshape 0.47% : 0.000001s : 3: predicate.parallel_virtual_node 1.42% : 0.000002s : 8: predicate.partial_defer_inline 1.13% : 0.000001s : 10: predicate.partial_eliminate 0.78% : 0.000001s : 7: predicate.print_const_string_wrapper 0.98% : 0.000001s : 6: predicate.reduce_all_const_elim 0.95% : 0.000001s : 7: predicate.reduce_eliminate 0.79% : 0.000001s : 6: predicate.remove_not_recompute_node 1.26% : 0.000001s : 13: predicate.replace_applicator 0.51% : 0.000001s : 6: predicate.replace_old_param 0.24% : 0.000000s : 3: predicate.reset_defer_inline 0.83% : 0.000001s : 7: predicate.reshape_eliminate 0.78% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.47% : 0.000001s : 3: predicate.row_tensor_eliminate 1.04% : 0.000001s : 6: predicate.same_eliminate 0.51% : 0.000001s : 6: predicate.set_cell_output_no_recompute 0.99% : 0.000001s : 6: predicate.shard_identity_eliminate 1.26% : 0.000002s : 9: predicate.special_op_eliminate 1.02% : 0.000001s : 6: predicate.specialize_transform 0.94% : 0.000001s : 6: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000001s : 6: predicate.stack_unstack_eliminate 2.31% : 0.000003s : 20: predicate.stopgrad_eliminater 0.44% : 0.000001s : 3: predicate.switch_call_monad_eliminater 0.99% : 0.000001s : 8: predicate.switch_defer_inline 1.68% : 0.000002s : 14: predicate.switch_layer_defer_inline 4.94% : 0.000006s : 24: predicate.switch_simplify 0.82% : 0.000001s : 7: predicate.tile_eliminate 0.83% : 0.000001s : 7: predicate.transpose_eliminate 1.58% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.47% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.33% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.65% : 0.000003s : 19: predicate.tuple_list_get_item_eliminator 1.39% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.44% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.50% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 2.24% : 0.000003s : 20: predicate.updatestate_pure_node_eliminater 3.02% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.49% : 0.000001s : 3: predicate.value_based_eliminate 1.06% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.70% : 0.000001s : 6: predicate.virtual_output_eliminate 0.47% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000165 4 9.13% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.87% : 0.000150s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 6.184633 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000013s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000056s : 1: add_recomputation 0.00% : 0.000012s : 1: assign_add_opt 0.00% : 0.000107s : 1: auto_monad 0.00% : 0.000026s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.02% : 0.001248s : 1: bootstrap 0.00% : 0.000025s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000019s : 1: convert_after_rewriter 0.00% : 0.000021s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.01% : 0.000462s : 1: eliminate_special_op_node 0.00% : 0.000018s : 1: environ_conv 0.00% : 0.000020s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000486s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000010s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000635s : 80: opt.transform.opt_a 0.00% : 0.000021s : 1: opt.transform.opt_after_cconv 0.00% : 0.000069s : 27: opt.transform.opt_b 0.00% : 0.000031s : 1: opt.transform.opt_trans_graph 0.00% : 0.000019s : 3: opt.transform.special_op_eliminate 0.00% : 0.000029s : 4: opt.transform.symbol_engine_opt 0.15% : 0.008981s : 1: opt_a 0.00% : 0.000087s : 1: opt_after_cconv 0.00% : 0.000188s : 1: opt_b 0.17% : 0.010421s : 1: optimize 0.00% : 0.000009s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000007s : 1: order_py_execute_after_rewriter 0.00% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000010s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000006s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000037s : 1: pre_auto_parallel 0.00% : 0.000017s : 1: py_interpret_to_execute 0.00% : 0.000010s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000014s : 1: remove_dup_value 0.00% : 0.000164s : 1: renormalize.infer 0.00% : 0.000122s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000073s : 1: rewriter_after_opt_a 0.00% : 0.000033s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000009s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000065s : 1: symbol_engine_optimizer 99.39% : 6.146732s : 1: task_emit 0.00% : 0.000053s : 1: tuple_transform 0.23% : 0.013984s : 1: type_inference 0.00% : 0.000078s : 1: validate TotalTime = 6.19867, [21] [bootstrap]: 0.00118931 [type_inference]: 0.0135887 [auto_monad]: 9.16398e-05 [graph_reusing]: 2.46009e-06 [inline]: 1.8701e-06 [parallel-infer-symbol]: 1.03e-05 [pre_auto_parallel]: 3.99402e-05 [insert-virtual-dataset]: 3.15998e-06 [parallel-infer-symbol-second]: 4.29805e-07 [dataset_repeat_opt]: 1.49012e-06 [pipeline_split]: 1.50967e-06 [optimize]: 0.0104684, [52] [py_interpret_to_execute]: 1.26902e-05 [rewriter_before_opt_a]: 2.999e-05 [opt_a]: 0.00901867, [2] [Cycle 1]: 0.00112954, [43] [expand_dump_flag]: 3.81004e-06 [switch_simplify]: 2.56603e-05 [loop_unroll]: 9.24012e-06 [a_1]: 0.00023071 [recompute_prepare]: 6.04009e-06 [updatestate_depend_eliminate]: 5.96046e-06 [updatestate_assign_eliminate]: 3.85987e-06 [updatestate_loads_eliminate]: 2.95043e-06 [parameter_eliminate]: 3.75975e-06 [a_2]: 7.712e-05 [accelerated_algorithm]: 5.24009e-06 [shard]: 2.37999e-06 [meta_shard_fg_expand]: 3.11015e-06 [shard_inline]: 5.34998e-06 [auto_parallel]: 9.77982e-06 [parallel]: 9.06968e-06 [flash_sp]: 1.18399e-05 [merge_comm]: 8.02008e-06 [allreduce_fusion]: 3.62005e-06 [matmul_add_comm_reduction]: 9.27038e-06 [allreduce_slice_to_reducescatter]: 4.89876e-07 [virtual_shard_identity]: 1.31298e-05 [virtual_dataset]: 5.95022e-06 [get_grad_eliminate_]: 4.45032e-06 [virtual_output]: 5.13019e-06 [merge_forward]: 4.27011e-06 [cell_reuse_recompute_pass]: 1.47987e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.13901e-05 [before_grad]: 8.84011e-06 [inplace_validation]: 3.28012e-06 [meta_fg_expand]: 3.66988e-06 [inplace_validation_after_expand]: 4.21982e-06 [flash_sp_send_recv_attached]: 3.57023e-06 [receive_attached]: 2.6701e-06 [after_resolve]: 8.99984e-06 [a_after_grad]: 7.71973e-06 [special_op_eliminate]: 4.67012e-06 [renormalize]: 0.00030064 [add_forward_monad_depend]: 3.56976e-06 [auto_monad_grad]: 2.07964e-06 [auto_monad_eliminator]: 1.16397e-05 [cse]: 2.664e-05 [a_3]: 3.28599e-05 [Cycle 2]: 0.0004812, [43] [expand_dump_flag]: 8.69855e-07 [switch_simplify]: 5.59958e-06 [loop_unroll]: 4.43961e-06 [a_1]: 9.06102e-05 [recompute_prepare]: 4.00003e-06 [updatestate_depend_eliminate]: 2.99001e-06 [updatestate_assign_eliminate]: 2.17976e-06 [updatestate_loads_eliminate]: 2.21003e-06 [parameter_eliminate]: 1.00024e-06 [a_2]: 5.41499e-05 [accelerated_algorithm]: 5.09014e-06 [shard]: 1.22981e-06 [meta_shard_fg_expand]: 1.51014e-06 [shard_inline]: 4.49969e-06 [auto_parallel]: 7.98982e-06 [parallel]: 3.73973e-06 [flash_sp]: 6.14021e-06 [merge_comm]: 3.9502e-06 [allreduce_fusion]: 2.82004e-06 [matmul_add_comm_reduction]: 4.63007e-06 [allreduce_slice_to_reducescatter]: 2.59839e-07 [virtual_shard_identity]: 5.03007e-06 [virtual_dataset]: 4.25987e-06 [get_grad_eliminate_]: 4.21004e-06 [virtual_output]: 4.21982e-06 [merge_forward]: 2.63983e-06 [cell_reuse_recompute_pass]: 1.68988e-06 [cell_reuse_handle_not_recompute_node_pass]: 9.69972e-06 [before_grad]: 7.41985e-06 [inplace_validation]: 2.21003e-06 [meta_fg_expand]: 2.46009e-06 [inplace_validation_after_expand]: 2.86009e-06 [flash_sp_send_recv_attached]: 1.17021e-06 [receive_attached]: 6.19795e-07 [after_resolve]: 6.66035e-06 [a_after_grad]: 6.60028e-06 [special_op_eliminate]: 4.35999e-06 [renormalize]: 7.03149e-08 [add_forward_monad_depend]: 6.50063e-07 [auto_monad_grad]: 9.10368e-07 [auto_monad_eliminator]: 5.28013e-06 [cse]: 1.01798e-05 [a_3]: 2.59704e-05 [py_interpret_to_execute_after_opt_a]: 7.01984e-06 [slice_cell_reuse_recomputed_activation]: 2.41026e-06 [rewriter_after_opt_a]: 7.826e-05 [convert_after_rewriter]: 6.4699e-06 [order_py_execute_after_rewriter]: 4.63007e-06 [opt_b]: 0.00013478, [1] [Cycle 1]: 0.00012939, [7] [b_1]: 8.07401e-05 [b_2]: 5.39981e-06 [updatestate_depend_eliminate]: 2.35997e-06 [updatestate_assign_eliminate]: 2.11969e-06 [updatestate_loads_eliminate]: 1.90968e-06 [renormalize]: 2.70084e-07 [cse]: 8.78982e-06 [optimize_parallel_all_gather_comm]: 6.06012e-06 [overlap_param_gather]: 2.78978e-06 [cconv]: 2.46e-05 [loop_unroll]: 0.00047722 [opt_after_cconv]: 8.34004e-05, [1] [Cycle 1]: 7.71503e-05, [7] [c_1]: 2.35699e-05 [parameter_eliminate]: 2.39024e-06 [updatestate_depend_eliminate]: 4.97978e-06 [updatestate_assign_eliminate]: 2.10991e-06 [updatestate_loads_eliminate]: 1.79978e-06 [cse]: 1.35903e-05 [renormalize]: 3.30154e-07 [remove_dup_value]: 1.07298e-05 [tuple_transform]: 4.62402e-05, [1] [Cycle 1]: 4.16702e-05, [2] [d_1]: 3.26298e-05 [renormalize]: 2.59839e-07 [partial_unused_args_eliminate]: 2.18023e-06 [add_cache_embedding]: 1.08597e-05 [add_recomputation]: 4.81e-05 [cse_after_recomputation]: 2.10903e-05, [1] [Cycle 1]: 1.31601e-05, [1] [cse]: 8.6301e-06 [environ_conv]: 6.82101e-05 [swap_dp_allreduce_reducescatter]: 5.52041e-06 [bias_add_comm_swap]: 2.42982e-06 [label_micro_interleaved_index]: 2.19978e-06 [label_fine_grained_interleaved_index]: 2.37022e-06 [merge_cast_opt]: 1.39e-06 [slice_recompute_activation]: 2.09967e-06 [micro_interleaved_order_control]: 1.83005e-06 [assign_add_opt]: 1.01398e-05 [ForceFp32Comm]: 1.19023e-06 [remove_cast_before_assign_add]: 1.01002e-06 [full_micro_interleaved_order_control]: 2.14996e-06 [reorder_send_recv_between_fp_bp]: 2.52994e-06 [comm_op_add_attrs]: 1.08033e-06 [add_comm_op_reuse_tag]: 1.13016e-06 [interleave_split_concat_branches]: 8.99658e-07 [interleave_parallel_branches]: 8.60076e-07 [overlap_opt_shard_in_pipeline]: 1.20401e-05 [overlap_opt_shard_grad_in_pipeline]: 2.44984e-06 [control_data_broadcast_order]: 1.14972e-06 [grouped_pairwise_exchange_alltoall]: 1.45985e-06 [offloading_packed_experts]: 9.09902e-07 [overlap_recompute_and_grad_model_parallel]: 2.04006e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.79983e-07 [overlap_recompute_allgather_and_fa_grad]: 1.21025e-06 [overlap_grad_ring_attention]: 2.58023e-06 [overlap_grad_flash_sp]: 1.18501e-05 [begin_end_overlap_inline]: 7.79983e-07 [split_matmul_comm_elemetwise]: 2.04006e-06 [split_layernorm_comm]: 1.76998e-06 [handle_group_info]: 1.13994e-06 [symbol_engine_optimizer]: 6.24098e-05, [1] [Cycle 1]: 5.79599e-05, [6] [build]: 3.01003e-06 [elim_shapecalc]: 7.37002e-06 [elim_not_effective]: 1.04001e-05 [opt_reshape]: 5.16977e-06 [fold_const_symbol]: 8.40984e-06 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 1.83005e-06 [auto_monad_reorder]: 2.11e-05 [get_jit_bprop_graph]: 6.9011e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00045088 [distribtued_split]: 1.3602e-06 [validate]: 4.71198e-05 [task_emit]: 6.17245 [execute]: 1.23801e-05 Sums bootstrap : 0.001189s : 0.02% type_inference : 0.013589s : 0.22% auto_monad : 0.000092s : 0.00% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000010s : 0.00% pre_auto_parallel : 0.000040s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.00% optimize.rewriter_before_opt_a : 0.000030s : 0.00% optimize.opt_a.expand_dump_flag : 0.000005s : 0.00% optimize.opt_a.switch_simplify : 0.000031s : 0.00% optimize.opt_a.loop_unroll : 0.000014s : 0.00% optimize.opt_a.a_1 : 0.000321s : 0.01% optimize.opt_a.recompute_prepare : 0.000010s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000009s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000131s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000010s : 0.00% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.00% optimize.opt_a.shard_inline : 0.000010s : 0.00% optimize.opt_a.auto_parallel : 0.000018s : 0.00% optimize.opt_a.parallel : 0.000013s : 0.00% optimize.opt_a.flash_sp : 0.000018s : 0.00% optimize.opt_a.merge_comm : 0.000012s : 0.00% optimize.opt_a.allreduce_fusion : 0.000006s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000014s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.00% optimize.opt_a.virtual_dataset : 0.000010s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000009s : 0.00% optimize.opt_a.virtual_output : 0.000009s : 0.00% optimize.opt_a.merge_forward : 0.000007s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000021s : 0.00% optimize.opt_a.before_grad : 0.000016s : 0.00% optimize.opt_a.inplace_validation : 0.000005s : 0.00% optimize.opt_a.meta_fg_expand : 0.000006s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000007s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000016s : 0.00% optimize.opt_a.a_after_grad : 0.000014s : 0.00% optimize.opt_a.special_op_eliminate : 0.000009s : 0.00% optimize.opt_a.renormalize : 0.000301s : 0.00% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000017s : 0.00% optimize.opt_a.cse : 0.000037s : 0.00% optimize.opt_a.a_3 : 0.000059s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000007s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000078s : 0.00% optimize.convert_after_rewriter : 0.000006s : 0.00% optimize.order_py_execute_after_rewriter : 0.000005s : 0.00% optimize.opt_b.b_1 : 0.000081s : 0.00% optimize.opt_b.b_2 : 0.000005s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000009s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000006s : 0.00% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000025s : 0.00% optimize.loop_unroll : 0.000477s : 0.01% optimize.opt_after_cconv.c_1 : 0.000024s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000014s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.00% optimize.tuple_transform.d_1 : 0.000033s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.00% optimize.add_recomputation : 0.000048s : 0.00% optimize.cse_after_recomputation.cse : 0.000009s : 0.00% optimize.environ_conv : 0.000068s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000006s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000010s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000012s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000003s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000007s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000010s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000005s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000008s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000021s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000451s : 0.01% distribtued_split : 0.000001s : 0.00% validate : 0.000047s : 0.00% task_emit : 6.172446s : 99.71% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.000096 20 1.86% : 0.000002s : 2: substitution.elim_not_effective 1.78% : 0.000002s : 2: substitution.fold_const_symbol 5.30% : 0.000005s : 3: substitution.graph_param_transform 64.48% : 0.000062s : 1: substitution.inline 3.49% : 0.000003s : 4: substitution.j_node_and_user_rematch 14.76% : 0.000014s : 2: substitution.reduce_all_const_elim 4.88% : 0.000005s : 4: substitution.remove_not_recompute_node 3.45% : 0.000003s : 2: substitution.replace_old_param ------[type_inference.] 0.013555 2 97.74% : 0.013249s : 1: type_inference.infer 2.26% : 0.000306s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000061 1 100.00% : 0.000061s : 1: match.inline ------[predicate.] 0.000122 740 0.79% : 0.000001s : 7: predicate.accumulaten_eliminater 1.16% : 0.000001s : 3: predicate.ad_related_special_op_eliminate 0.70% : 0.000001s : 6: predicate.addn_check_dump 0.81% : 0.000001s : 7: predicate.addn_zero_filter 0.71% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.05% : 0.000003s : 13: predicate.arithmetic_simplify 0.83% : 0.000001s : 7: predicate.cast_eliminate 0.89% : 0.000001s : 6: predicate.check_bprop_eliminate 0.76% : 0.000001s : 6: predicate.compare_switch_simplify 0.24% : 0.000000s : 3: predicate.const_output_eliminate 0.48% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.87% : 0.000002s : 7: predicate.convert_tensor_eliminate 0.69% : 0.000001s : 6: predicate.depend_value_elim 0.75% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.81% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.89% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.39% : 0.000000s : 3: predicate.elim_not_effective 0.53% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.20% : 0.000001s : 10: predicate.environ_add_const_eliminate 1.03% : 0.000001s : 10: predicate.environ_get_add_eliminate 1.01% : 0.000001s : 10: predicate.environ_get_depend_swap 1.68% : 0.000002s : 16: predicate.environ_get_eliminate 0.95% : 0.000001s : 10: predicate.environ_get_set_eliminate 0.84% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.66% : 0.000002s : 8: predicate.float_depend_g_call 0.71% : 0.000001s : 6: predicate.float_environ_get_switch 0.99% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.24% : 0.000000s : 3: predicate.fold_const_symbol 0.80% : 0.000001s : 6: predicate.get_grad_eliminate 0.44% : 0.000001s : 3: predicate.graph_param_transform 0.77% : 0.000001s : 6: predicate.incorporate_call 0.65% : 0.000001s : 6: predicate.incorporate_call_switch 5.96% : 0.000007s : 33: predicate.inline 1.01% : 0.000001s : 6: predicate.inline_without_move 0.49% : 0.000001s : 6: predicate.j_node_and_user_rematch 1.02% : 0.000001s : 6: predicate.less_batch_normalization 1.56% : 0.000002s : 13: predicate.list_to_tuple_eliminator_ 2.22% : 0.000003s : 20: predicate.load_eliminater 1.42% : 0.000002s : 3: predicate.loop_unroll_after_grad 1.48% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.88% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.73% : 0.000001s : 6: predicate.merge_addn 0.74% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.75% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.70% : 0.000001s : 7: predicate.minmaximum_grad 0.98% : 0.000001s : 3: predicate.mutable_eliminate 0.50% : 0.000001s : 3: predicate.opt_reshape 0.48% : 0.000001s : 3: predicate.parallel_virtual_node 1.35% : 0.000002s : 8: predicate.partial_defer_inline 1.15% : 0.000001s : 10: predicate.partial_eliminate 0.84% : 0.000001s : 7: predicate.print_const_string_wrapper 0.97% : 0.000001s : 6: predicate.reduce_all_const_elim 1.06% : 0.000001s : 7: predicate.reduce_eliminate 0.80% : 0.000001s : 6: predicate.remove_not_recompute_node 1.32% : 0.000002s : 13: predicate.replace_applicator 0.47% : 0.000001s : 6: predicate.replace_old_param 0.26% : 0.000000s : 3: predicate.reset_defer_inline 0.88% : 0.000001s : 7: predicate.reshape_eliminate 0.74% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.44% : 0.000001s : 3: predicate.row_tensor_eliminate 0.92% : 0.000001s : 6: predicate.same_eliminate 0.52% : 0.000001s : 6: predicate.set_cell_output_no_recompute 0.98% : 0.000001s : 6: predicate.shard_identity_eliminate 1.37% : 0.000002s : 9: predicate.special_op_eliminate 1.01% : 0.000001s : 6: predicate.specialize_transform 1.38% : 0.000002s : 6: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000001s : 6: predicate.stack_unstack_eliminate 2.16% : 0.000003s : 20: predicate.stopgrad_eliminater 0.41% : 0.000000s : 3: predicate.switch_call_monad_eliminater 0.95% : 0.000001s : 8: predicate.switch_defer_inline 1.75% : 0.000002s : 14: predicate.switch_layer_defer_inline 4.89% : 0.000006s : 24: predicate.switch_simplify 0.87% : 0.000001s : 7: predicate.tile_eliminate 0.78% : 0.000001s : 7: predicate.transpose_eliminate 1.67% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.54% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.33% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.75% : 0.000003s : 19: predicate.tuple_list_get_item_eliminator 1.34% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.41% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.52% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 2.12% : 0.000003s : 20: predicate.updatestate_pure_node_eliminater 3.09% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.52% : 0.000001s : 3: predicate.value_based_eliminate 0.81% : 0.000001s : 6: predicate.virtual_dataset_eliminate 1.02% : 0.000001s : 6: predicate.virtual_output_eliminate 0.45% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000136 4 12.56% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 87.44% : 0.000119s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 6.210171 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000052s : 1: add_recomputation 0.00% : 0.000013s : 1: assign_add_opt 0.00% : 0.000102s : 1: auto_monad 0.00% : 0.000027s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.02% : 0.001246s : 1: bootstrap 0.00% : 0.000028s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000010s : 1: convert_after_rewriter 0.00% : 0.000024s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.01% : 0.000463s : 1: eliminate_special_op_node 0.00% : 0.000072s : 1: environ_conv 0.00% : 0.000021s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000486s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.00% : 0.000010s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000644s : 80: opt.transform.opt_a 0.00% : 0.000022s : 1: opt.transform.opt_after_cconv 0.00% : 0.000069s : 27: opt.transform.opt_b 0.00% : 0.000031s : 1: opt.transform.opt_trans_graph 0.00% : 0.000020s : 3: opt.transform.special_op_eliminate 0.00% : 0.000028s : 4: opt.transform.symbol_engine_opt 0.15% : 0.009022s : 1: opt_a 0.00% : 0.000087s : 1: opt_after_cconv 0.00% : 0.000138s : 1: opt_b 0.17% : 0.010479s : 1: optimize 0.00% : 0.000009s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000008s : 1: order_py_execute_after_rewriter 0.00% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000016s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000015s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000046s : 1: pre_auto_parallel 0.00% : 0.000017s : 1: py_interpret_to_execute 0.00% : 0.000010s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000014s : 1: remove_dup_value 0.00% : 0.000168s : 1: renormalize.infer 0.00% : 0.000128s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000083s : 1: rewriter_after_opt_a 0.00% : 0.000034s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000009s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000065s : 1: symbol_engine_optimizer 99.39% : 6.172486s : 1: task_emit 0.00% : 0.000049s : 1: tuple_transform 0.22% : 0.013609s : 1: type_inference 0.00% : 0.000090s : 1: validate TotalTime = 6.31397, [21] [bootstrap]: 0.0013253 [type_inference]: 0.0143524 [auto_monad]: 0.00012076 [graph_reusing]: 2.44007e-06 [inline]: 2.01026e-06 [parallel-infer-symbol]: 2.65986e-06 [pre_auto_parallel]: 4.08702e-05 [insert-virtual-dataset]: 3.69037e-06 [parallel-infer-symbol-second]: 8.49832e-07 [dataset_repeat_opt]: 1.62981e-06 [pipeline_split]: 1.50036e-06 [optimize]: 0.0108398, [52] [py_interpret_to_execute]: 1.37198e-05 [rewriter_before_opt_a]: 3.15099e-05 [opt_a]: 0.00934237, [2] [Cycle 1]: 0.00124765, [43] [expand_dump_flag]: 3.76999e-06 [switch_simplify]: 2.60998e-05 [loop_unroll]: 1.14897e-05 [a_1]: 0.00026289 [recompute_prepare]: 6.3898e-06 [updatestate_depend_eliminate]: 6.11972e-06 [updatestate_assign_eliminate]: 3.32994e-06 [updatestate_loads_eliminate]: 8.40984e-06 [parameter_eliminate]: 4.21004e-06 [a_2]: 9.062e-05 [accelerated_algorithm]: 6.66035e-06 [shard]: 2.13971e-06 [meta_shard_fg_expand]: 3.20002e-06 [shard_inline]: 6.00005e-06 [auto_parallel]: 1.08802e-05 [parallel]: 8.43965e-06 [flash_sp]: 1.986e-05 [merge_comm]: 6.3004e-06 [allreduce_fusion]: 3.95998e-06 [matmul_add_comm_reduction]: 8.65012e-06 [allreduce_slice_to_reducescatter]: 5.30388e-07 [virtual_shard_identity]: 7.17025e-06 [virtual_dataset]: 5.79003e-06 [get_grad_eliminate_]: 7.45989e-06 [virtual_output]: 5.64987e-06 [merge_forward]: 1.16299e-05 [cell_reuse_recompute_pass]: 1.82958e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.30003e-05 [before_grad]: 1.04704e-05 [inplace_validation]: 3.83006e-06 [meta_fg_expand]: 4.04986e-06 [inplace_validation_after_expand]: 5.24987e-06 [flash_sp_send_recv_attached]: 3.76999e-06 [receive_attached]: 2.5304e-06 [after_resolve]: 9.37004e-06 [a_after_grad]: 8.99984e-06 [special_op_eliminate]: 5.56e-06 [renormalize]: 0.00031122 [add_forward_monad_depend]: 3.68012e-06 [auto_monad_grad]: 2.04984e-06 [auto_monad_eliminator]: 1.15302e-05 [cse]: 3.12496e-05 [a_3]: 3.978e-05 [Cycle 2]: 0.00057094, [43] [expand_dump_flag]: 1.13994e-06 [switch_simplify]: 6.4699e-06 [loop_unroll]: 5.77979e-06 [a_1]: 0.00011677 [recompute_prepare]: 5.13997e-06 [updatestate_depend_eliminate]: 3.39979e-06 [updatestate_assign_eliminate]: 2.48989e-06 [updatestate_loads_eliminate]: 2.69013e-06 [parameter_eliminate]: 1.34017e-06 [a_2]: 6.67097e-05 [accelerated_algorithm]: 5.76023e-06 [shard]: 1.24006e-06 [meta_shard_fg_expand]: 1.83005e-06 [shard_inline]: 5.56977e-06 [auto_parallel]: 8.59005e-06 [parallel]: 3.83006e-06 [flash_sp]: 5.51995e-06 [merge_comm]: 4.05964e-06 [allreduce_fusion]: 3.47989e-06 [matmul_add_comm_reduction]: 5.37001e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 6.44987e-06 [virtual_dataset]: 5.43008e-06 [get_grad_eliminate_]: 5.14975e-06 [virtual_output]: 5.08968e-06 [merge_forward]: 2.63005e-06 [cell_reuse_recompute_pass]: 1.91992e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.13803e-05 [before_grad]: 9.04966e-06 [inplace_validation]: 2.50014e-06 [meta_fg_expand]: 3.1204e-06 [inplace_validation_after_expand]: 3.01003e-06 [flash_sp_send_recv_attached]: 9.89996e-07 [receive_attached]: 8.09785e-07 [after_resolve]: 7.83987e-06 [a_after_grad]: 8.17003e-06 [special_op_eliminate]: 5.13997e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.70204e-07 [auto_monad_grad]: 1.07009e-06 [auto_monad_eliminator]: 5.64009e-06 [cse]: 1.09798e-05 [a_3]: 3.16398e-05 [py_interpret_to_execute_after_opt_a]: 7.3798e-06 [slice_cell_reuse_recomputed_activation]: 4.6799e-06 [rewriter_after_opt_a]: 9.22601e-05 [convert_after_rewriter]: 6.02007e-06 [order_py_execute_after_rewriter]: 4.00003e-06 [opt_b]: 0.00015858, [1] [Cycle 1]: 0.00015326, [7] [b_1]: 9.76799e-05 [b_2]: 6.63009e-06 [updatestate_depend_eliminate]: 2.83029e-06 [updatestate_assign_eliminate]: 2.6999e-06 [updatestate_loads_eliminate]: 1.97999e-06 [renormalize]: 2.5006e-07 [cse]: 9.47993e-06 [optimize_parallel_all_gather_comm]: 5.66989e-06 [overlap_param_gather]: 3.57023e-06 [cconv]: 2.226e-05 [loop_unroll]: 0.00048464 [opt_after_cconv]: 9.35597e-05, [1] [Cycle 1]: 8.75499e-05, [7] [c_1]: 2.63103e-05 [parameter_eliminate]: 2.42004e-06 [updatestate_depend_eliminate]: 5.39003e-06 [updatestate_assign_eliminate]: 2.65008e-06 [updatestate_loads_eliminate]: 2.17976e-06 [cse]: 1.40704e-05 [renormalize]: 3.59956e-07 [remove_dup_value]: 1.08299e-05 [tuple_transform]: 5.381e-05, [1] [Cycle 1]: 4.91696e-05, [2] [d_1]: 3.89302e-05 [renormalize]: 1.99769e-07 [partial_unused_args_eliminate]: 2.66964e-06 [add_cache_embedding]: 1.99098e-05 [add_recomputation]: 4.72302e-05 [cse_after_recomputation]: 1.908e-05, [1] [Cycle 1]: 1.45203e-05, [1] [cse]: 9.46037e-06 [environ_conv]: 1.84397e-05 [swap_dp_allreduce_reducescatter]: 5.13019e-06 [bias_add_comm_swap]: 2.83029e-06 [label_micro_interleaved_index]: 2.38977e-06 [label_fine_grained_interleaved_index]: 2.20025e-06 [merge_cast_opt]: 1.30991e-06 [slice_recompute_activation]: 1.91014e-06 [micro_interleaved_order_control]: 2.06986e-06 [assign_add_opt]: 9.96003e-06 [ForceFp32Comm]: 9.09902e-07 [remove_cast_before_assign_add]: 9.30391e-07 [full_micro_interleaved_order_control]: 2.19001e-06 [reorder_send_recv_between_fp_bp]: 2.19001e-06 [comm_op_add_attrs]: 1.39e-06 [add_comm_op_reuse_tag]: 1.11992e-06 [interleave_split_concat_branches]: 8.79634e-07 [interleave_parallel_branches]: 8.60076e-07 [overlap_opt_shard_in_pipeline]: 1.15498e-05 [overlap_opt_shard_grad_in_pipeline]: 1.89012e-06 [control_data_broadcast_order]: 1.2503e-06 [grouped_pairwise_exchange_alltoall]: 1.33971e-06 [offloading_packed_experts]: 1.13016e-06 [overlap_recompute_and_grad_model_parallel]: 2.21981e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.20147e-07 [overlap_recompute_allgather_and_fa_grad]: 1.13994e-06 [overlap_grad_ring_attention]: 1.81003e-06 [overlap_grad_flash_sp]: 1.255e-05 [begin_end_overlap_inline]: 7.30157e-07 [split_matmul_comm_elemetwise]: 2.42004e-06 [split_layernorm_comm]: 1.70013e-06 [handle_group_info]: 1.11014e-06 [symbol_engine_optimizer]: 7.00201e-05, [1] [Cycle 1]: 6.53304e-05, [6] [build]: 2.26032e-06 [elim_shapecalc]: 8.89972e-06 [elim_not_effective]: 1.213e-05 [opt_reshape]: 6.08014e-06 [fold_const_symbol]: 9.85013e-06 [renormalize]: 2.39816e-07 [pipeline_parallel_scheduler]: 1.49012e-06 [auto_monad_reorder]: 2.12099e-05 [get_jit_bprop_graph]: 4.50294e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.00054918 [distribtued_split]: 1.47009e-06 [validate]: 3.75197e-05 [task_emit]: 6.28636 [execute]: 1.09901e-05 Sums bootstrap : 0.001325s : 0.02% type_inference : 0.014352s : 0.23% auto_monad : 0.000121s : 0.00% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000041s : 0.00% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.00% optimize.rewriter_before_opt_a : 0.000032s : 0.00% optimize.opt_a.expand_dump_flag : 0.000005s : 0.00% optimize.opt_a.switch_simplify : 0.000033s : 0.00% optimize.opt_a.loop_unroll : 0.000017s : 0.00% optimize.opt_a.a_1 : 0.000380s : 0.01% optimize.opt_a.recompute_prepare : 0.000012s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000010s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.00% optimize.opt_a.parameter_eliminate : 0.000006s : 0.00% optimize.opt_a.a_2 : 0.000157s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000012s : 0.00% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.00% optimize.opt_a.shard_inline : 0.000012s : 0.00% optimize.opt_a.auto_parallel : 0.000019s : 0.00% optimize.opt_a.parallel : 0.000012s : 0.00% optimize.opt_a.flash_sp : 0.000025s : 0.00% optimize.opt_a.merge_comm : 0.000010s : 0.00% optimize.opt_a.allreduce_fusion : 0.000007s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000014s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000014s : 0.00% optimize.opt_a.virtual_dataset : 0.000011s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000013s : 0.00% optimize.opt_a.virtual_output : 0.000011s : 0.00% optimize.opt_a.merge_forward : 0.000014s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000024s : 0.00% optimize.opt_a.before_grad : 0.000020s : 0.00% optimize.opt_a.inplace_validation : 0.000006s : 0.00% optimize.opt_a.meta_fg_expand : 0.000007s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000008s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000017s : 0.00% optimize.opt_a.a_after_grad : 0.000017s : 0.00% optimize.opt_a.special_op_eliminate : 0.000011s : 0.00% optimize.opt_a.renormalize : 0.000311s : 0.00% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000017s : 0.00% optimize.opt_a.cse : 0.000042s : 0.00% optimize.opt_a.a_3 : 0.000071s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000007s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000005s : 0.00% optimize.rewriter_after_opt_a : 0.000092s : 0.00% optimize.convert_after_rewriter : 0.000006s : 0.00% optimize.order_py_execute_after_rewriter : 0.000004s : 0.00% optimize.opt_b.b_1 : 0.000098s : 0.00% optimize.opt_b.b_2 : 0.000007s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000003s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000003s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000009s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000006s : 0.00% optimize.overlap_param_gather : 0.000004s : 0.00% optimize.cconv : 0.000022s : 0.00% optimize.loop_unroll : 0.000485s : 0.01% optimize.opt_after_cconv.c_1 : 0.000026s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000014s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.00% optimize.tuple_transform.d_1 : 0.000039s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000020s : 0.00% optimize.add_recomputation : 0.000047s : 0.00% optimize.cse_after_recomputation.cse : 0.000009s : 0.00% optimize.environ_conv : 0.000018s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000005s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000010s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000012s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000002s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000009s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000012s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000006s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000010s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000021s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000549s : 0.01% distribtued_split : 0.000001s : 0.00% validate : 0.000038s : 0.00% task_emit : 6.286358s : 99.70% execute : 0.000011s : 0.00% Time group info: ------[substitution.] 0.000095 20 2.12% : 0.000002s : 2: substitution.elim_not_effective 2.25% : 0.000002s : 2: substitution.fold_const_symbol 6.08% : 0.000006s : 3: substitution.graph_param_transform 59.52% : 0.000057s : 1: substitution.inline 4.23% : 0.000004s : 4: substitution.j_node_and_user_rematch 16.58% : 0.000016s : 2: substitution.reduce_all_const_elim 5.81% : 0.000006s : 4: substitution.remove_not_recompute_node 3.41% : 0.000003s : 2: substitution.replace_old_param ------[type_inference.] 0.014319 2 97.74% : 0.013996s : 1: type_inference.infer 2.26% : 0.000323s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000056 1 100.00% : 0.000056s : 1: match.inline ------[predicate.] 0.000141 740 0.79% : 0.000001s : 7: predicate.accumulaten_eliminater 1.07% : 0.000002s : 3: predicate.ad_related_special_op_eliminate 0.65% : 0.000001s : 6: predicate.addn_check_dump 0.79% : 0.000001s : 7: predicate.addn_zero_filter 0.70% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.17% : 0.000003s : 13: predicate.arithmetic_simplify 0.77% : 0.000001s : 7: predicate.cast_eliminate 0.79% : 0.000001s : 6: predicate.check_bprop_eliminate 0.72% : 0.000001s : 6: predicate.compare_switch_simplify 0.23% : 0.000000s : 3: predicate.const_output_eliminate 0.51% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.61% : 0.000002s : 7: predicate.convert_tensor_eliminate 0.74% : 0.000001s : 6: predicate.depend_value_elim 0.82% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.86% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.83% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.35% : 0.000001s : 3: predicate.elim_not_effective 0.59% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.24% : 0.000002s : 10: predicate.environ_add_const_eliminate 1.12% : 0.000002s : 10: predicate.environ_get_add_eliminate 1.08% : 0.000002s : 10: predicate.environ_get_depend_swap 1.79% : 0.000003s : 16: predicate.environ_get_eliminate 1.11% : 0.000002s : 10: predicate.environ_get_set_eliminate 0.89% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.64% : 0.000002s : 8: predicate.float_depend_g_call 0.66% : 0.000001s : 6: predicate.float_environ_get_switch 1.05% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.25% : 0.000000s : 3: predicate.fold_const_symbol 0.76% : 0.000001s : 6: predicate.get_grad_eliminate 0.40% : 0.000001s : 3: predicate.graph_param_transform 0.75% : 0.000001s : 6: predicate.incorporate_call 0.68% : 0.000001s : 6: predicate.incorporate_call_switch 6.19% : 0.000009s : 33: predicate.inline 1.13% : 0.000002s : 6: predicate.inline_without_move 0.52% : 0.000001s : 6: predicate.j_node_and_user_rematch 1.12% : 0.000002s : 6: predicate.less_batch_normalization 1.67% : 0.000002s : 13: predicate.list_to_tuple_eliminator_ 2.14% : 0.000003s : 20: predicate.load_eliminater 1.22% : 0.000002s : 3: predicate.loop_unroll_after_grad 1.57% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.73% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.66% : 0.000001s : 6: predicate.merge_addn 0.71% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.73% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.77% : 0.000001s : 7: predicate.minmaximum_grad 0.92% : 0.000001s : 3: predicate.mutable_eliminate 0.50% : 0.000001s : 3: predicate.opt_reshape 0.45% : 0.000001s : 3: predicate.parallel_virtual_node 1.39% : 0.000002s : 8: predicate.partial_defer_inline 1.34% : 0.000002s : 10: predicate.partial_eliminate 0.79% : 0.000001s : 7: predicate.print_const_string_wrapper 0.91% : 0.000001s : 6: predicate.reduce_all_const_elim 1.05% : 0.000001s : 7: predicate.reduce_eliminate 0.78% : 0.000001s : 6: predicate.remove_not_recompute_node 1.29% : 0.000002s : 13: predicate.replace_applicator 0.53% : 0.000001s : 6: predicate.replace_old_param 0.24% : 0.000000s : 3: predicate.reset_defer_inline 0.77% : 0.000001s : 7: predicate.reshape_eliminate 0.73% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.45% : 0.000001s : 3: predicate.row_tensor_eliminate 1.05% : 0.000001s : 6: predicate.same_eliminate 0.53% : 0.000001s : 6: predicate.set_cell_output_no_recompute 0.92% : 0.000001s : 6: predicate.shard_identity_eliminate 1.42% : 0.000002s : 9: predicate.special_op_eliminate 1.01% : 0.000001s : 6: predicate.specialize_transform 1.11% : 0.000002s : 6: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000001s : 6: predicate.stack_unstack_eliminate 2.09% : 0.000003s : 20: predicate.stopgrad_eliminater 0.42% : 0.000001s : 3: predicate.switch_call_monad_eliminater 0.93% : 0.000001s : 8: predicate.switch_defer_inline 1.67% : 0.000002s : 14: predicate.switch_layer_defer_inline 4.93% : 0.000007s : 24: predicate.switch_simplify 0.75% : 0.000001s : 7: predicate.tile_eliminate 0.81% : 0.000001s : 7: predicate.transpose_eliminate 1.73% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.60% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.34% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.74% : 0.000004s : 19: predicate.tuple_list_get_item_eliminator 1.39% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.45% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.69% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 2.21% : 0.000003s : 20: predicate.updatestate_pure_node_eliminater 3.03% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.49% : 0.000001s : 3: predicate.value_based_eliminate 0.75% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.75% : 0.000001s : 6: predicate.virtual_output_eliminate 0.51% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000128 4 11.64% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 88.36% : 0.000113s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 6.325992 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000024s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000052s : 1: add_recomputation 0.00% : 0.000014s : 1: assign_add_opt 0.00% : 0.000133s : 1: auto_monad 0.00% : 0.000027s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.02% : 0.001375s : 1: bootstrap 0.00% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000011s : 1: convert_after_rewriter 0.00% : 0.000022s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: distribtued_split 0.01% : 0.000563s : 1: eliminate_special_op_node 0.00% : 0.000023s : 1: environ_conv 0.00% : 0.000020s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000009s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000008s : 1: inline 0.00% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000494s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000011s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000750s : 80: opt.transform.opt_a 0.00% : 0.000025s : 1: opt.transform.opt_after_cconv 0.00% : 0.000084s : 27: opt.transform.opt_b 0.00% : 0.000038s : 1: opt.transform.opt_trans_graph 0.00% : 0.000023s : 3: opt.transform.special_op_eliminate 0.00% : 0.000033s : 4: opt.transform.symbol_engine_opt 0.15% : 0.009347s : 1: opt_a 0.00% : 0.000098s : 1: opt_after_cconv 0.00% : 0.000162s : 1: opt_b 0.17% : 0.010848s : 1: optimize 0.00% : 0.000010s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000008s : 1: order_py_execute_after_rewriter 0.00% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000016s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000048s : 1: pre_auto_parallel 0.00% : 0.000018s : 1: py_interpret_to_execute 0.00% : 0.000011s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000015s : 1: remove_dup_value 0.00% : 0.000174s : 1: renormalize.infer 0.00% : 0.000132s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000098s : 1: rewriter_after_opt_a 0.00% : 0.000036s : 1: rewriter_before_opt_a 0.00% : 0.000008s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000008s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000009s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000073s : 1: symbol_engine_optimizer 99.37% : 6.286397s : 1: task_emit 0.00% : 0.000057s : 1: tuple_transform 0.23% : 0.014373s : 1: type_inference 0.00% : 0.000082s : 1: validate TotalTime = 6.32129, [21] [bootstrap]: 0.00136649 [type_inference]: 0.0142148 [auto_monad]: 8.91201e-05 [graph_reusing]: 2.46009e-06 [inline]: 1.23028e-06 [parallel-infer-symbol]: 1.32015e-06 [pre_auto_parallel]: 2.49897e-05 [insert-virtual-dataset]: 2.10013e-06 [parallel-infer-symbol-second]: 3.89758e-07 [dataset_repeat_opt]: 1.01002e-06 [pipeline_split]: 9.79751e-07 [optimize]: 0.0102917, [52] [py_interpret_to_execute]: 9.13022e-06 [rewriter_before_opt_a]: 2.386e-05 [opt_a]: 0.00900071, [2] [Cycle 1]: 0.00103526, [43] [expand_dump_flag]: 3.09013e-06 [switch_simplify]: 1.97198e-05 [loop_unroll]: 9.33977e-06 [a_1]: 0.00020324 [recompute_prepare]: 5.39003e-06 [updatestate_depend_eliminate]: 5.55022e-06 [updatestate_assign_eliminate]: 2.97977e-06 [updatestate_loads_eliminate]: 2.44984e-06 [parameter_eliminate]: 2.88012e-06 [a_2]: 6.977e-05 [accelerated_algorithm]: 5.19957e-06 [shard]: 1.74018e-06 [meta_shard_fg_expand]: 2.21003e-06 [shard_inline]: 4.97e-06 [auto_parallel]: 9.03988e-06 [parallel]: 5.0799e-06 [flash_sp]: 1.30502e-05 [merge_comm]: 4.99003e-06 [allreduce_fusion]: 3.30992e-06 [matmul_add_comm_reduction]: 6.54021e-06 [allreduce_slice_to_reducescatter]: 4.29805e-07 [virtual_shard_identity]: 6.45965e-06 [virtual_dataset]: 5.02961e-06 [get_grad_eliminate_]: 4.5402e-06 [virtual_output]: 4.59002e-06 [merge_forward]: 2.86987e-06 [cell_reuse_recompute_pass]: 1.22003e-06 [cell_reuse_handle_not_recompute_node_pass]: 9.47993e-06 [before_grad]: 8.2897e-06 [inplace_validation]: 2.54018e-06 [meta_fg_expand]: 2.96021e-06 [inplace_validation_after_expand]: 3.11993e-06 [flash_sp_send_recv_attached]: 2.48011e-06 [receive_attached]: 1.95997e-06 [after_resolve]: 7.64988e-06 [a_after_grad]: 7.12974e-06 [special_op_eliminate]: 6.15977e-06 [renormalize]: 0.00029348 [add_forward_monad_depend]: 2.35997e-06 [auto_monad_grad]: 1.91014e-06 [auto_monad_eliminator]: 8.41031e-06 [cse]: 2.39802e-05 [a_3]: 3.12701e-05 [Cycle 2]: 0.00051545, [43] [expand_dump_flag]: 8.40053e-07 [switch_simplify]: 5.41005e-06 [loop_unroll]: 4.50993e-06 [a_1]: 9.22899e-05 [recompute_prepare]: 3.86965e-06 [updatestate_depend_eliminate]: 2.99979e-06 [updatestate_assign_eliminate]: 2.12993e-06 [updatestate_loads_eliminate]: 2.03028e-06 [parameter_eliminate]: 8.09785e-07 [a_2]: 5.347e-05 [accelerated_algorithm]: 4.69992e-06 [shard]: 1.15018e-06 [meta_shard_fg_expand]: 1.41002e-06 [shard_inline]: 4.5998e-06 [auto_parallel]: 6.9798e-06 [parallel]: 2.9197e-06 [flash_sp]: 4.04008e-06 [merge_comm]: 3.43006e-06 [allreduce_fusion]: 2.6701e-06 [matmul_add_comm_reduction]: 4.48013e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 5.15021e-06 [virtual_dataset]: 4.49969e-06 [get_grad_eliminate_]: 4.29014e-06 [virtual_output]: 4.20026e-06 [merge_forward]: 2.59979e-06 [cell_reuse_recompute_pass]: 1.42027e-06 [cell_reuse_handle_not_recompute_node_pass]: 8.80007e-06 [before_grad]: 6.83032e-06 [inplace_validation]: 2.13971e-06 [meta_fg_expand]: 2.59979e-06 [inplace_validation_after_expand]: 2.44984e-06 [flash_sp_send_recv_attached]: 8.89879e-07 [receive_attached]: 5.99772e-07 [after_resolve]: 6.40005e-06 [a_after_grad]: 6.76978e-06 [special_op_eliminate]: 4.04986e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 6.10016e-07 [auto_monad_grad]: 7.89762e-07 [auto_monad_eliminator]: 4.5104e-06 [cse]: 5.60102e-05 [a_3]: 2.57101e-05 [py_interpret_to_execute_after_opt_a]: 6.14021e-06 [slice_cell_reuse_recomputed_activation]: 1.47009e-06 [rewriter_after_opt_a]: 7.57799e-05 [convert_after_rewriter]: 5.46034e-06 [order_py_execute_after_rewriter]: 3.39001e-06 [opt_b]: 0.00013202, [1] [Cycle 1]: 0.00012719, [7] [b_1]: 8.01501e-05 [b_2]: 5.41005e-06 [updatestate_depend_eliminate]: 2.36975e-06 [updatestate_assign_eliminate]: 1.91992e-06 [updatestate_loads_eliminate]: 1.75973e-06 [renormalize]: 2.00234e-07 [cse]: 8.33999e-06 [optimize_parallel_all_gather_comm]: 5.26989e-06 [overlap_param_gather]: 2.42004e-06 [cconv]: 1.348e-05 [loop_unroll]: 0.00045005 [opt_after_cconv]: 7.811e-05, [1] [Cycle 1]: 7.26599e-05, [7] [c_1]: 2.17799e-05 [parameter_eliminate]: 1.5297e-06 [updatestate_depend_eliminate]: 4.08012e-06 [updatestate_assign_eliminate]: 2.26032e-06 [updatestate_loads_eliminate]: 1.8999e-06 [cse]: 1.13598e-05 [renormalize]: 2.79862e-07 [remove_dup_value]: 6.53975e-06 [tuple_transform]: 4.37899e-05, [1] [Cycle 1]: 3.95598e-05, [2] [d_1]: 3.11099e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 1.28988e-06 [add_cache_embedding]: 1.46097e-05 [add_recomputation]: 3.42303e-05 [cse_after_recomputation]: 1.67401e-05, [1] [Cycle 1]: 1.24597e-05, [1] [cse]: 7.99028e-06 [environ_conv]: 1.53701e-05 [swap_dp_allreduce_reducescatter]: 4.69014e-06 [bias_add_comm_swap]: 1.72993e-06 [label_micro_interleaved_index]: 1.35973e-06 [label_fine_grained_interleaved_index]: 1.63028e-06 [merge_cast_opt]: 7.39936e-07 [slice_recompute_activation]: 1.13994e-06 [micro_interleaved_order_control]: 1.10967e-06 [assign_add_opt]: 8.10996e-06 [ForceFp32Comm]: 5.89993e-07 [remove_cast_before_assign_add]: 5.80214e-07 [full_micro_interleaved_order_control]: 1.24006e-06 [reorder_send_recv_between_fp_bp]: 1.76998e-06 [comm_op_add_attrs]: 5.69969e-07 [add_comm_op_reuse_tag]: 5.89993e-07 [interleave_split_concat_branches]: 5.39701e-07 [interleave_parallel_branches]: 5.59725e-07 [overlap_opt_shard_in_pipeline]: 8.25012e-06 [overlap_opt_shard_grad_in_pipeline]: 1.97999e-06 [control_data_broadcast_order]: 7.60425e-07 [grouped_pairwise_exchange_alltoall]: 7.59959e-07 [offloading_packed_experts]: 7.00355e-07 [overlap_recompute_and_grad_model_parallel]: 1.18045e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.49829e-07 [overlap_recompute_allgather_and_fa_grad]: 6.59842e-07 [overlap_grad_ring_attention]: 1.11992e-06 [overlap_grad_flash_sp]: 9.83989e-06 [begin_end_overlap_inline]: 4.49829e-07 [split_matmul_comm_elemetwise]: 1.31968e-06 [split_layernorm_comm]: 1.24006e-06 [handle_group_info]: 7.89762e-07 [symbol_engine_optimizer]: 6.16899e-05, [1] [Cycle 1]: 5.743e-05, [6] [build]: 2.1602e-06 [elim_shapecalc]: 7.67037e-06 [elim_not_effective]: 9.67039e-06 [opt_reshape]: 5.43008e-06 [fold_const_symbol]: 8.33999e-06 [renormalize]: 2.39816e-07 [pipeline_parallel_scheduler]: 1.05007e-06 [auto_monad_reorder]: 1.77003e-05 [get_jit_bprop_graph]: 4.00003e-07 [rewriter_after_jit_bprop_graph]: 2.70084e-07 [eliminate_special_op_node]: 0.0004395 [distribtued_split]: 9.59728e-07 [validate]: 3.18801e-05 [task_emit]: 6.29454 [execute]: 7.70995e-06 Sums bootstrap : 0.001366s : 0.02% type_inference : 0.014215s : 0.23% auto_monad : 0.000089s : 0.00% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000025s : 0.00% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000009s : 0.00% optimize.rewriter_before_opt_a : 0.000024s : 0.00% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000025s : 0.00% optimize.opt_a.loop_unroll : 0.000014s : 0.00% optimize.opt_a.a_1 : 0.000296s : 0.00% optimize.opt_a.recompute_prepare : 0.000009s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000009s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000004s : 0.00% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000123s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000010s : 0.00% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000004s : 0.00% optimize.opt_a.shard_inline : 0.000010s : 0.00% optimize.opt_a.auto_parallel : 0.000016s : 0.00% optimize.opt_a.parallel : 0.000008s : 0.00% optimize.opt_a.flash_sp : 0.000017s : 0.00% optimize.opt_a.merge_comm : 0.000008s : 0.00% optimize.opt_a.allreduce_fusion : 0.000006s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000011s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000012s : 0.00% optimize.opt_a.virtual_dataset : 0.000010s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000009s : 0.00% optimize.opt_a.virtual_output : 0.000009s : 0.00% optimize.opt_a.merge_forward : 0.000005s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000018s : 0.00% optimize.opt_a.before_grad : 0.000015s : 0.00% optimize.opt_a.inplace_validation : 0.000005s : 0.00% optimize.opt_a.meta_fg_expand : 0.000006s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000006s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000014s : 0.00% optimize.opt_a.a_after_grad : 0.000014s : 0.00% optimize.opt_a.special_op_eliminate : 0.000010s : 0.00% optimize.opt_a.renormalize : 0.000294s : 0.00% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000013s : 0.00% optimize.opt_a.cse : 0.000080s : 0.00% optimize.opt_a.a_3 : 0.000057s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000006s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000076s : 0.00% optimize.convert_after_rewriter : 0.000005s : 0.00% optimize.order_py_execute_after_rewriter : 0.000003s : 0.00% optimize.opt_b.b_1 : 0.000080s : 0.00% optimize.opt_b.b_2 : 0.000005s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000008s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000005s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000013s : 0.00% optimize.loop_unroll : 0.000450s : 0.01% optimize.opt_after_cconv.c_1 : 0.000022s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000004s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000011s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000007s : 0.00% optimize.tuple_transform.d_1 : 0.000031s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.00% optimize.add_recomputation : 0.000034s : 0.00% optimize.cse_after_recomputation.cse : 0.000008s : 0.00% optimize.environ_conv : 0.000015s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000005s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000008s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000008s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000010s : 0.00% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000002s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000008s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000010s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000005s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000008s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000018s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000439s : 0.01% distribtued_split : 0.000001s : 0.00% validate : 0.000032s : 0.00% task_emit : 6.294543s : 99.71% execute : 0.000008s : 0.00% Time group info: ------[substitution.] 0.000064 20 2.01% : 0.000001s : 2: substitution.elim_not_effective 2.35% : 0.000001s : 2: substitution.fold_const_symbol 5.62% : 0.000004s : 3: substitution.graph_param_transform 60.23% : 0.000038s : 1: substitution.inline 4.61% : 0.000003s : 4: substitution.j_node_and_user_rematch 15.21% : 0.000010s : 2: substitution.reduce_all_const_elim 6.26% : 0.000004s : 4: substitution.remove_not_recompute_node 3.70% : 0.000002s : 2: substitution.replace_old_param ------[type_inference.] 0.014189 2 98.30% : 0.013948s : 1: type_inference.infer 1.70% : 0.000241s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000038 1 100.00% : 0.000038s : 1: match.inline ------[predicate.] 0.000120 740 0.75% : 0.000001s : 7: predicate.accumulaten_eliminater 1.08% : 0.000001s : 3: predicate.ad_related_special_op_eliminate 0.72% : 0.000001s : 6: predicate.addn_check_dump 0.85% : 0.000001s : 7: predicate.addn_zero_filter 0.72% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.38% : 0.000003s : 13: predicate.arithmetic_simplify 0.89% : 0.000001s : 7: predicate.cast_eliminate 0.82% : 0.000001s : 6: predicate.check_bprop_eliminate 0.69% : 0.000001s : 6: predicate.compare_switch_simplify 0.23% : 0.000000s : 3: predicate.const_output_eliminate 0.42% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.46% : 0.000002s : 7: predicate.convert_tensor_eliminate 0.80% : 0.000001s : 6: predicate.depend_value_elim 0.75% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.85% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.86% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.32% : 0.000000s : 3: predicate.elim_not_effective 0.63% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.20% : 0.000001s : 10: predicate.environ_add_const_eliminate 1.08% : 0.000001s : 10: predicate.environ_get_add_eliminate 1.02% : 0.000001s : 10: predicate.environ_get_depend_swap 1.81% : 0.000002s : 16: predicate.environ_get_eliminate 1.11% : 0.000001s : 10: predicate.environ_get_set_eliminate 0.88% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.45% : 0.000002s : 8: predicate.float_depend_g_call 0.65% : 0.000001s : 6: predicate.float_environ_get_switch 1.08% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.25% : 0.000000s : 3: predicate.fold_const_symbol 0.80% : 0.000001s : 6: predicate.get_grad_eliminate 0.35% : 0.000000s : 3: predicate.graph_param_transform 0.80% : 0.000001s : 6: predicate.incorporate_call 0.65% : 0.000001s : 6: predicate.incorporate_call_switch 5.81% : 0.000007s : 33: predicate.inline 1.00% : 0.000001s : 6: predicate.inline_without_move 0.43% : 0.000001s : 6: predicate.j_node_and_user_rematch 1.18% : 0.000001s : 6: predicate.less_batch_normalization 1.77% : 0.000002s : 13: predicate.list_to_tuple_eliminator_ 2.07% : 0.000002s : 20: predicate.load_eliminater 1.36% : 0.000002s : 3: predicate.loop_unroll_after_grad 1.66% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.82% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.73% : 0.000001s : 6: predicate.merge_addn 0.66% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.71% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.76% : 0.000001s : 7: predicate.minmaximum_grad 0.88% : 0.000001s : 3: predicate.mutable_eliminate 0.52% : 0.000001s : 3: predicate.opt_reshape 0.50% : 0.000001s : 3: predicate.parallel_virtual_node 1.29% : 0.000002s : 8: predicate.partial_defer_inline 1.23% : 0.000001s : 10: predicate.partial_eliminate 0.86% : 0.000001s : 7: predicate.print_const_string_wrapper 1.22% : 0.000001s : 6: predicate.reduce_all_const_elim 1.14% : 0.000001s : 7: predicate.reduce_eliminate 0.82% : 0.000001s : 6: predicate.remove_not_recompute_node 1.30% : 0.000002s : 13: predicate.replace_applicator 0.49% : 0.000001s : 6: predicate.replace_old_param 0.23% : 0.000000s : 3: predicate.reset_defer_inline 0.83% : 0.000001s : 7: predicate.reshape_eliminate 0.77% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 3: predicate.row_tensor_eliminate 1.07% : 0.000001s : 6: predicate.same_eliminate 0.54% : 0.000001s : 6: predicate.set_cell_output_no_recompute 1.13% : 0.000001s : 6: predicate.shard_identity_eliminate 1.28% : 0.000002s : 9: predicate.special_op_eliminate 0.96% : 0.000001s : 6: predicate.specialize_transform 0.89% : 0.000001s : 6: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000001s : 6: predicate.stack_unstack_eliminate 2.11% : 0.000003s : 20: predicate.stopgrad_eliminater 0.42% : 0.000000s : 3: predicate.switch_call_monad_eliminater 0.93% : 0.000001s : 8: predicate.switch_defer_inline 1.80% : 0.000002s : 14: predicate.switch_layer_defer_inline 4.66% : 0.000006s : 24: predicate.switch_simplify 0.83% : 0.000001s : 7: predicate.tile_eliminate 0.77% : 0.000001s : 7: predicate.transpose_eliminate 1.60% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.60% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.32% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.84% : 0.000003s : 19: predicate.tuple_list_get_item_eliminator 1.34% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.36% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.63% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 2.06% : 0.000002s : 20: predicate.updatestate_pure_node_eliminater 3.15% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 3: predicate.value_based_eliminate 0.93% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.81% : 0.000001s : 6: predicate.virtual_output_eliminate 0.55% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000139 4 6.11% : 0.000008s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.89% : 0.000131s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 6.332544 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.00% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.00% : 0.000038s : 1: add_recomputation 0.00% : 0.000011s : 1: assign_add_opt 0.00% : 0.000099s : 1: auto_monad 0.00% : 0.000024s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.02% : 0.001410s : 1: bootstrap 0.00% : 0.000017s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000009s : 1: convert_after_rewriter 0.00% : 0.000020s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.00% : 0.000006s : 1: distribtued_split 0.01% : 0.000450s : 1: eliminate_special_op_node 0.00% : 0.000021s : 1: environ_conv 0.00% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.00% : 0.000006s : 1: inline 0.00% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.01% : 0.000458s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.00% : 0.000009s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000593s : 80: opt.transform.opt_a 0.00% : 0.000020s : 1: opt.transform.opt_after_cconv 0.00% : 0.000069s : 27: opt.transform.opt_b 0.00% : 0.000030s : 1: opt.transform.opt_trans_graph 0.00% : 0.000019s : 3: opt.transform.special_op_eliminate 0.00% : 0.000028s : 4: opt.transform.symbol_engine_opt 0.14% : 0.009004s : 1: opt_a 0.00% : 0.000082s : 1: opt_after_cconv 0.00% : 0.000135s : 1: opt_b 0.16% : 0.010299s : 1: optimize 0.00% : 0.000008s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000006s : 1: order_py_execute_after_rewriter 0.00% : 0.000013s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000011s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.00% : 0.000006s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.00% : 0.000031s : 1: pre_auto_parallel 0.00% : 0.000013s : 1: py_interpret_to_execute 0.00% : 0.000010s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.00% : 0.000010s : 1: remove_dup_value 0.00% : 0.000153s : 1: renormalize.infer 0.00% : 0.000136s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000081s : 1: rewriter_after_opt_a 0.00% : 0.000028s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.00% : 0.000008s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000064s : 1: symbol_engine_optimizer 99.40% : 6.294571s : 1: task_emit 0.00% : 0.000047s : 1: tuple_transform 0.22% : 0.014232s : 1: type_inference 0.00% : 0.000066s : 1: validate TotalTime = 6.34365, [21] [bootstrap]: 0.00134305 [type_inference]: 0.0146114 [auto_monad]: 9.46899e-05 [graph_reusing]: 2.23005e-06 [inline]: 1.81003e-06 [parallel-infer-symbol]: 1.29202e-05 [pre_auto_parallel]: 4.349e-05 [insert-virtual-dataset]: 3.43984e-06 [parallel-infer-symbol-second]: 5.49946e-07 [dataset_repeat_opt]: 1.62981e-06 [pipeline_split]: 1.51014e-06 [optimize]: 0.0105033, [52] [py_interpret_to_execute]: 1.34599e-05 [rewriter_before_opt_a]: 2.93399e-05 [opt_a]: 0.0090902, [2] [Cycle 1]: 0.00111338, [43] [expand_dump_flag]: 3.74997e-06 [switch_simplify]: 2.45296e-05 [loop_unroll]: 9.07015e-06 [a_1]: 0.00022488 [recompute_prepare]: 5.50039e-06 [updatestate_depend_eliminate]: 5.87991e-06 [updatestate_assign_eliminate]: 3.47989e-06 [updatestate_loads_eliminate]: 2.86987e-06 [parameter_eliminate]: 3.79002e-06 [a_2]: 7.94199e-05 [accelerated_algorithm]: 5.11995e-06 [shard]: 2.61003e-06 [meta_shard_fg_expand]: 2.92016e-06 [shard_inline]: 4.84986e-06 [auto_parallel]: 9.72021e-06 [parallel]: 8.55001e-06 [flash_sp]: 1.095e-05 [merge_comm]: 5.95022e-06 [allreduce_fusion]: 3.10969e-06 [matmul_add_comm_reduction]: 9.08971e-06 [allreduce_slice_to_reducescatter]: 5.20144e-07 [virtual_shard_identity]: 5.9898e-06 [virtual_dataset]: 4.80004e-06 [get_grad_eliminate_]: 4.44986e-06 [virtual_output]: 4.44986e-06 [merge_forward]: 4.31016e-06 [cell_reuse_recompute_pass]: 1.2801e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.04099e-05 [before_grad]: 1.091e-05 [inplace_validation]: 3.14042e-06 [meta_fg_expand]: 3.49013e-06 [inplace_validation_after_expand]: 4.10993e-06 [flash_sp_send_recv_attached]: 3.65032e-06 [receive_attached]: 2.68966e-06 [after_resolve]: 8.59983e-06 [a_after_grad]: 7.47014e-06 [special_op_eliminate]: 4.52995e-06 [renormalize]: 0.00030021 [add_forward_monad_depend]: 3.33972e-06 [auto_monad_grad]: 2.12993e-06 [auto_monad_eliminator]: 1.22897e-05 [cse]: 2.65101e-05 [a_3]: 3.26899e-05 [Cycle 2]: 0.00047403, [43] [expand_dump_flag]: 1.03004e-06 [switch_simplify]: 5.60982e-06 [loop_unroll]: 4.48013e-06 [a_1]: 9.005e-05 [recompute_prepare]: 4.0303e-06 [updatestate_depend_eliminate]: 3.37977e-06 [updatestate_assign_eliminate]: 2.25985e-06 [updatestate_loads_eliminate]: 2.08989e-06 [parameter_eliminate]: 1.00024e-06 [a_2]: 5.45797e-05 [accelerated_algorithm]: 4.88991e-06 [shard]: 1.23028e-06 [meta_shard_fg_expand]: 1.55997e-06 [shard_inline]: 4.68967e-06 [auto_parallel]: 7.81007e-06 [parallel]: 3.58978e-06 [flash_sp]: 5.5097e-06 [merge_comm]: 3.33041e-06 [allreduce_fusion]: 2.59001e-06 [matmul_add_comm_reduction]: 4.61005e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 5.11017e-06 [virtual_dataset]: 4.34974e-06 [get_grad_eliminate_]: 4.34974e-06 [virtual_output]: 4.12995e-06 [merge_forward]: 2.61981e-06 [cell_reuse_recompute_pass]: 1.34017e-06 [cell_reuse_handle_not_recompute_node_pass]: 9.11998e-06 [before_grad]: 6.88992e-06 [inplace_validation]: 2.23983e-06 [meta_fg_expand]: 2.4098e-06 [inplace_validation_after_expand]: 2.94996e-06 [flash_sp_send_recv_attached]: 9.69972e-07 [receive_attached]: 6.50063e-07 [after_resolve]: 6.42007e-06 [a_after_grad]: 6.42985e-06 [special_op_eliminate]: 4.14019e-06 [renormalize]: 7.03149e-08 [add_forward_monad_depend]: 6.9011e-07 [auto_monad_grad]: 1.08033e-06 [auto_monad_eliminator]: 4.96022e-06 [cse]: 1.07102e-05 [a_3]: 2.527e-05 [py_interpret_to_execute_after_opt_a]: 6.98026e-06 [slice_cell_reuse_recomputed_activation]: 2.31992e-06 [rewriter_after_opt_a]: 8.17901e-05 [convert_after_rewriter]: 6.23008e-06 [order_py_execute_after_rewriter]: 4.29014e-06 [opt_b]: 0.00013749, [1] [Cycle 1]: 0.00013188, [7] [b_1]: 8.19997e-05 [b_2]: 5.9302e-06 [updatestate_depend_eliminate]: 2.61003e-06 [updatestate_assign_eliminate]: 2.21003e-06 [updatestate_loads_eliminate]: 2.02004e-06 [renormalize]: 2.30037e-07 [cse]: 9.37982e-06 [optimize_parallel_all_gather_comm]: 5.43008e-06 [overlap_param_gather]: 2.47965e-06 [cconv]: 2.23098e-05 [loop_unroll]: 0.0004881 [opt_after_cconv]: 8.39098e-05, [1] [Cycle 1]: 7.805e-05, [7] [c_1]: 2.23699e-05 [parameter_eliminate]: 2.52016e-06 [updatestate_depend_eliminate]: 5.23962e-06 [updatestate_assign_eliminate]: 2.46009e-06 [updatestate_loads_eliminate]: 2.07033e-06 [cse]: 1.35903e-05 [renormalize]: 4.4005e-07 [remove_dup_value]: 8.88994e-06 [tuple_transform]: 4.547e-05, [1] [Cycle 1]: 4.13498e-05, [2] [d_1]: 3.31099e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 2.40002e-06 [add_cache_embedding]: 1.057e-05 [add_recomputation]: 4.59598e-05 [cse_after_recomputation]: 1.78101e-05, [1] [Cycle 1]: 1.33803e-05, [1] [cse]: 8.78004e-06 [environ_conv]: 1.76798e-05 [swap_dp_allreduce_reducescatter]: 5.66989e-06 [bias_add_comm_swap]: 2.39024e-06 [label_micro_interleaved_index]: 2.12016e-06 [label_fine_grained_interleaved_index]: 2.02004e-06 [merge_cast_opt]: 1.39978e-06 [slice_recompute_activation]: 2.29012e-06 [micro_interleaved_order_control]: 1.74996e-06 [assign_add_opt]: 9.29972e-06 [ForceFp32Comm]: 1.41002e-06 [remove_cast_before_assign_add]: 9.69972e-07 [full_micro_interleaved_order_control]: 2.02004e-06 [reorder_send_recv_between_fp_bp]: 2.35997e-06 [comm_op_add_attrs]: 9.29926e-07 [add_comm_op_reuse_tag]: 1.07987e-06 [interleave_split_concat_branches]: 9.00123e-07 [interleave_parallel_branches]: 9.00123e-07 [overlap_opt_shard_in_pipeline]: 9.61963e-06 [overlap_opt_shard_grad_in_pipeline]: 2.34973e-06 [control_data_broadcast_order]: 1.14972e-06 [grouped_pairwise_exchange_alltoall]: 1.41002e-06 [offloading_packed_experts]: 1.13994e-06 [overlap_recompute_and_grad_model_parallel]: 2.2701e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.09785e-07 [overlap_recompute_allgather_and_fa_grad]: 1.22003e-06 [overlap_grad_ring_attention]: 2.22027e-06 [overlap_grad_flash_sp]: 1.28504e-05 [begin_end_overlap_inline]: 7.90227e-07 [split_matmul_comm_elemetwise]: 2.05031e-06 [split_layernorm_comm]: 1.95997e-06 [handle_group_info]: 1.02026e-06 [symbol_engine_optimizer]: 6.47698e-05, [1] [Cycle 1]: 6.03399e-05, [6] [build]: 2.15042e-06 [elim_shapecalc]: 7.53021e-06 [elim_not_effective]: 1.29901e-05 [opt_reshape]: 5.41983e-06 [fold_const_symbol]: 8.06991e-06 [renormalize]: 2.10013e-07 [pipeline_parallel_scheduler]: 1.3602e-06 [auto_monad_reorder]: 2.25999e-05 [get_jit_bprop_graph]: 4.89876e-07 [rewriter_after_jit_bprop_graph]: 5.60191e-07 [eliminate_special_op_node]: 0.00045706 [distribtued_split]: 1.59023e-06 [validate]: 4.86099e-05 [task_emit]: 6.31619 [execute]: 1.23e-05 Sums bootstrap : 0.001343s : 0.02% type_inference : 0.014611s : 0.23% auto_monad : 0.000095s : 0.00% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000013s : 0.00% pre_auto_parallel : 0.000043s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.00% optimize.rewriter_before_opt_a : 0.000029s : 0.00% optimize.opt_a.expand_dump_flag : 0.000005s : 0.00% optimize.opt_a.switch_simplify : 0.000030s : 0.00% optimize.opt_a.loop_unroll : 0.000014s : 0.00% optimize.opt_a.a_1 : 0.000315s : 0.00% optimize.opt_a.recompute_prepare : 0.000010s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000009s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000134s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000010s : 0.00% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000004s : 0.00% optimize.opt_a.shard_inline : 0.000010s : 0.00% optimize.opt_a.auto_parallel : 0.000018s : 0.00% optimize.opt_a.parallel : 0.000012s : 0.00% optimize.opt_a.flash_sp : 0.000016s : 0.00% optimize.opt_a.merge_comm : 0.000009s : 0.00% optimize.opt_a.allreduce_fusion : 0.000006s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000014s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000011s : 0.00% optimize.opt_a.virtual_dataset : 0.000009s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000009s : 0.00% optimize.opt_a.virtual_output : 0.000009s : 0.00% optimize.opt_a.merge_forward : 0.000007s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000020s : 0.00% optimize.opt_a.before_grad : 0.000018s : 0.00% optimize.opt_a.inplace_validation : 0.000005s : 0.00% optimize.opt_a.meta_fg_expand : 0.000006s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000007s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000015s : 0.00% optimize.opt_a.a_after_grad : 0.000014s : 0.00% optimize.opt_a.special_op_eliminate : 0.000009s : 0.00% optimize.opt_a.renormalize : 0.000300s : 0.00% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000017s : 0.00% optimize.opt_a.cse : 0.000037s : 0.00% optimize.opt_a.a_3 : 0.000058s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000007s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000082s : 0.00% optimize.convert_after_rewriter : 0.000006s : 0.00% optimize.order_py_execute_after_rewriter : 0.000004s : 0.00% optimize.opt_b.b_1 : 0.000082s : 0.00% optimize.opt_b.b_2 : 0.000006s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000003s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000009s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000005s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000022s : 0.00% optimize.loop_unroll : 0.000488s : 0.01% optimize.opt_after_cconv.c_1 : 0.000022s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000014s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000009s : 0.00% optimize.tuple_transform.d_1 : 0.000033s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.00% optimize.add_recomputation : 0.000046s : 0.00% optimize.cse_after_recomputation.cse : 0.000009s : 0.00% optimize.environ_conv : 0.000018s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000006s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000009s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000010s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000002s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000008s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000013s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000005s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000008s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000023s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000457s : 0.01% distribtued_split : 0.000002s : 0.00% validate : 0.000049s : 0.00% task_emit : 6.316189s : 99.70% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.000092 20 4.28% : 0.000004s : 2: substitution.elim_not_effective 1.71% : 0.000002s : 2: substitution.fold_const_symbol 5.31% : 0.000005s : 3: substitution.graph_param_transform 61.19% : 0.000057s : 1: substitution.inline 3.82% : 0.000004s : 4: substitution.j_node_and_user_rematch 15.75% : 0.000015s : 2: substitution.reduce_all_const_elim 4.56% : 0.000004s : 4: substitution.remove_not_recompute_node 3.38% : 0.000003s : 2: substitution.replace_old_param ------[type_inference.] 0.014577 2 97.93% : 0.014276s : 1: type_inference.infer 2.07% : 0.000301s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000056 1 100.00% : 0.000056s : 1: match.inline ------[predicate.] 0.000119 740 0.82% : 0.000001s : 7: predicate.accumulaten_eliminater 1.10% : 0.000001s : 3: predicate.ad_related_special_op_eliminate 0.66% : 0.000001s : 6: predicate.addn_check_dump 0.78% : 0.000001s : 7: predicate.addn_zero_filter 0.69% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.10% : 0.000002s : 13: predicate.arithmetic_simplify 0.74% : 0.000001s : 7: predicate.cast_eliminate 0.77% : 0.000001s : 6: predicate.check_bprop_eliminate 0.70% : 0.000001s : 6: predicate.compare_switch_simplify 0.24% : 0.000000s : 3: predicate.const_output_eliminate 0.51% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.61% : 0.000002s : 7: predicate.convert_tensor_eliminate 0.74% : 0.000001s : 6: predicate.depend_value_elim 0.78% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.88% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.85% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.37% : 0.000000s : 3: predicate.elim_not_effective 0.61% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.13% : 0.000001s : 10: predicate.environ_add_const_eliminate 1.02% : 0.000001s : 10: predicate.environ_get_add_eliminate 1.01% : 0.000001s : 10: predicate.environ_get_depend_swap 1.83% : 0.000002s : 16: predicate.environ_get_eliminate 1.03% : 0.000001s : 10: predicate.environ_get_set_eliminate 0.85% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.63% : 0.000002s : 8: predicate.float_depend_g_call 0.65% : 0.000001s : 6: predicate.float_environ_get_switch 1.05% : 0.000001s : 9: predicate.float_tuple_getitem_switch 0.27% : 0.000000s : 3: predicate.fold_const_symbol 0.82% : 0.000001s : 6: predicate.get_grad_eliminate 0.43% : 0.000001s : 3: predicate.graph_param_transform 0.78% : 0.000001s : 6: predicate.incorporate_call 0.67% : 0.000001s : 6: predicate.incorporate_call_switch 6.04% : 0.000007s : 33: predicate.inline 1.13% : 0.000001s : 6: predicate.inline_without_move 0.44% : 0.000001s : 6: predicate.j_node_and_user_rematch 1.19% : 0.000001s : 6: predicate.less_batch_normalization 1.66% : 0.000002s : 13: predicate.list_to_tuple_eliminator_ 2.10% : 0.000002s : 20: predicate.load_eliminater 1.56% : 0.000002s : 3: predicate.loop_unroll_after_grad 1.52% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.95% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.72% : 0.000001s : 6: predicate.merge_addn 0.72% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.73% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.75% : 0.000001s : 7: predicate.minmaximum_grad 1.06% : 0.000001s : 3: predicate.mutable_eliminate 0.49% : 0.000001s : 3: predicate.opt_reshape 0.46% : 0.000001s : 3: predicate.parallel_virtual_node 1.60% : 0.000002s : 8: predicate.partial_defer_inline 1.19% : 0.000001s : 10: predicate.partial_eliminate 0.77% : 0.000001s : 7: predicate.print_const_string_wrapper 1.07% : 0.000001s : 6: predicate.reduce_all_const_elim 0.97% : 0.000001s : 7: predicate.reduce_eliminate 0.83% : 0.000001s : 6: predicate.remove_not_recompute_node 1.25% : 0.000001s : 13: predicate.replace_applicator 0.47% : 0.000001s : 6: predicate.replace_old_param 0.27% : 0.000000s : 3: predicate.reset_defer_inline 0.80% : 0.000001s : 7: predicate.reshape_eliminate 0.78% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 3: predicate.row_tensor_eliminate 1.02% : 0.000001s : 6: predicate.same_eliminate 0.49% : 0.000001s : 6: predicate.set_cell_output_no_recompute 0.98% : 0.000001s : 6: predicate.shard_identity_eliminate 1.36% : 0.000002s : 9: predicate.special_op_eliminate 1.06% : 0.000001s : 6: predicate.specialize_transform 1.10% : 0.000001s : 6: predicate.split_environ_get_set_with_tuple_value 1.02% : 0.000001s : 6: predicate.stack_unstack_eliminate 2.11% : 0.000003s : 20: predicate.stopgrad_eliminater 0.44% : 0.000001s : 3: predicate.switch_call_monad_eliminater 0.99% : 0.000001s : 8: predicate.switch_defer_inline 1.68% : 0.000002s : 14: predicate.switch_layer_defer_inline 4.94% : 0.000006s : 24: predicate.switch_simplify 0.73% : 0.000001s : 7: predicate.tile_eliminate 0.84% : 0.000001s : 7: predicate.transpose_eliminate 1.65% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.48% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.28% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.72% : 0.000003s : 19: predicate.tuple_list_get_item_eliminator 1.35% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.30% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.65% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 2.05% : 0.000002s : 20: predicate.updatestate_pure_node_eliminater 2.90% : 0.000003s : 26: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 3: predicate.value_based_eliminate 0.86% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.81% : 0.000001s : 6: predicate.virtual_output_eliminate 0.48% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000130 4 12.19% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 87.81% : 0.000114s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 6.355159 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000050s : 1: add_recomputation 0.00% : 0.000012s : 1: assign_add_opt 0.00% : 0.000106s : 1: auto_monad 0.00% : 0.000028s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.02% : 0.001397s : 1: bootstrap 0.00% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000010s : 1: convert_after_rewriter 0.00% : 0.000021s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.01% : 0.000470s : 1: eliminate_special_op_node 0.00% : 0.000022s : 1: environ_conv 0.00% : 0.000022s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000498s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000010s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000628s : 80: opt.transform.opt_a 0.00% : 0.000021s : 1: opt.transform.opt_after_cconv 0.00% : 0.000071s : 27: opt.transform.opt_b 0.00% : 0.000032s : 1: opt.transform.opt_trans_graph 0.00% : 0.000019s : 3: opt.transform.special_op_eliminate 0.00% : 0.000031s : 4: opt.transform.symbol_engine_opt 0.14% : 0.009094s : 1: opt_a 0.00% : 0.000088s : 1: opt_after_cconv 0.00% : 0.000140s : 1: opt_b 0.17% : 0.010511s : 1: optimize 0.00% : 0.000009s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000007s : 1: order_py_execute_after_rewriter 0.00% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000013s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000019s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.00% : 0.000006s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000049s : 1: pre_auto_parallel 0.00% : 0.000018s : 1: py_interpret_to_execute 0.00% : 0.000011s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000012s : 1: remove_dup_value 0.00% : 0.000169s : 1: renormalize.infer 0.00% : 0.000126s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000087s : 1: rewriter_after_opt_a 0.00% : 0.000034s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000009s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000068s : 1: symbol_engine_optimizer 99.39% : 6.316230s : 1: task_emit 0.00% : 0.000048s : 1: tuple_transform 0.23% : 0.014632s : 1: type_inference 0.00% : 0.000093s : 1: validate TotalTime = 6.36108, [21] [bootstrap]: 0.00140158 [type_inference]: 0.0144673 [auto_monad]: 9.17898e-05 [graph_reusing]: 1.41002e-06 [inline]: 1.59023e-06 [parallel-infer-symbol]: 8.4904e-06 [pre_auto_parallel]: 3.45502e-05 [insert-virtual-dataset]: 2.16998e-06 [parallel-infer-symbol-second]: 5.0012e-07 [dataset_repeat_opt]: 1.21025e-06 [pipeline_split]: 1.00024e-06 [optimize]: 0.0107935, [52] [py_interpret_to_execute]: 1.08201e-05 [rewriter_before_opt_a]: 3.395e-05 [opt_a]: 0.0093829, [2] [Cycle 1]: 0.00121696, [43] [expand_dump_flag]: 2.63983e-06 [switch_simplify]: 2.24099e-05 [loop_unroll]: 1.17603e-05 [a_1]: 0.00025033 [recompute_prepare]: 6.47968e-06 [updatestate_depend_eliminate]: 5.26989e-06 [updatestate_assign_eliminate]: 3.11015e-06 [updatestate_loads_eliminate]: 2.43029e-06 [parameter_eliminate]: 3.13995e-06 [a_2]: 8.57203e-05 [accelerated_algorithm]: 6.37025e-06 [shard]: 1.89012e-06 [meta_shard_fg_expand]: 2.42004e-06 [shard_inline]: 6.06989e-06 [auto_parallel]: 9.6201e-06 [parallel]: 1.24602e-05 [flash_sp]: 8.17003e-06 [merge_comm]: 1.13398e-05 [allreduce_fusion]: 3.80026e-06 [matmul_add_comm_reduction]: 6.90995e-06 [allreduce_slice_to_reducescatter]: 3.70201e-07 [virtual_shard_identity]: 6.82985e-06 [virtual_dataset]: 5.85988e-06 [get_grad_eliminate_]: 5.44963e-06 [virtual_output]: 5.5898e-06 [merge_forward]: 3.49013e-06 [cell_reuse_recompute_pass]: 1.70013e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.05301e-05 [before_grad]: 9.98983e-06 [inplace_validation]: 3.09013e-06 [meta_fg_expand]: 3.43984e-06 [inplace_validation_after_expand]: 4.17e-06 [flash_sp_send_recv_attached]: 3.03006e-06 [receive_attached]: 8.44989e-06 [after_resolve]: 9.20985e-06 [a_after_grad]: 8.93977e-06 [special_op_eliminate]: 5.89993e-06 [renormalize]: 0.00033964 [add_forward_monad_depend]: 2.27988e-06 [auto_monad_grad]: 1.79e-06 [auto_monad_eliminator]: 8.63988e-06 [cse]: 1.81501e-05 [a_3]: 3.732e-05 [Cycle 2]: 0.00057844, [43] [expand_dump_flag]: 8.60076e-07 [switch_simplify]: 6.98958e-06 [loop_unroll]: 5.54044e-06 [a_1]: 0.00013781 [recompute_prepare]: 5.32996e-06 [updatestate_depend_eliminate]: 3.18e-06 [updatestate_assign_eliminate]: 2.25985e-06 [updatestate_loads_eliminate]: 2.11969e-06 [parameter_eliminate]: 9.49949e-07 [a_2]: 6.617e-05 [accelerated_algorithm]: 5.9302e-06 [shard]: 1.15996e-06 [meta_shard_fg_expand]: 1.51992e-06 [shard_inline]: 5.77001e-06 [auto_parallel]: 7.52974e-06 [parallel]: 3.46033e-06 [flash_sp]: 4.78001e-06 [merge_comm]: 4.1998e-06 [allreduce_fusion]: 3.40026e-06 [matmul_add_comm_reduction]: 4.33018e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 6.37025e-06 [virtual_dataset]: 5.24987e-06 [get_grad_eliminate_]: 5.39003e-06 [virtual_output]: 5.01983e-06 [merge_forward]: 3.05008e-06 [cell_reuse_recompute_pass]: 1.60979e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.15102e-05 [before_grad]: 8.92999e-06 [inplace_validation]: 2.35997e-06 [meta_fg_expand]: 2.84007e-06 [inplace_validation_after_expand]: 2.98023e-06 [flash_sp_send_recv_attached]: 9.19681e-07 [receive_attached]: 7.39936e-07 [after_resolve]: 7.72998e-06 [a_after_grad]: 8.42009e-06 [special_op_eliminate]: 5.30994e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.49715e-07 [auto_monad_grad]: 8.00006e-07 [auto_monad_eliminator]: 5.29969e-06 [cse]: 9.38028e-06 [a_3]: 3.16296e-05 [py_interpret_to_execute_after_opt_a]: 6.93975e-06 [slice_cell_reuse_recomputed_activation]: 1.81003e-06 [rewriter_after_opt_a]: 6.38803e-05 [convert_after_rewriter]: 5.20982e-06 [order_py_execute_after_rewriter]: 3.81982e-06 [opt_b]: 0.00016302, [1] [Cycle 1]: 0.0001576, [7] [b_1]: 9.92799e-05 [b_2]: 7.39982e-06 [updatestate_depend_eliminate]: 3.12971e-06 [updatestate_assign_eliminate]: 2.31992e-06 [updatestate_loads_eliminate]: 2.00002e-06 [renormalize]: 2.10013e-07 [cse]: 9.61032e-06 [optimize_parallel_all_gather_comm]: 5.11017e-06 [overlap_param_gather]: 2.6701e-06 [cconv]: 1.49598e-05 [loop_unroll]: 0.00048002 [opt_after_cconv]: 8.78102e-05, [1] [Cycle 1]: 8.23201e-05, [7] [c_1]: 2.65203e-05 [parameter_eliminate]: 1.77976e-06 [updatestate_depend_eliminate]: 4.21982e-06 [updatestate_assign_eliminate]: 2.44007e-06 [updatestate_loads_eliminate]: 2.02982e-06 [cse]: 1.24597e-05 [renormalize]: 2.99886e-07 [remove_dup_value]: 6.96024e-06 [tuple_transform]: 5.04698e-05, [1] [Cycle 1]: 4.58905e-05, [2] [d_1]: 3.63402e-05 [renormalize]: 2.30037e-07 [partial_unused_args_eliminate]: 1.42027e-06 [add_cache_embedding]: 8.78004e-06 [add_recomputation]: 3.84799e-05 [cse_after_recomputation]: 1.847e-05, [1] [Cycle 1]: 1.38399e-05, [1] [cse]: 8.74978e-06 [environ_conv]: 1.65198e-05 [swap_dp_allreduce_reducescatter]: 4.40003e-06 [bias_add_comm_swap]: 1.83005e-06 [label_micro_interleaved_index]: 1.79e-06 [label_fine_grained_interleaved_index]: 1.45007e-06 [merge_cast_opt]: 1.51992e-06 [slice_recompute_activation]: 1.28988e-06 [micro_interleaved_order_control]: 1.45985e-06 [assign_add_opt]: 8.82987e-06 [ForceFp32Comm]: 7.59959e-07 [remove_cast_before_assign_add]: 6.70087e-07 [full_micro_interleaved_order_control]: 1.5297e-06 [reorder_send_recv_between_fp_bp]: 1.34995e-06 [comm_op_add_attrs]: 7.09668e-07 [add_comm_op_reuse_tag]: 1.0198e-06 [interleave_split_concat_branches]: 6.3004e-07 [interleave_parallel_branches]: 7.39936e-07 [overlap_opt_shard_in_pipeline]: 7.66013e-06 [overlap_opt_shard_grad_in_pipeline]: 1.59023e-06 [control_data_broadcast_order]: 7.59959e-07 [grouped_pairwise_exchange_alltoall]: 1.08965e-06 [offloading_packed_experts]: 8.00006e-07 [overlap_recompute_and_grad_model_parallel]: 1.43005e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.20144e-07 [overlap_recompute_allgather_and_fa_grad]: 7.19912e-07 [overlap_grad_ring_attention]: 1.27964e-06 [overlap_grad_flash_sp]: 1.08802e-05 [begin_end_overlap_inline]: 5.09899e-07 [split_matmul_comm_elemetwise]: 1.73971e-06 [split_layernorm_comm]: 1.24983e-06 [handle_group_info]: 6.39819e-07 [symbol_engine_optimizer]: 7.23703e-05, [1] [Cycle 1]: 6.75498e-05, [6] [build]: 1.95997e-06 [elim_shapecalc]: 1.24597e-05 [elim_not_effective]: 1.10799e-05 [opt_reshape]: 6.08992e-06 [fold_const_symbol]: 9.66014e-06 [renormalize]: 2.99886e-07 [pipeline_parallel_scheduler]: 1.72993e-06 [auto_monad_reorder]: 1.62697e-05 [get_jit_bprop_graph]: 3.59956e-07 [rewriter_after_jit_bprop_graph]: 3.1013e-07 [eliminate_special_op_node]: 0.00046897 [distribtued_split]: 1.19023e-06 [validate]: 3.32203e-05 [task_emit]: 6.33347 [execute]: 8.90996e-06 Sums bootstrap : 0.001402s : 0.02% type_inference : 0.014467s : 0.23% auto_monad : 0.000092s : 0.00% graph_reusing : 0.000001s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000008s : 0.00% pre_auto_parallel : 0.000035s : 0.00% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000011s : 0.00% optimize.rewriter_before_opt_a : 0.000034s : 0.00% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000029s : 0.00% optimize.opt_a.loop_unroll : 0.000017s : 0.00% optimize.opt_a.a_1 : 0.000388s : 0.01% optimize.opt_a.recompute_prepare : 0.000012s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000008s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000152s : 0.00% optimize.opt_a.accelerated_algorithm : 0.000012s : 0.00% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000004s : 0.00% optimize.opt_a.shard_inline : 0.000012s : 0.00% optimize.opt_a.auto_parallel : 0.000017s : 0.00% optimize.opt_a.parallel : 0.000016s : 0.00% optimize.opt_a.flash_sp : 0.000013s : 0.00% optimize.opt_a.merge_comm : 0.000016s : 0.00% optimize.opt_a.allreduce_fusion : 0.000007s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000011s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000013s : 0.00% optimize.opt_a.virtual_dataset : 0.000011s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000011s : 0.00% optimize.opt_a.virtual_output : 0.000011s : 0.00% optimize.opt_a.merge_forward : 0.000007s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.00% optimize.opt_a.before_grad : 0.000019s : 0.00% optimize.opt_a.inplace_validation : 0.000005s : 0.00% optimize.opt_a.meta_fg_expand : 0.000006s : 0.00% optimize.opt_a.inplace_validation_after_expand : 0.000007s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000009s : 0.00% optimize.opt_a.after_resolve : 0.000017s : 0.00% optimize.opt_a.a_after_grad : 0.000017s : 0.00% optimize.opt_a.special_op_eliminate : 0.000011s : 0.00% optimize.opt_a.renormalize : 0.000340s : 0.01% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000014s : 0.00% optimize.opt_a.cse : 0.000028s : 0.00% optimize.opt_a.a_3 : 0.000069s : 0.00% optimize.py_interpret_to_execute_after_opt_a : 0.000007s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000064s : 0.00% optimize.convert_after_rewriter : 0.000005s : 0.00% optimize.order_py_execute_after_rewriter : 0.000004s : 0.00% optimize.opt_b.b_1 : 0.000099s : 0.00% optimize.opt_b.b_2 : 0.000007s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000003s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000010s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000005s : 0.00% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000015s : 0.00% optimize.loop_unroll : 0.000480s : 0.01% optimize.opt_after_cconv.c_1 : 0.000027s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000004s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.cse : 0.000012s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000007s : 0.00% optimize.tuple_transform.d_1 : 0.000036s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000009s : 0.00% optimize.add_recomputation : 0.000038s : 0.00% optimize.cse_after_recomputation.cse : 0.000009s : 0.00% optimize.environ_conv : 0.000017s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000004s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000009s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000008s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000011s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000002s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000011s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000006s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000010s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000016s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000469s : 0.01% distribtued_split : 0.000001s : 0.00% validate : 0.000033s : 0.00% task_emit : 6.333469s : 99.70% execute : 0.000009s : 0.00% Time group info: ------[substitution.] 0.000079 20 1.98% : 0.000002s : 2: substitution.elim_not_effective 2.00% : 0.000002s : 2: substitution.fold_const_symbol 5.69% : 0.000004s : 3: substitution.graph_param_transform 52.05% : 0.000041s : 1: substitution.inline 4.40% : 0.000003s : 4: substitution.j_node_and_user_rematch 13.33% : 0.000011s : 2: substitution.reduce_all_const_elim 17.03% : 0.000013s : 4: substitution.remove_not_recompute_node 3.51% : 0.000003s : 2: substitution.replace_old_param ------[type_inference.] 0.014440 2 98.25% : 0.014187s : 1: type_inference.infer 1.75% : 0.000253s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000040 1 100.00% : 0.000040s : 1: match.inline ------[predicate.] 0.000142 740 0.82% : 0.000001s : 7: predicate.accumulaten_eliminater 0.88% : 0.000001s : 3: predicate.ad_related_special_op_eliminate 0.73% : 0.000001s : 6: predicate.addn_check_dump 0.78% : 0.000001s : 7: predicate.addn_zero_filter 0.70% : 0.000001s : 7: predicate.adjust_all_reduce_mul_add 2.06% : 0.000003s : 13: predicate.arithmetic_simplify 0.91% : 0.000001s : 7: predicate.cast_eliminate 0.75% : 0.000001s : 6: predicate.check_bprop_eliminate 0.73% : 0.000001s : 6: predicate.compare_switch_simplify 0.25% : 0.000000s : 3: predicate.const_output_eliminate 0.50% : 0.000001s : 3: predicate.convert_tensor_all_eliminate 1.84% : 0.000003s : 7: predicate.convert_tensor_eliminate 0.78% : 0.000001s : 6: predicate.depend_value_elim 0.83% : 0.000001s : 7: predicate.dict_get_item_const_eliminator 0.87% : 0.000001s : 7: predicate.dict_get_item_eliminator 0.85% : 0.000001s : 7: predicate.dict_set_item_eliminator 0.31% : 0.000000s : 3: predicate.elim_not_effective 0.59% : 0.000001s : 3: predicate.elim_shapecalc_of_broadcastargs 1.34% : 0.000002s : 10: predicate.environ_add_const_eliminate 1.05% : 0.000001s : 10: predicate.environ_get_add_eliminate 1.12% : 0.000002s : 10: predicate.environ_get_depend_swap 1.81% : 0.000003s : 16: predicate.environ_get_eliminate 1.10% : 0.000002s : 10: predicate.environ_get_set_eliminate 0.90% : 0.000001s : 8: predicate.exchange_switch_depend_value 1.58% : 0.000002s : 8: predicate.float_depend_g_call 0.69% : 0.000001s : 6: predicate.float_environ_get_switch 1.14% : 0.000002s : 9: predicate.float_tuple_getitem_switch 0.24% : 0.000000s : 3: predicate.fold_const_symbol 0.89% : 0.000001s : 6: predicate.get_grad_eliminate 0.28% : 0.000000s : 3: predicate.graph_param_transform 0.80% : 0.000001s : 6: predicate.incorporate_call 0.71% : 0.000001s : 6: predicate.incorporate_call_switch 6.16% : 0.000009s : 33: predicate.inline 1.10% : 0.000002s : 6: predicate.inline_without_move 0.44% : 0.000001s : 6: predicate.j_node_and_user_rematch 1.08% : 0.000002s : 6: predicate.less_batch_normalization 1.82% : 0.000003s : 13: predicate.list_to_tuple_eliminator_ 2.19% : 0.000003s : 20: predicate.load_eliminater 1.03% : 0.000001s : 3: predicate.loop_unroll_after_grad 1.64% : 0.000002s : 10: predicate.loop_unroll_before_grad 1.70% : 0.000002s : 13: predicate.make_slice_get_slice_eliminator 0.77% : 0.000001s : 6: predicate.merge_addn 0.73% : 0.000001s : 6: predicate.micro_step_allgather_replace 0.73% : 0.000001s : 6: predicate.mini_step_allgather_replace 0.72% : 0.000001s : 7: predicate.minmaximum_grad 0.81% : 0.000001s : 3: predicate.mutable_eliminate 0.45% : 0.000001s : 3: predicate.opt_reshape 0.44% : 0.000001s : 3: predicate.parallel_virtual_node 1.32% : 0.000002s : 8: predicate.partial_defer_inline 1.28% : 0.000002s : 10: predicate.partial_eliminate 0.84% : 0.000001s : 7: predicate.print_const_string_wrapper 0.89% : 0.000001s : 6: predicate.reduce_all_const_elim 1.05% : 0.000001s : 7: predicate.reduce_eliminate 0.80% : 0.000001s : 6: predicate.remove_not_recompute_node 1.22% : 0.000002s : 13: predicate.replace_applicator 0.52% : 0.000001s : 6: predicate.replace_old_param 0.25% : 0.000000s : 3: predicate.reset_defer_inline 0.78% : 0.000001s : 7: predicate.reshape_eliminate 0.81% : 0.000001s : 6: predicate.row_tensor_add_zeros_like 0.56% : 0.000001s : 3: predicate.row_tensor_eliminate 1.03% : 0.000001s : 6: predicate.same_eliminate 0.57% : 0.000001s : 6: predicate.set_cell_output_no_recompute 1.09% : 0.000002s : 6: predicate.shard_identity_eliminate 1.56% : 0.000002s : 9: predicate.special_op_eliminate 1.06% : 0.000001s : 6: predicate.specialize_transform 0.97% : 0.000001s : 6: predicate.split_environ_get_set_with_tuple_value 1.06% : 0.000001s : 6: predicate.stack_unstack_eliminate 2.17% : 0.000003s : 20: predicate.stopgrad_eliminater 0.40% : 0.000001s : 3: predicate.switch_call_monad_eliminater 0.95% : 0.000001s : 8: predicate.switch_defer_inline 1.73% : 0.000002s : 14: predicate.switch_layer_defer_inline 4.28% : 0.000006s : 24: predicate.switch_simplify 0.83% : 0.000001s : 7: predicate.tile_eliminate 0.78% : 0.000001s : 7: predicate.transpose_eliminate 1.57% : 0.000002s : 13: predicate.tuple_list_convert_item_index_to_positive 1.65% : 0.000002s : 13: predicate.tuple_list_get_item_const_eliminator 1.34% : 0.000002s : 13: predicate.tuple_list_get_item_depend_reorder 2.63% : 0.000004s : 19: predicate.tuple_list_get_item_eliminator 1.44% : 0.000002s : 13: predicate.tuple_list_get_set_item_eliminator 2.43% : 0.000003s : 19: predicate.tuple_list_set_item_eliminator 1.64% : 0.000002s : 13: predicate.tuple_to_list_eliminator_ 2.15% : 0.000003s : 20: predicate.updatestate_pure_node_eliminater 3.05% : 0.000004s : 26: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 3: predicate.value_based_eliminate 0.85% : 0.000001s : 6: predicate.virtual_dataset_eliminate 0.80% : 0.000001s : 6: predicate.virtual_output_eliminate 0.52% : 0.000001s : 3: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000155 4 6.21% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.79% : 0.000146s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 6.373080 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000013s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000043s : 1: add_recomputation 0.00% : 0.000012s : 1: assign_add_opt 0.00% : 0.000102s : 1: auto_monad 0.00% : 0.000023s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.02% : 0.001448s : 1: bootstrap 0.00% : 0.000019s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000009s : 1: convert_after_rewriter 0.00% : 0.000022s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000007s : 1: distribtued_split 0.01% : 0.000480s : 1: eliminate_special_op_node 0.00% : 0.000021s : 1: environ_conv 0.00% : 0.000017s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000005s : 1: get_jit_bprop_graph 0.00% : 0.000007s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000488s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000009s : 1: opt.transform.loop_unroll_optimizer 0.01% : 0.000758s : 80: opt.transform.opt_a 0.00% : 0.000025s : 1: opt.transform.opt_after_cconv 0.00% : 0.000086s : 27: opt.transform.opt_b 0.00% : 0.000035s : 1: opt.transform.opt_trans_graph 0.00% : 0.000025s : 3: opt.transform.special_op_eliminate 0.00% : 0.000035s : 4: opt.transform.symbol_engine_opt 0.15% : 0.009387s : 1: opt_a 0.00% : 0.000091s : 1: opt_after_cconv 0.00% : 0.000166s : 1: opt_b 0.17% : 0.010802s : 1: optimize 0.00% : 0.000008s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000007s : 1: order_py_execute_after_rewriter 0.00% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000011s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000014s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.00% : 0.000041s : 1: pre_auto_parallel 0.00% : 0.000015s : 1: py_interpret_to_execute 0.00% : 0.000011s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000011s : 1: remove_dup_value 0.00% : 0.000171s : 1: renormalize.infer 0.00% : 0.000163s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.00% : 0.000069s : 1: rewriter_after_opt_a 0.00% : 0.000038s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000008s : 1: swap_dp_allreduce_reducescatter 0.00% : 0.000075s : 1: symbol_engine_optimizer 99.38% : 6.333503s : 1: task_emit 0.00% : 0.000054s : 1: tuple_transform 0.23% : 0.014490s : 1: type_inference 0.00% : 0.000071s : 1: validate ........ =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") =============================== warnings summary =============================== =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 =============================== warnings summary =============================== =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul")=============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") ======================= 1 passed, 18 warnings in 17.54s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") ======================= 1 passed, 18 warnings in 17.55s ========================-- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 1 passed, 18 warnings in 17.55s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 ======================= 1 passed, 18 warnings in 17.55s ======================== ======================= 1 passed, 18 warnings in 17.55s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 1 passed, 18 warnings in 17.56s ======================== ======================= 1 passed, 18 warnings in 17.54s ======================== ======================= 1 passed, 18 warnings in 17.54s ======================== [WARNING] DEVICE(59419,ffffa377ec10,python3.7):2025-02-07-15:55:38.815.885 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x464816b0 is not exist. [WARNING] DEVICE(59420,ffffb14e2c10,python3.7):2025-02-07-15:55:38.865.792 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x29462eb0 is not exist. [WARNING] DEVICE(59425,ffffb19bbc10,python3.7):2025-02-07-15:55:38.889.956 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2d129dc0 is not exist. [WARNING] DEVICE(59424,ffffb0772c10,python3.7):2025-02-07-15:55:38.917.172 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x1f58eae0 is not exist. [WARNING] DEVICE(59426,ffff95f09c10,python3.7):2025-02-07-15:55:38.956.664 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x4afd5660 is not exist. [WARNING] DEVICE(59421,ffff99b7fc10,python3.7):2025-02-07-15:55:38.965.347 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3b3d2000 is not exist. [WARNING] DEVICE(59422,ffffa47d7c10,python3.7):2025-02-07-15:55:38.991.470 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2e33a240 is not exist. [WARNING] DEVICE(59423,ffff948efc10,python3.7):2025-02-07-15:55:42.814.949 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x54556a20 is not exist. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 1 passed, 18 warnings in 41.56s ======================== ff8c39f2e51611efac92c4447d93fe45/pass/test_remove_redundancy_test_remove_redundancy_1_1_dp.log0000644000175400017540001414041514751343157031653 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collected 1 item test_remove_redundancy.py [WARNING] ME(32753:281473065438224,MainProcess):2025-02-07-15:49:58.529.799 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:0, log file:worker_0.log. Environment variable [RANK_ID] is exported. [WARNING] ME(32753:281473065438224,MainProcess):2025-02-07-15:49:58.660.631 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:1, log file:worker_1.log. Environment variable [RANK_ID] is exported. [WARNING] ME(32753:281473065438224,MainProcess):2025-02-07-15:49:58.800.068 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:2, log file:worker_2.log. Environment variable [RANK_ID] is exported. [WARNING] ME(32753:281473065438224,MainProcess):2025-02-07-15:49:58.941.459 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:3, log file:worker_3.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 [WARNING] ME(32753:281473065438224,MainProcess):2025-02-07-15:49:59.918.09 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:4, log file:worker_4.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 [WARNING] ME(32753:281473065438224,MainProcess):2025-02-07-15:49:59.246.350 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:5, log file:worker_5.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 [WARNING] ME(32753:281473065438224,MainProcess):2025-02-07-15:49:59.401.675 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:6, log file:worker_6.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 [WARNING] ME(32753:281473065438224,MainProcess):2025-02-07-15:49:59.562.289 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:7, log file:worker_7.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 [WARNING] ME(32753:281473065438224,MainProcess):2025-02-07-15:49:59.719.212 [mindspore/parallel/cluster/process_entity/_api.py:223] Distributed job is spawned. Waiting all processes to exit... ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 [WARNING] DISTRIBUTED(32811,ffff81c64c10,python3.7):2025-02-07-15:50:04.659.696 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49932, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32811,ffff014d40f0,python3.7):2025-02-07-15:50:04.659.696 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49932 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32811,ffff81c64c10,python3.7):2025-02-07-15:50:04.659.785 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(32835,ffff1374d0f0,python3.7):2025-02-07-15:50:04.913.987 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49936 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32835,ffff93ec6c10,python3.7):2025-02-07-15:50:04.913.987 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49936, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32835,ffff93ec6c10,python3.7):2025-02-07-15:50:04.914.215 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49938, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32835,ffff93ec6c10,python3.7):2025-02-07-15:50:04.914.249 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(32835,ffff1474f0f0,python3.7):2025-02-07-15:50:04.914.251 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49938 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32826,ffff12b8a0f0,python3.7):2025-02-07-15:50:04.957.860 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49940 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32826,ffff93305c10,python3.7):2025-02-07-15:50:04.957.852 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49940, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32826,ffff93305c10,python3.7):2025-02-07-15:50:04.958.032 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49942, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32826,ffff13b8c0f0,python3.7):2025-02-07-15:50:04.958.059 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49942 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32826,ffff93305c10,python3.7):2025-02-07-15:50:04.958.083 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(32811,ffff81c64c10,python3.7):2025-02-07-15:50:05.160.086 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49944, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32811,ffff81c64c10,python3.7):2025-02-07-15:50:05.160.136 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(32811,ffff024d60f0,python3.7):2025-02-07-15:50:05.160.135 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49944 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32856,ffff28e940f0,python3.7):2025-02-07-15:50:05.246.423 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49946 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32856,ffffa9615c10,python3.7):2025-02-07-15:50:05.246.421 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49946, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32856,ffffa9615c10,python3.7):2025-02-07-15:50:05.246.588 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49948, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32856,ffff29e960f0,python3.7):2025-02-07-15:50:05.246.622 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49948 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32856,ffffa9615c10,python3.7):2025-02-07-15:50:05.246.631 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(32868,ffffaff3fc10,python3.7):2025-02-07-15:50:05.314.093 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49950, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32868,ffff2f7ac0f0,python3.7):2025-02-07-15:50:05.314.094 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49950 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32868,ffffaff3fc10,python3.7):2025-02-07-15:50:05.314.216 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(32835,ffff93ec6c10,python3.7):2025-02-07-15:50:05.414.995 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(32826,ffff93305c10,python3.7):2025-02-07-15:50:05.458.536 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(32881,ffff1d2b30f0,python3.7):2025-02-07-15:50:05.570.711 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49954 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32881,ffff9da47c10,python3.7):2025-02-07-15:50:05.570.711 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49954, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32881,ffff9da47c10,python3.7):2025-02-07-15:50:05.570.884 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49956, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32881,ffff1e2b50f0,python3.7):2025-02-07-15:50:05.570.910 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49956 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32881,ffff9da47c10,python3.7):2025-02-07-15:50:05.570.926 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(32811,ffff81c64c10,python3.7):2025-02-07-15:50:05.660.700 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(32916,ffffaa25ac10,python3.7):2025-02-07-15:50:05.664.647 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49958, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32916,ffff29ad50f0,python3.7):2025-02-07-15:50:05.664.648 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49958 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32916,ffffaa25ac10,python3.7):2025-02-07-15:50:05.664.708 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(32903,fffefbfff0f0,python3.7):2025-02-07-15:50:05.693.021 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49960 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32903,ffff80a30c10,python3.7):2025-02-07-15:50:05.693.021 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:49960, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32903,ffff80a30c10,python3.7):2025-02-07-15:50:05.693.193 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49962, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32903,ffff80a30c10,python3.7):2025-02-07-15:50:05.693.235 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(32903,ffff012b50f0,python3.7):2025-02-07-15:50:05.693.233 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49962 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32856,ffffa9615c10,python3.7):2025-02-07-15:50:05.747.022 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(32868,ffffaff3fc10,python3.7):2025-02-07-15:50:05.814.392 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49964, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32868,ffffaff3fc10,python3.7):2025-02-07-15:50:05.814.420 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(32868,ffff307ae0f0,python3.7):2025-02-07-15:50:05.814.444 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49964 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32835,ffff93ec6c10,python3.7):2025-02-07-15:50:05.915.125 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(32826,ffff93305c10,python3.7):2025-02-07-15:50:05.958.629 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(32881,ffff9da47c10,python3.7):2025-02-07-15:50:06.071.304 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(32811,ffff81c64c10,python3.7):2025-02-07-15:50:06.160.793 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(32916,ffffaa25ac10,python3.7):2025-02-07-15:50:06.164.852 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:49966, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(32916,ffffaa25ac10,python3.7):2025-02-07-15:50:06.164.874 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(32916,ffff2aad70f0,python3.7):2025-02-07-15:50:06.164.883 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:49966 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(32903,ffff80a30c10,python3.7):2025-02-07-15:50:06.193.615 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(32856,ffffa9615c10,python3.7):2025-02-07-15:50:06.247.109 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(32868,ffffaff3fc10,python3.7):2025-02-07-15:50:06.314.808 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(32835,ffff93ec6c10,python3.7):2025-02-07-15:50:06.415.228 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(32826,ffff93305c10,python3.7):2025-02-07-15:50:06.458.715 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(32881,ffff9da47c10,python3.7):2025-02-07-15:50:06.571.392 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(32811,ffff81c64c10,python3.7):2025-02-07-15:50:06.660.869 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(32916,ffffaa25ac10,python3.7):2025-02-07-15:50:06.665.247 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(32903,ffff80a30c10,python3.7):2025-02-07-15:50:06.693.706 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(32856,ffffa9615c10,python3.7):2025-02-07-15:50:06.747.193 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(32868,ffffaff3fc10,python3.7):2025-02-07-15:50:06.814.894 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(32835,ffff93ec6c10,python3.7):2025-02-07-15:50:06.915.315 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(4/1200). [WARNING] DISTRIBUTED(32826,ffff93305c10,python3.7):2025-02-07-15:50:06.958.799 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(4/1200). [WARNING] DISTRIBUTED(32881,ffff9da47c10,python3.7):2025-02-07-15:50:07.071.476 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(32811,ffff81c64c10,python3.7):2025-02-07-15:50:07.160.943 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(4/1200). [WARNING] DISTRIBUTED(32916,ffffaa25ac10,python3.7):2025-02-07-15:50:07.165.346 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(32916,ffffaa25ac10,python3.7):2025-02-07-15:50:07.165.372 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 7 rank id: 7 [WARNING] DISTRIBUTED(32903,ffff80a30c10,python3.7):2025-02-07-15:50:07.193.808 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(32903,ffff80a30c10,python3.7):2025-02-07-15:50:07.193.840 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 6 rank id: 6 [WARNING] DISTRIBUTED(32856,ffffa9615c10,python3.7):2025-02-07-15:50:07.247.289 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(32856,ffffa9615c10,python3.7):2025-02-07-15:50:07.247.319 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 3 rank id: 3 [WARNING] DISTRIBUTED(32868,ffffaff3fc10,python3.7):2025-02-07-15:50:07.314.994 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(32868,ffffaff3fc10,python3.7):2025-02-07-15:50:07.315.022 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 4 rank id: 4 [WARNING] DISTRIBUTED(32835,ffff93ec6c10,python3.7):2025-02-07-15:50:07.415.426 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(32835,ffff93ec6c10,python3.7):2025-02-07-15:50:07.415.463 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 2 rank id: 2 [WARNING] DISTRIBUTED(32826,ffff93305c10,python3.7):2025-02-07-15:50:07.458.908 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(32826,ffff93305c10,python3.7):2025-02-07-15:50:07.458.939 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 1 rank id: 1 [WARNING] DISTRIBUTED(32881,ffff9da47c10,python3.7):2025-02-07-15:50:07.571.583 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(32881,ffff9da47c10,python3.7):2025-02-07-15:50:07.571.615 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 5 rank id: 5 [WARNING] DISTRIBUTED(32811,ffff81c64c10,python3.7):2025-02-07-15:50:07.661.073 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(32811,ffff81c64c10,python3.7):2025-02-07-15:50:07.661.129 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 0 rank id: 0 [WARNING] DISTRIBUTED(32916,ffffaa25ac10,python3.7):2025-02-07-15:50:11.995.786 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(32916,ffffaa25ac10,python3.7):2025-02-07-15:50:11.996.032 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(32916,fffea5ffb0f0,python3.7):2025-02-07-15:50:12.001.752 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 distribute network. collected 1 item remove_redundancy_dp.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DISTRIBUTED(32868,ffffaff3fc10,python3.7):2025-02-07-15:50:12.083.249 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(32868,ffffaff3fc10,python3.7):2025-02-07-15:50:12.083.458 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(32868,fffea37fe0f0,python3.7):2025-02-07-15:50:12.084.045 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 distribute network. collected 1 item remove_redundancy_dp.py distribute network shard. [WARNING] DISTRIBUTED(32856,ffffa9615c10,python3.7):2025-02-07-15:50:12.108.417 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(32856,ffffa9615c10,python3.7):2025-02-07-15:50:12.108.616 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(32856,fffead7fa0f0,python3.7):2025-02-07-15:50:12.109.224 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 distribute network. distribute network create dataset. collected 1 item remove_redundancy_dp.py distribute network shard. distribute network train. distribute network create dataset. [WARNING] DISTRIBUTED(32835,ffff93ec6c10,python3.7):2025-02-07-15:50:12.153.137 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(32835,ffff93ec6c10,python3.7):2025-02-07-15:50:12.153.327 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(32835,fffe86ffd0f0,python3.7):2025-02-07-15:50:12.153.913 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 distribute network. collected 1 item remove_redundancy_dp.py distribute network train. distribute network shard. distribute network create dataset. distribute network train. [WARNING] DISTRIBUTED(32903,ffff80a30c10,python3.7):2025-02-07-15:50:12.211.603 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(32903,ffff80a30c10,python3.7):2025-02-07-15:50:12.211.824 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(32903,fffe84ff90f0,python3.7):2025-02-07-15:50:12.212.428 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 distribute network. collected 1 item remove_redundancy_dp.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DISTRIBUTED(32881,ffff9da47c10,python3.7):2025-02-07-15:50:12.393.732 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(32881,ffff9da47c10,python3.7):2025-02-07-15:50:12.394.029 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(32881,fffea17fa0f0,python3.7):2025-02-07-15:50:12.394.817 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 distribute network. collected 1 item remove_redundancy_dp.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DISTRIBUTED(32826,ffff93305c10,python3.7):2025-02-07-15:50:12.451.189 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(32826,ffff93305c10,python3.7):2025-02-07-15:50:12.452.010 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(32826,fffe86ffd0f0,python3.7):2025-02-07-15:50:12.456.909 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 distribute network. [WARNING] DEVICE(32916,fffea5ffb0f0,python3.7):2025-02-07-15:50:12.504.155 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 collected 1 item [WARNING] DEVICE(32868,fffea37fe0f0,python3.7):2025-02-07-15:50:12.587.835 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 remove_redundancy_dp.py distribute network shard. [WARNING] DEVICE(32856,fffead7fa0f0,python3.7):2025-02-07-15:50:12.612.402 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 [WARNING] DEVICE(32835,fffe86ffd0f0,python3.7):2025-02-07-15:50:12.655.905 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 distribute network create dataset. [WARNING] DEVICE(32903,fffe84ff90f0,python3.7):2025-02-07-15:50:12.713.639 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 distribute network train. [WARNING] DISTRIBUTED(32811,ffff81c64c10,python3.7):2025-02-07-15:50:12.810.147 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(32811,ffff81c64c10,python3.7):2025-02-07-15:50:12.810.429 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group distribute network. [WARNING] DISTRIBUTED(32811,fffe7dffb0f0,python3.7):2025-02-07-15:50:12.814.773 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32811,fffe5b7fe0f0,python3.7):2025-02-07-15:50:12.815.014 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 collected 1 item remove_redundancy_dp.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DISTRIBUTED(32881,fffea17fa0f0,python3.7):2025-02-07-15:50:12.895.302 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32881,fffe4b7fe0f0,python3.7):2025-02-07-15:50:12.895.692 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(32826,fffe86ffd0f0,python3.7):2025-02-07-15:50:12.957.808 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32826,fffdc5ffb0f0,python3.7):2025-02-07-15:50:12.958.448 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(32916,fffea5ffb0f0,python3.7):2025-02-07-15:50:13.005.160 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32916,fffd567fc0f0,python3.7):2025-02-07-15:50:13.005.588 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(32868,fffea37fe0f0,python3.7):2025-02-07-15:50:13.089.031 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32868,fffd4ffff0f0,python3.7):2025-02-07-15:50:13.089.424 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(32856,fffead7fa0f0,python3.7):2025-02-07-15:50:13.113.562 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32856,fffd59ffb0f0,python3.7):2025-02-07-15:50:13.113.919 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(32835,fffe86ffd0f0,python3.7):2025-02-07-15:50:13.156.883 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32835,fffd377fe0f0,python3.7):2025-02-07-15:50:13.157.273 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(32903,fffe84ff90f0,python3.7):2025-02-07-15:50:13.214.290 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32903,fffd357fa0f0,python3.7):2025-02-07-15:50:13.214.652 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(32811,fffe5b7fe0f0,python3.7):2025-02-07-15:50:13.413.637 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(32811,fffe7dffb0f0,python3.7):2025-02-07-15:50:13.414.704 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32856,fffd59ffb0f0,python3.7):2025-02-07-15:50:13.475.078 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DEVICE(32868,fffd4ffff0f0,python3.7):2025-02-07-15:50:13.475.355 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(32868,fffea37fe0f0,python3.7):2025-02-07-15:50:13.475.599 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(32856,fffead7fa0f0,python3.7):2025-02-07-15:50:13.476.110 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32835,fffd377fe0f0,python3.7):2025-02-07-15:50:13.534.722 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DEVICE(32826,fffdc5ffb0f0,python3.7):2025-02-07-15:50:13.534.836 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(32835,fffe86ffd0f0,python3.7):2025-02-07-15:50:13.534.943 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(32826,fffe86ffd0f0,python3.7):2025-02-07-15:50:13.537.907 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32881,fffe4b7fe0f0,python3.7):2025-02-07-15:50:13.546.357 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(32881,fffea17fa0f0,python3.7):2025-02-07-15:50:13.547.498 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32916,fffd567fc0f0,python3.7):2025-02-07-15:50:13.575.009 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(32916,fffea5ffb0f0,python3.7):2025-02-07-15:50:13.575.195 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(32903,fffd357fa0f0,python3.7):2025-02-07-15:50:13.595.821 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(32903,fffe84ff90f0,python3.7):2025-02-07-15:50:13.596.898 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group TotalTime = 13.7875, [21] [bootstrap]: 0.00153086 [type_inference]: 0.767669 [auto_monad]: 0.00231894 [graph_reusing]: 3.93102e-05 [inline]: 2.00979e-06 [parallel-infer-symbol]: 1.02101e-05 [pre_auto_parallel]: 0.00082314 [insert-virtual-dataset]: 2.96021e-06 [parallel-infer-symbol-second]: 8.69855e-07 [dataset_repeat_opt]: 1.07009e-06 [pipeline_split]: 1.24983e-06 [optimize]: 0.283828, [52] [py_interpret_to_execute]: 0.00083799 [rewriter_before_opt_a]: 0.00196398 [opt_a]: 0.269873, [3] [Cycle 1]: 0.198129, [43] [expand_dump_flag]: 4.44097e-05 [switch_simplify]: 0.00136102 [loop_unroll]: 0.00088948 [a_1]: 0.0256205 [recompute_prepare]: 0.00018366 [updatestate_depend_eliminate]: 0.00036387 [updatestate_assign_eliminate]: 0.00012294 [updatestate_loads_eliminate]: 0.00020595 [parameter_eliminate]: 1.13603e-05 [a_2]: 0.00387056 [accelerated_algorithm]: 0.00041358 [shard]: 1.91992e-06 [meta_shard_fg_expand]: 5.95599e-05 [shard_inline]: 0.00012292 [auto_parallel]: 8.18698e-05 [parallel]: 9.68017e-06 [flash_sp]: 3.938e-05 [merge_comm]: 8.15401e-05 [allreduce_fusion]: 7.23102e-05 [matmul_add_comm_reduction]: 0.00011497 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 0.00012914 [virtual_dataset]: 0.00012236 [get_grad_eliminate_]: 0.00012197 [virtual_output]: 0.00012014 [merge_forward]: 0.00010145 [cell_reuse_recompute_pass]: 2.06986e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022438 [before_grad]: 0.00021738 [inplace_validation]: 0.00018802 [meta_fg_expand]: 0.0476731 [inplace_validation_after_expand]: 0.00067329 [flash_sp_send_recv_attached]: 6.3302e-06 [receive_attached]: 2.30968e-06 [after_resolve]: 0.00094 [a_after_grad]: 0.00149378 [special_op_eliminate]: 0.00073544 [renormalize]: 0.0903146 [add_forward_monad_depend]: 0.00028746 [auto_monad_grad]: 0.00010062 [auto_monad_eliminator]: 0.00116453 [cse]: 0.00310569 [a_3]: 0.0162379 [Cycle 2]: 0.0553723, [43] [expand_dump_flag]: 3.13199e-05 [switch_simplify]: 0.0010333 [loop_unroll]: 0.00102749 [a_1]: 0.0256592 [recompute_prepare]: 0.00014833 [updatestate_depend_eliminate]: 0.00015537 [updatestate_assign_eliminate]: 8.74102e-05 [updatestate_loads_eliminate]: 0.0001419 [parameter_eliminate]: 2.81027e-06 [a_2]: 0.00385281 [accelerated_algorithm]: 0.00014597 [shard]: 1.62004e-06 [meta_shard_fg_expand]: 5.56302e-05 [shard_inline]: 0.00012328 [auto_parallel]: 9.69204e-05 [parallel]: 8.67993e-06 [flash_sp]: 3.29968e-06 [merge_comm]: 9.21502e-05 [allreduce_fusion]: 0.00013063 [matmul_add_comm_reduction]: 0.00010404 [allreduce_slice_to_reducescatter]: 5.29923e-07 [virtual_shard_identity]: 0.00012335 [virtual_dataset]: 0.00011841 [get_grad_eliminate_]: 0.00011674 [virtual_output]: 0.00011762 [merge_forward]: 7.84402e-05 [cell_reuse_recompute_pass]: 1.96975e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022045 [before_grad]: 0.00021135 [inplace_validation]: 7.40997e-05 [meta_fg_expand]: 0.00150864 [inplace_validation_after_expand]: 0.00069793 [flash_sp_send_recv_attached]: 2.10013e-06 [receive_attached]: 1.15018e-06 [after_resolve]: 0.00015328 [a_after_grad]: 0.00020224 [special_op_eliminate]: 0.00012999 [renormalize]: 0.0129754 [add_forward_monad_depend]: 3.55998e-06 [auto_monad_grad]: 1.87987e-06 [auto_monad_eliminator]: 0.00027062 [cse]: 0.00422876 [a_3]: 0.00086882 [Cycle 3]: 0.0104948, [43] [expand_dump_flag]: 2.52016e-06 [switch_simplify]: 0.00012007 [loop_unroll]: 0.00011693 [a_1]: 0.00384229 [recompute_prepare]: 0.00012295 [updatestate_depend_eliminate]: 0.00012917 [updatestate_assign_eliminate]: 8.33198e-05 [updatestate_loads_eliminate]: 8.13804e-05 [parameter_eliminate]: 2.27988e-06 [a_2]: 0.00192734 [accelerated_algorithm]: 0.00014534 [shard]: 1.41002e-06 [meta_shard_fg_expand]: 4.22499e-05 [shard_inline]: 0.00012126 [auto_parallel]: 9.61404e-05 [parallel]: 7.55023e-06 [flash_sp]: 2.21003e-06 [merge_comm]: 9.21302e-05 [allreduce_fusion]: 8.45501e-05 [matmul_add_comm_reduction]: 0.00010592 [allreduce_slice_to_reducescatter]: 4.00003e-07 [virtual_shard_identity]: 0.00012398 [virtual_dataset]: 0.00011803 [get_grad_eliminate_]: 0.00011633 [virtual_output]: 0.00011604 [merge_forward]: 8.10302e-05 [cell_reuse_recompute_pass]: 2.20956e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021685 [before_grad]: 0.00021332 [inplace_validation]: 7.792e-05 [meta_fg_expand]: 9.502e-05 [inplace_validation_after_expand]: 9.47402e-05 [flash_sp_send_recv_attached]: 1.13016e-06 [receive_attached]: 8.40053e-07 [after_resolve]: 0.00013503 [a_after_grad]: 0.00019636 [special_op_eliminate]: 0.00011677 [renormalize]: 1.00117e-07 [add_forward_monad_depend]: 1.72993e-06 [auto_monad_grad]: 1.07009e-06 [auto_monad_eliminator]: 0.00014641 [cse]: 0.00040663 [a_3]: 0.00084497 [py_interpret_to_execute_after_opt_a]: 0.00011026 [slice_cell_reuse_recomputed_activation]: 2.52016e-06 [rewriter_after_opt_a]: 0.00088918 [convert_after_rewriter]: 9.997e-05 [order_py_execute_after_rewriter]: 7.20001e-05 [opt_b]: 0.00358044, [1] [Cycle 1]: 0.00357344, [7] [b_1]: 0.00280384 [b_2]: 0.00012406 [updatestate_depend_eliminate]: 8.481e-05 [updatestate_assign_eliminate]: 7.84299e-05 [updatestate_loads_eliminate]: 8.10404e-05 [renormalize]: 4.10248e-07 [cse]: 0.00034923 [optimize_parallel_all_gather_comm]: 0.00011417 [overlap_param_gather]: 2.41203e-05 [cconv]: 5.705e-05 [loop_unroll]: 0.00081494 [opt_after_cconv]: 0.00142238, [1] [Cycle 1]: 0.0014157, [7] [c_1]: 0.00070436 [parameter_eliminate]: 2.06986e-06 [updatestate_depend_eliminate]: 0.00010808 [updatestate_assign_eliminate]: 8.06698e-05 [updatestate_loads_eliminate]: 8.13901e-05 [cse]: 0.00038769 [renormalize]: 4.00003e-07 [remove_dup_value]: 0.00049418 [tuple_transform]: 0.00087367, [1] [Cycle 1]: 0.00086738, [2] [d_1]: 0.00085108 [renormalize]: 3.59956e-07 [partial_unused_args_eliminate]: 2.33995e-06 [add_cache_embedding]: 0.00012762 [add_recomputation]: 0.00060599 [cse_after_recomputation]: 0.00026319, [1] [Cycle 1]: 0.00025638, [1] [cse]: 0.00024504 [environ_conv]: 7.29002e-05 [swap_dp_allreduce_reducescatter]: 0.00011074 [bias_add_comm_swap]: 1.91014e-06 [label_micro_interleaved_index]: 1.25961e-06 [label_fine_grained_interleaved_index]: 1.16974e-06 [merge_cast_opt]: 1.15996e-06 [slice_recompute_activation]: 1.76998e-06 [micro_interleaved_order_control]: 1.28988e-06 [assign_add_opt]: 9.49996e-06 [ForceFp32Comm]: 5.99772e-07 [remove_cast_before_assign_add]: 6.00237e-07 [full_micro_interleaved_order_control]: 1.34995e-06 [reorder_send_recv_between_fp_bp]: 1.34017e-06 [comm_op_add_attrs]: 5.79748e-07 [add_comm_op_reuse_tag]: 5.80214e-07 [interleave_split_concat_branches]: 4.99655e-07 [interleave_parallel_branches]: 5.89993e-07 [overlap_opt_shard_in_pipeline]: 2.78102e-05 [overlap_opt_shard_grad_in_pipeline]: 1.38022e-06 [control_data_broadcast_order]: 9.20147e-07 [grouped_pairwise_exchange_alltoall]: 6.99889e-07 [offloading_packed_experts]: 6.50063e-07 [overlap_recompute_and_grad_model_parallel]: 1.12038e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.49829e-07 [overlap_recompute_allgather_and_fa_grad]: 5.79748e-07 [overlap_grad_ring_attention]: 1.15996e-06 [overlap_grad_flash_sp]: 0.00014002 [begin_end_overlap_inline]: 4.80097e-07 [split_matmul_comm_elemetwise]: 1.32993e-06 [split_layernorm_comm]: 1.11992e-06 [handle_group_info]: 6.00237e-07 [symbol_engine_optimizer]: 0.00077338, [1] [Cycle 1]: 0.00076745, [6] [build]: 3.877e-05 [elim_shapecalc]: 0.00012867 [elim_not_effective]: 0.00022999 [opt_reshape]: 0.00013038 [fold_const_symbol]: 0.00020124 [renormalize]: 3.30154e-07 [pipeline_parallel_scheduler]: 1.97999e-06 [auto_monad_reorder]: 0.00033977 [get_jit_bprop_graph]: 4.09782e-07 [rewriter_after_jit_bprop_graph]: 3.7998e-07 [eliminate_special_op_node]: 0.00092588 [distribtued_split]: 0.00035082 [validate]: 0.00029316 [task_emit]: 12.728 [execute]: 9.26992e-06 Sums bootstrap : 0.001531s : 0.01% type_inference : 0.767669s : 5.57% auto_monad : 0.002319s : 0.02% graph_reusing : 0.000039s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000010s : 0.00% pre_auto_parallel : 0.000823s : 0.01% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000838s : 0.01% optimize.rewriter_before_opt_a : 0.001964s : 0.01% optimize.opt_a.expand_dump_flag : 0.000078s : 0.00% optimize.opt_a.switch_simplify : 0.002514s : 0.02% optimize.opt_a.loop_unroll : 0.002034s : 0.01% optimize.opt_a.a_1 : 0.055122s : 0.40% optimize.opt_a.recompute_prepare : 0.000455s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000648s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000294s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000429s : 0.00% optimize.opt_a.parameter_eliminate : 0.000016s : 0.00% optimize.opt_a.a_2 : 0.009651s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000705s : 0.01% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000157s : 0.00% optimize.opt_a.shard_inline : 0.000367s : 0.00% optimize.opt_a.auto_parallel : 0.000275s : 0.00% optimize.opt_a.parallel : 0.000026s : 0.00% optimize.opt_a.flash_sp : 0.000045s : 0.00% optimize.opt_a.merge_comm : 0.000266s : 0.00% optimize.opt_a.allreduce_fusion : 0.000287s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000325s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000376s : 0.00% optimize.opt_a.virtual_dataset : 0.000359s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000355s : 0.00% optimize.opt_a.virtual_output : 0.000354s : 0.00% optimize.opt_a.merge_forward : 0.000261s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000006s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000662s : 0.00% optimize.opt_a.before_grad : 0.000642s : 0.00% optimize.opt_a.inplace_validation : 0.000340s : 0.00% optimize.opt_a.meta_fg_expand : 0.049277s : 0.36% optimize.opt_a.inplace_validation_after_expand : 0.001466s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000010s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.001228s : 0.01% optimize.opt_a.a_after_grad : 0.001892s : 0.01% optimize.opt_a.special_op_eliminate : 0.000982s : 0.01% optimize.opt_a.renormalize : 0.103290s : 0.75% optimize.opt_a.add_forward_monad_depend : 0.000293s : 0.00% optimize.opt_a.auto_monad_grad : 0.000104s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.001582s : 0.01% optimize.opt_a.cse : 0.007741s : 0.06% optimize.opt_a.a_3 : 0.017952s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000110s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000889s : 0.01% optimize.convert_after_rewriter : 0.000100s : 0.00% optimize.order_py_execute_after_rewriter : 0.000072s : 0.00% optimize.opt_b.b_1 : 0.002804s : 0.02% optimize.opt_b.b_2 : 0.000124s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000085s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000078s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000081s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000349s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000114s : 0.00% optimize.overlap_param_gather : 0.000024s : 0.00% optimize.cconv : 0.000057s : 0.00% optimize.loop_unroll : 0.000815s : 0.01% optimize.opt_after_cconv.c_1 : 0.000704s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000108s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000081s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000081s : 0.00% optimize.opt_after_cconv.cse : 0.000388s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000494s : 0.00% optimize.tuple_transform.d_1 : 0.000851s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000128s : 0.00% optimize.add_recomputation : 0.000606s : 0.00% optimize.cse_after_recomputation.cse : 0.000245s : 0.00% optimize.environ_conv : 0.000073s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000111s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000009s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000028s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000140s : 0.00% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000039s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000129s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000230s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000130s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000201s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000340s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000926s : 0.01% distribtued_split : 0.000351s : 0.00% validate : 0.000293s : 0.00% task_emit : 12.728049s : 92.38% execute : 0.000009s : 0.00% Time group info: ------[substitution.] 0.016366 3191 0.12% : 0.000020s : 9: substitution.addn_check_dump 0.34% : 0.000055s : 8: substitution.addn_zero_filter 0.09% : 0.000015s : 8: substitution.adjust_all_reduce_mul_add 1.39% : 0.000227s : 59: substitution.arithmetic_simplify 0.44% : 0.000071s : 11: substitution.cast_eliminate 0.23% : 0.000038s : 35: substitution.depend_value_elim 0.21% : 0.000035s : 97: substitution.elim_not_effective 0.01% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.07% : 0.000011s : 6: substitution.environ_get_add_eliminate 0.03% : 0.000004s : 3: substitution.environ_get_depend_swap 0.08% : 0.000013s : 12: substitution.environ_get_eliminate 0.13% : 0.000021s : 6: substitution.environ_get_set_eliminate 0.23% : 0.000037s : 53: substitution.float_depend_g_call 0.03% : 0.000005s : 6: substitution.float_environ_get_switch 0.03% : 0.000005s : 4: substitution.float_tuple_getitem_switch 0.18% : 0.000029s : 97: substitution.fold_const_symbol 8.10% : 0.001325s : 8: substitution.getattr_setattr_resolve 0.47% : 0.000077s : 116: substitution.graph_param_transform 0.04% : 0.000007s : 10: substitution.incorporate_call 0.03% : 0.000005s : 10: substitution.incorporate_call_switch 67.47% : 0.011042s : 326: substitution.inline 1.32% : 0.000216s : 40: substitution.inline_without_move 0.69% : 0.000113s : 286: substitution.j_node_and_user_rematch 1.74% : 0.000285s : 40: substitution.less_batch_normalization 0.21% : 0.000034s : 66: substitution.load_eliminater 0.27% : 0.000044s : 10: substitution.merge_addn 0.39% : 0.000065s : 57: substitution.minmaximum_grad 0.01% : 0.000002s : 4: substitution.opt_reshape 0.20% : 0.000033s : 4: substitution.partial_defer_inline 0.64% : 0.000105s : 53: substitution.partial_eliminate 0.07% : 0.000011s : 15: substitution.reduce_all_const_elim 0.14% : 0.000023s : 11: substitution.reduce_eliminate 0.90% : 0.000147s : 286: substitution.remove_not_recompute_node 3.55% : 0.000581s : 326: substitution.replace_applicator 0.44% : 0.000072s : 162: substitution.replace_old_param 0.17% : 0.000028s : 8: substitution.reshape_eliminate 0.04% : 0.000006s : 5: substitution.set_cell_output_no_recompute 0.03% : 0.000005s : 2: substitution.specialize_transform 0.09% : 0.000014s : 12: substitution.split_environ_get_set_with_tuple_value 0.30% : 0.000049s : 24: substitution.switch_simplify 0.19% : 0.000031s : 14: substitution.tile_eliminate 0.89% : 0.000145s : 57: substitution.tuple_list_convert_item_index_to_positive 0.47% : 0.000077s : 63: substitution.tuple_list_get_item_const_eliminator 0.79% : 0.000129s : 63: substitution.tuple_list_get_item_depend_reorder 3.67% : 0.000600s : 242: substitution.tuple_list_get_item_eliminator 0.64% : 0.000105s : 63: substitution.tuple_list_get_set_item_eliminator 0.98% : 0.000160s : 178: substitution.updatestate_pure_node_eliminater 1.47% : 0.000240s : 215: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.767098 2 96.05% : 0.736772s : 1: type_inference.infer 3.95% : 0.030326s : 1: type_inference.specialize ------[replace.] 0.005789 566 0.11% : 0.000006s : 1: replace.arithmetic_simplify 0.58% : 0.000034s : 6: replace.cast_eliminate 0.83% : 0.000048s : 7: replace.depend_value_elim 0.67% : 0.000039s : 3: replace.environ_get_set_eliminate 1.69% : 0.000098s : 6: replace.getattr_setattr_resolve 49.79% : 0.002882s : 313: replace.inline 0.36% : 0.000021s : 1: replace.merge_addn 3.41% : 0.000197s : 13: replace.partial_eliminate 2.93% : 0.000170s : 10: replace.replace_applicator 4.14% : 0.000239s : 24: replace.switch_simplify 0.89% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 34.35% : 0.001989s : 175: replace.tuple_list_get_item_eliminator 0.26% : 0.000015s : 1: replace.updatestate_useless_node_eliminater ------[match.] 0.012678 566 0.08% : 0.000010s : 1: match.arithmetic_simplify 0.34% : 0.000043s : 6: match.cast_eliminate 0.02% : 0.000003s : 7: match.depend_value_elim 0.12% : 0.000015s : 3: match.environ_get_set_eliminate 9.52% : 0.001206s : 6: match.getattr_setattr_resolve 85.52% : 0.010843s : 313: match.inline 0.16% : 0.000020s : 1: match.merge_addn 0.46% : 0.000058s : 13: match.partial_eliminate 0.35% : 0.000045s : 10: match.replace_applicator 0.30% : 0.000038s : 24: match.switch_simplify 0.23% : 0.000029s : 6: match.tuple_list_get_item_depend_reorder 2.85% : 0.000361s : 175: match.tuple_list_get_item_eliminator 0.06% : 0.000007s : 1: match.updatestate_useless_node_eliminater ------[predicate.] 0.015458100237 0.86% : 0.000133s : 982: predicate.accumulaten_eliminater 0.20% : 0.000032s : 116: predicate.ad_related_special_op_eliminate 0.54% : 0.000084s : 621: predicate.addn_check_dump 0.84% : 0.000130s : 982: predicate.addn_zero_filter 0.83% : 0.000128s : 982: predicate.adjust_all_reduce_mul_add 1.95% : 0.000301s : 1604: predicate.arithmetic_simplify 0.88% : 0.000137s : 989: predicate.cast_eliminate 3.22% : 0.000498s : 2476: predicate.check_bprop_eliminate 0.55% : 0.000085s : 621: predicate.compare_switch_simplify 0.06% : 0.000009s : 122: predicate.const_output_eliminate 0.11% : 0.000017s : 116: predicate.convert_tensor_all_eliminate 1.44% : 0.000222s : 1173: predicate.convert_tensor_eliminate 0.57% : 0.000088s : 624: predicate.depend_value_elim 0.92% : 0.000142s : 992: predicate.dict_get_item_const_eliminator 0.98% : 0.000152s : 992: predicate.dict_get_item_eliminator 0.96% : 0.000148s : 992: predicate.dict_set_item_eliminator 0.05% : 0.000008s : 116: predicate.elim_not_effective 0.12% : 0.000019s : 116: predicate.elim_shapecalc_of_broadcastargs 0.94% : 0.000146s : 1111: predicate.environ_add_const_eliminate 0.95% : 0.000147s : 1114: predicate.environ_get_add_eliminate 0.94% : 0.000146s : 1111: predicate.environ_get_depend_swap 1.56% : 0.000241s : 1735: predicate.environ_get_eliminate 0.95% : 0.000147s : 1114: predicate.environ_get_set_eliminate 1.70% : 0.000263s : 1500: predicate.exchange_switch_depend_value 1.69% : 0.000261s : 1500: predicate.float_depend_g_call 0.55% : 0.000085s : 621: predicate.float_environ_get_switch 0.66% : 0.000102s : 743: predicate.float_tuple_getitem_switch 0.05% : 0.000008s : 116: predicate.fold_const_symbol 0.34% : 0.000052s : 366: predicate.get_grad_eliminate 0.08% : 0.000012s : 40: predicate.getattr_setattr_resolve 0.06% : 0.000010s : 116: predicate.graph_param_transform 0.55% : 0.000084s : 621: predicate.incorporate_call 0.54% : 0.000083s : 621: predicate.incorporate_call_switch 4.26% : 0.000659s : 3713: predicate.inline 1.45% : 0.000224s : 1014: predicate.inline_without_move 0.17% : 0.000026s : 366: predicate.j_node_and_user_rematch 0.41% : 0.000064s : 374: predicate.less_batch_normalization 1.25% : 0.000193s : 1411: predicate.list_to_tuple_eliminator_ 2.09% : 0.000324s : 2412: predicate.load_eliminater 0.23% : 0.000035s : 122: predicate.loop_unroll_after_grad 2.59% : 0.000400s : 2198: predicate.loop_unroll_before_grad 1.08% : 0.000168s : 1242: predicate.make_slice_get_slice_eliminator 0.56% : 0.000086s : 623: predicate.merge_addn 2.74% : 0.000424s : 2432: predicate.micro_step_allgather_replace 2.77% : 0.000427s : 2432: predicate.mini_step_allgather_replace 0.84% : 0.000130s : 983: predicate.minmaximum_grad 0.12% : 0.000018s : 116: predicate.mutable_eliminate 0.11% : 0.000017s : 116: predicate.opt_reshape 0.12% : 0.000019s : 122: predicate.parallel_virtual_node 2.59% : 0.000400s : 1500: predicate.partial_defer_inline 1.26% : 0.000195s : 1308: predicate.partial_eliminate 0.85% : 0.000131s : 982: predicate.print_const_string_wrapper 0.54% : 0.000084s : 610: predicate.reduce_all_const_elim 1.06% : 0.000163s : 983: predicate.reduce_eliminate 0.17% : 0.000026s : 366: predicate.remove_not_recompute_node 1.98% : 0.000306s : 3625: predicate.replace_applicator 0.48% : 0.000075s : 1014: predicate.replace_old_param 0.06% : 0.000010s : 122: predicate.reset_defer_inline 0.85% : 0.000132s : 983: predicate.reshape_eliminate 2.82% : 0.000436s : 2432: predicate.row_tensor_add_zeros_like 0.13% : 0.000020s : 122: predicate.row_tensor_eliminate 3.14% : 0.000485s : 2476: predicate.same_eliminate 0.21% : 0.000032s : 417: predicate.set_cell_output_no_recompute 0.35% : 0.000054s : 366: predicate.shard_identity_eliminate 1.34% : 0.000208s : 1136: predicate.special_op_eliminate 0.63% : 0.000098s : 623: predicate.specialize_transform 2.99% : 0.000461s : 2432: predicate.split_environ_get_set_with_tuple_value 0.98% : 0.000152s : 1014: predicate.stack_unstack_eliminate 2.07% : 0.000321s : 2412: predicate.stopgrad_eliminater 0.11% : 0.000017s : 122: predicate.switch_call_monad_eliminater 1.47% : 0.000227s : 1500: predicate.switch_defer_inline 4.32% : 0.000668s : 3976: predicate.switch_layer_defer_inline 4.70% : 0.000726s : 4369: predicate.switch_simplify 0.85% : 0.000131s : 983: predicate.tile_eliminate 0.91% : 0.000140s : 983: predicate.transpose_eliminate 1.19% : 0.000184s : 1230: predicate.tuple_list_convert_item_index_to_positive 1.20% : 0.000185s : 1236: predicate.tuple_list_get_item_const_eliminator 1.04% : 0.000161s : 1236: predicate.tuple_list_get_item_depend_reorder 2.14% : 0.000331s : 2032: predicate.tuple_list_get_item_eliminator 1.09% : 0.000169s : 1236: predicate.tuple_list_get_set_item_eliminator 1.85% : 0.000285s : 1857: predicate.tuple_list_set_item_eliminator 1.27% : 0.000196s : 1411: predicate.tuple_to_list_eliminator_ 2.11% : 0.000326s : 2412: predicate.updatestate_pure_node_eliminater 2.89% : 0.000447s : 3034: predicate.updatestate_useless_node_eliminater 0.12% : 0.000018s : 122: predicate.value_based_eliminate 0.34% : 0.000053s : 366: predicate.virtual_dataset_eliminate 0.34% : 0.000052s : 366: predicate.virtual_output_eliminate 0.12% : 0.000019s : 122: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.039719 649 63.10% : 0.025062s : 290: func_graph_cloner_run.FuncGraphClonerGraph 4.56% : 0.001809s : 27: func_graph_cloner_run.FuncGraphClonerNode 32.35% : 0.012848s : 332: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 14.277041 280 0.00% : 0.000003s : 1: ForceFp32Comm 0.00% : 0.000134s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.00% : 0.000617s : 1: add_recomputation 0.00% : 0.000013s : 1: assign_add_opt 0.02% : 0.002341s : 1: auto_monad 0.00% : 0.000355s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.01% : 0.001572s : 1: bootstrap 0.00% : 0.000063s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000107s : 1: convert_after_rewriter 0.00% : 0.000268s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000364s : 1: distribtued_split 0.01% : 0.000940s : 1: eliminate_special_op_node 0.00% : 0.000080s : 1: environ_conv 0.00% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.00% : 0.000008s : 1: get_jit_bprop_graph 0.00% : 0.000049s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.01% : 0.000825s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000158s : 1: opt.transform.loop_unroll_optimizer 0.67% : 0.095418s : 162: opt.transform.opt_a 0.00% : 0.000702s : 1: opt.transform.opt_after_cconv 0.02% : 0.002898s : 27: opt.transform.opt_b 0.01% : 0.001529s : 4: opt.transform.opt_resolve 0.01% : 0.000848s : 1: opt.transform.opt_trans_graph 0.00% : 0.000385s : 3: opt.transform.special_op_eliminate 0.00% : 0.000685s : 4: opt.transform.symbol_engine_opt 1.89% : 0.269878s : 1: opt_a 0.01% : 0.001428s : 1: opt_after_cconv 0.03% : 0.003585s : 1: opt_b 1.99% : 0.283838s : 1: optimize 0.00% : 0.000121s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000077s : 1: order_py_execute_after_rewriter 0.00% : 0.000146s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000032s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000030s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000016s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.01% : 0.000841s : 1: pre_auto_parallel 0.01% : 0.000889s : 1: py_interpret_to_execute 0.00% : 0.000117s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.00% : 0.000505s : 1: remove_dup_value 0.49% : 0.070494s : 2: renormalize.infer 0.23% : 0.032773s : 2: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000899s : 1: rewriter_after_opt_a 0.01% : 0.001977s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000117s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000777s : 1: symbol_engine_optimizer 89.15% : 12.728078s : 1: task_emit 0.01% : 0.000878s : 1: tuple_transform 5.38% : 0.767701s : 1: type_inference 0.01% : 0.001310s : 1: validate TotalTime = 14.0001, [21] [bootstrap]: 0.00217485 [type_inference]: 0.787149 [auto_monad]: 0.00234258 [graph_reusing]: 3.77903e-05 [inline]: 2.29012e-06 [parallel-infer-symbol]: 1.432e-05 [pre_auto_parallel]: 0.00079542 [insert-virtual-dataset]: 3.80026e-06 [parallel-infer-symbol-second]: 1.07009e-06 [dataset_repeat_opt]: 2.14996e-06 [pipeline_split]: 1.99024e-06 [optimize]: 0.296046, [52] [py_interpret_to_execute]: 0.00082335 [rewriter_before_opt_a]: 0.00198708 [opt_a]: 0.281678, [3] [Cycle 1]: 0.207565, [43] [expand_dump_flag]: 4.83198e-05 [switch_simplify]: 0.0014405 [loop_unroll]: 0.00089632 [a_1]: 0.0259916 [recompute_prepare]: 0.00018896 [updatestate_depend_eliminate]: 0.00038455 [updatestate_assign_eliminate]: 0.00011313 [updatestate_loads_eliminate]: 0.00022024 [parameter_eliminate]: 1.45901e-05 [a_2]: 0.00386047 [accelerated_algorithm]: 0.00042955 [shard]: 2.46009e-06 [meta_shard_fg_expand]: 6.116e-05 [shard_inline]: 0.0001244 [auto_parallel]: 8.641e-05 [parallel]: 2.11103e-05 [flash_sp]: 4.32702e-05 [merge_comm]: 9.35299e-05 [allreduce_fusion]: 7.29798e-05 [matmul_add_comm_reduction]: 0.000139 [allreduce_slice_to_reducescatter]: 5.49946e-07 [virtual_shard_identity]: 0.00013236 [virtual_dataset]: 0.00012323 [get_grad_eliminate_]: 0.00012288 [virtual_output]: 0.0001202 [merge_forward]: 0.0001072 [cell_reuse_recompute_pass]: 2.31992e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022736 [before_grad]: 0.00021635 [inplace_validation]: 0.00014523 [meta_fg_expand]: 0.0507954 [inplace_validation_after_expand]: 0.00059983 [flash_sp_send_recv_attached]: 9.35001e-06 [receive_attached]: 1.82902e-05 [after_resolve]: 0.00099742 [a_after_grad]: 0.00150187 [special_op_eliminate]: 0.00073256 [renormalize]: 0.0952607 [add_forward_monad_depend]: 0.00030339 [auto_monad_grad]: 0.00011161 [auto_monad_eliminator]: 0.00123237 [cse]: 0.00323086 [a_3]: 0.0167965 [Cycle 2]: 0.0576905, [43] [expand_dump_flag]: 3.515e-05 [switch_simplify]: 0.00114222 [loop_unroll]: 0.00105831 [a_1]: 0.0261639 [recompute_prepare]: 0.00016581 [updatestate_depend_eliminate]: 0.0001675 [updatestate_assign_eliminate]: 8.97599e-05 [updatestate_loads_eliminate]: 0.00014883 [parameter_eliminate]: 4.25009e-06 [a_2]: 0.00387475 [accelerated_algorithm]: 0.00014728 [shard]: 2.17976e-06 [meta_shard_fg_expand]: 5.67399e-05 [shard_inline]: 0.0001241 [auto_parallel]: 0.00010559 [parallel]: 1.31801e-05 [flash_sp]: 4.27011e-06 [merge_comm]: 9.54201e-05 [allreduce_fusion]: 8.209e-05 [matmul_add_comm_reduction]: 0.00010539 [allreduce_slice_to_reducescatter]: 5.89993e-07 [virtual_shard_identity]: 0.00012713 [virtual_dataset]: 0.00011948 [get_grad_eliminate_]: 0.00011749 [virtual_output]: 0.00011814 [merge_forward]: 8.13403e-05 [cell_reuse_recompute_pass]: 2.60025e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022089 [before_grad]: 0.00020971 [inplace_validation]: 7.55503e-05 [meta_fg_expand]: 0.00166798 [inplace_validation_after_expand]: 0.00068017 [flash_sp_send_recv_attached]: 2.37999e-06 [receive_attached]: 2.12016e-06 [after_resolve]: 0.00016175 [a_after_grad]: 0.00020534 [special_op_eliminate]: 0.00011997 [renormalize]: 0.0139721 [add_forward_monad_depend]: 5.00958e-06 [auto_monad_grad]: 2.95043e-06 [auto_monad_eliminator]: 0.00027629 [cse]: 0.00462528 [a_3]: 0.00086907 [Cycle 3]: 0.0104869, [43] [expand_dump_flag]: 2.58023e-06 [switch_simplify]: 0.00012358 [loop_unroll]: 0.00011685 [a_1]: 0.00383929 [recompute_prepare]: 0.00012208 [updatestate_depend_eliminate]: 0.00013164 [updatestate_assign_eliminate]: 8.443e-05 [updatestate_loads_eliminate]: 8.35797e-05 [parameter_eliminate]: 3.2899e-06 [a_2]: 0.00187187 [accelerated_algorithm]: 0.00014351 [shard]: 1.62004e-06 [meta_shard_fg_expand]: 4.32399e-05 [shard_inline]: 0.00011961 [auto_parallel]: 0.00015615 [parallel]: 1.18599e-05 [flash_sp]: 2.71015e-06 [merge_comm]: 9.71998e-05 [allreduce_fusion]: 8.527e-05 [matmul_add_comm_reduction]: 0.00010762 [allreduce_slice_to_reducescatter]: 5.29923e-07 [virtual_shard_identity]: 0.00012483 [virtual_dataset]: 0.00011779 [get_grad_eliminate_]: 0.0001154 [virtual_output]: 0.00011559 [merge_forward]: 8.37003e-05 [cell_reuse_recompute_pass]: 3.70014e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021911 [before_grad]: 0.00020814 [inplace_validation]: 7.845e-05 [meta_fg_expand]: 9.46997e-05 [inplace_validation_after_expand]: 9.736e-05 [flash_sp_send_recv_attached]: 1.93994e-06 [receive_attached]: 1.6503e-06 [after_resolve]: 0.00013899 [a_after_grad]: 0.0001976 [special_op_eliminate]: 0.00011708 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 2.58023e-06 [auto_monad_grad]: 1.62981e-06 [auto_monad_eliminator]: 0.00015522 [cse]: 0.00035411 [a_3]: 0.00084419 [py_interpret_to_execute_after_opt_a]: 0.00011268 [slice_cell_reuse_recomputed_activation]: 3.07988e-06 [rewriter_after_opt_a]: 0.00101444 [convert_after_rewriter]: 0.00010288 [order_py_execute_after_rewriter]: 7.12899e-05 [opt_b]: 0.00358729, [1] [Cycle 1]: 0.00357903, [7] [b_1]: 0.00276172 [b_2]: 0.00012399 [updatestate_depend_eliminate]: 8.53599e-05 [updatestate_assign_eliminate]: 0.00012314 [updatestate_loads_eliminate]: 8.33501e-05 [renormalize]: 5.40167e-07 [cse]: 0.00034727 [optimize_parallel_all_gather_comm]: 0.00011718 [overlap_param_gather]: 2.69003e-05 [cconv]: 6.58603e-05 [loop_unroll]: 0.00096431 [opt_after_cconv]: 0.00138523, [1] [Cycle 1]: 0.00137758, [7] [c_1]: 0.00070596 [parameter_eliminate]: 2.89036e-06 [updatestate_depend_eliminate]: 0.00010877 [updatestate_assign_eliminate]: 8.144e-05 [updatestate_loads_eliminate]: 8.18199e-05 [cse]: 0.00034476 [renormalize]: 5.19678e-07 [remove_dup_value]: 0.00051652 [tuple_transform]: 0.00091766, [1] [Cycle 1]: 0.00091015, [2] [d_1]: 0.00089133 [renormalize]: 4.99655e-07 [partial_unused_args_eliminate]: 3.22005e-06 [add_cache_embedding]: 0.00013252 [add_recomputation]: 0.00061593 [cse_after_recomputation]: 0.00026425, [1] [Cycle 1]: 0.00025638, [1] [cse]: 0.0002431 [environ_conv]: 7.62199e-05 [swap_dp_allreduce_reducescatter]: 0.00011009 [bias_add_comm_swap]: 3.05986e-06 [label_micro_interleaved_index]: 2.48011e-06 [label_fine_grained_interleaved_index]: 2.22027e-06 [merge_cast_opt]: 1.24006e-06 [slice_recompute_activation]: 1.72015e-06 [micro_interleaved_order_control]: 1.92039e-06 [assign_add_opt]: 1.10599e-05 [ForceFp32Comm]: 9.60194e-07 [remove_cast_before_assign_add]: 9.19681e-07 [full_micro_interleaved_order_control]: 2.21003e-06 [reorder_send_recv_between_fp_bp]: 2.08011e-06 [comm_op_add_attrs]: 1.00024e-06 [add_comm_op_reuse_tag]: 1.24006e-06 [interleave_split_concat_branches]: 1.06962e-06 [interleave_parallel_branches]: 8.29808e-07 [overlap_opt_shard_in_pipeline]: 3.117e-05 [overlap_opt_shard_grad_in_pipeline]: 2.12993e-06 [control_data_broadcast_order]: 1.13016e-06 [grouped_pairwise_exchange_alltoall]: 1.32993e-06 [offloading_packed_experts]: 1.40024e-06 [overlap_recompute_and_grad_model_parallel]: 1.97999e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.2003e-07 [overlap_recompute_allgather_and_fa_grad]: 1.55019e-06 [overlap_grad_ring_attention]: 1.66986e-06 [overlap_grad_flash_sp]: 0.00012955 [begin_end_overlap_inline]: 7.79983e-07 [split_matmul_comm_elemetwise]: 2.1304e-06 [split_layernorm_comm]: 1.8198e-06 [handle_group_info]: 9.4017e-07 [symbol_engine_optimizer]: 0.00077504, [1] [Cycle 1]: 0.00076826, [6] [build]: 4.38597e-05 [elim_shapecalc]: 0.00013287 [elim_not_effective]: 0.00022994 [opt_reshape]: 0.00013041 [fold_const_symbol]: 0.00019312 [renormalize]: 4.80097e-07 [pipeline_parallel_scheduler]: 2.61003e-06 [auto_monad_reorder]: 0.00031082 [get_jit_bprop_graph]: 5.30388e-07 [rewriter_after_jit_bprop_graph]: 4.70318e-07 [eliminate_special_op_node]: 0.00094539 [distribtued_split]: 0.00037506 [validate]: 0.00029183 [task_emit]: 12.9082 [execute]: 1.20201e-05 Sums bootstrap : 0.002175s : 0.02% type_inference : 0.787149s : 5.63% auto_monad : 0.002343s : 0.02% graph_reusing : 0.000038s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000014s : 0.00% pre_auto_parallel : 0.000795s : 0.01% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000823s : 0.01% optimize.rewriter_before_opt_a : 0.001987s : 0.01% optimize.opt_a.expand_dump_flag : 0.000086s : 0.00% optimize.opt_a.switch_simplify : 0.002706s : 0.02% optimize.opt_a.loop_unroll : 0.002071s : 0.01% optimize.opt_a.a_1 : 0.055995s : 0.40% optimize.opt_a.recompute_prepare : 0.000477s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000684s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000287s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000453s : 0.00% optimize.opt_a.parameter_eliminate : 0.000022s : 0.00% optimize.opt_a.a_2 : 0.009607s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000720s : 0.01% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000161s : 0.00% optimize.opt_a.shard_inline : 0.000368s : 0.00% optimize.opt_a.auto_parallel : 0.000348s : 0.00% optimize.opt_a.parallel : 0.000046s : 0.00% optimize.opt_a.flash_sp : 0.000050s : 0.00% optimize.opt_a.merge_comm : 0.000286s : 0.00% optimize.opt_a.allreduce_fusion : 0.000240s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000352s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000384s : 0.00% optimize.opt_a.virtual_dataset : 0.000361s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000356s : 0.00% optimize.opt_a.virtual_output : 0.000354s : 0.00% optimize.opt_a.merge_forward : 0.000272s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000667s : 0.00% optimize.opt_a.before_grad : 0.000634s : 0.00% optimize.opt_a.inplace_validation : 0.000299s : 0.00% optimize.opt_a.meta_fg_expand : 0.052558s : 0.38% optimize.opt_a.inplace_validation_after_expand : 0.001377s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000014s : 0.00% optimize.opt_a.receive_attached : 0.000022s : 0.00% optimize.opt_a.after_resolve : 0.001298s : 0.01% optimize.opt_a.a_after_grad : 0.001905s : 0.01% optimize.opt_a.special_op_eliminate : 0.000970s : 0.01% optimize.opt_a.renormalize : 0.109233s : 0.78% optimize.opt_a.add_forward_monad_depend : 0.000311s : 0.00% optimize.opt_a.auto_monad_grad : 0.000116s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.001664s : 0.01% optimize.opt_a.cse : 0.008210s : 0.06% optimize.opt_a.a_3 : 0.018510s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000113s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001014s : 0.01% optimize.convert_after_rewriter : 0.000103s : 0.00% optimize.order_py_execute_after_rewriter : 0.000071s : 0.00% optimize.opt_b.b_1 : 0.002762s : 0.02% optimize.opt_b.b_2 : 0.000124s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000085s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000123s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000083s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000347s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000117s : 0.00% optimize.overlap_param_gather : 0.000027s : 0.00% optimize.cconv : 0.000066s : 0.00% optimize.loop_unroll : 0.000964s : 0.01% optimize.opt_after_cconv.c_1 : 0.000706s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000109s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000081s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000082s : 0.00% optimize.opt_after_cconv.cse : 0.000345s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000517s : 0.00% optimize.tuple_transform.d_1 : 0.000891s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000133s : 0.00% optimize.add_recomputation : 0.000616s : 0.00% optimize.cse_after_recomputation.cse : 0.000243s : 0.00% optimize.environ_conv : 0.000076s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000110s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000011s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000031s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000002s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000130s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000044s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000133s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000230s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000130s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000193s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000311s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000945s : 0.01% distribtued_split : 0.000375s : 0.00% validate : 0.000292s : 0.00% task_emit : 12.908164s : 92.26% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.017353 3191 0.12% : 0.000021s : 9: substitution.addn_check_dump 0.37% : 0.000065s : 8: substitution.addn_zero_filter 0.11% : 0.000018s : 8: substitution.adjust_all_reduce_mul_add 1.46% : 0.000254s : 59: substitution.arithmetic_simplify 0.36% : 0.000062s : 11: substitution.cast_eliminate 0.25% : 0.000043s : 35: substitution.depend_value_elim 0.21% : 0.000036s : 97: substitution.elim_not_effective 0.01% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.13% : 0.000023s : 6: substitution.environ_get_add_eliminate 0.03% : 0.000006s : 3: substitution.environ_get_depend_swap 0.09% : 0.000016s : 12: substitution.environ_get_eliminate 0.14% : 0.000025s : 6: substitution.environ_get_set_eliminate 0.23% : 0.000040s : 53: substitution.float_depend_g_call 0.03% : 0.000006s : 6: substitution.float_environ_get_switch 0.03% : 0.000006s : 4: substitution.float_tuple_getitem_switch 0.16% : 0.000028s : 97: substitution.fold_const_symbol 7.40% : 0.001284s : 8: substitution.getattr_setattr_resolve 0.44% : 0.000076s : 116: substitution.graph_param_transform 0.04% : 0.000008s : 10: substitution.incorporate_call 0.03% : 0.000006s : 10: substitution.incorporate_call_switch 68.13% : 0.011823s : 326: substitution.inline 1.30% : 0.000225s : 40: substitution.inline_without_move 0.66% : 0.000114s : 286: substitution.j_node_and_user_rematch 1.73% : 0.000300s : 40: substitution.less_batch_normalization 0.21% : 0.000036s : 66: substitution.load_eliminater 0.29% : 0.000051s : 10: substitution.merge_addn 0.38% : 0.000066s : 57: substitution.minmaximum_grad 0.01% : 0.000002s : 4: substitution.opt_reshape 0.19% : 0.000033s : 4: substitution.partial_defer_inline 0.61% : 0.000106s : 53: substitution.partial_eliminate 0.07% : 0.000012s : 15: substitution.reduce_all_const_elim 0.15% : 0.000027s : 11: substitution.reduce_eliminate 0.86% : 0.000149s : 286: substitution.remove_not_recompute_node 3.59% : 0.000623s : 326: substitution.replace_applicator 0.43% : 0.000075s : 162: substitution.replace_old_param 0.18% : 0.000031s : 8: substitution.reshape_eliminate 0.04% : 0.000008s : 5: substitution.set_cell_output_no_recompute 0.03% : 0.000006s : 2: substitution.specialize_transform 0.09% : 0.000016s : 12: substitution.split_environ_get_set_with_tuple_value 0.35% : 0.000061s : 24: substitution.switch_simplify 0.20% : 0.000034s : 14: substitution.tile_eliminate 0.87% : 0.000150s : 57: substitution.tuple_list_convert_item_index_to_positive 0.47% : 0.000081s : 63: substitution.tuple_list_get_item_const_eliminator 0.77% : 0.000133s : 63: substitution.tuple_list_get_item_depend_reorder 3.57% : 0.000620s : 242: substitution.tuple_list_get_item_eliminator 0.61% : 0.000106s : 63: substitution.tuple_list_get_set_item_eliminator 0.96% : 0.000166s : 178: substitution.updatestate_pure_node_eliminater 1.58% : 0.000274s : 215: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.786575 2 96.03% : 0.755313s : 1: type_inference.infer 3.97% : 0.031262s : 1: type_inference.specialize ------[replace.] 0.006030 566 0.09% : 0.000006s : 1: replace.arithmetic_simplify 0.55% : 0.000033s : 6: replace.cast_eliminate 0.86% : 0.000052s : 7: replace.depend_value_elim 0.62% : 0.000038s : 3: replace.environ_get_set_eliminate 1.94% : 0.000117s : 6: replace.getattr_setattr_resolve 50.08% : 0.003019s : 313: replace.inline 0.35% : 0.000021s : 1: replace.merge_addn 3.40% : 0.000205s : 13: replace.partial_eliminate 2.95% : 0.000178s : 10: replace.replace_applicator 4.21% : 0.000254s : 24: replace.switch_simplify 0.85% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 33.83% : 0.002040s : 175: replace.tuple_list_get_item_eliminator 0.26% : 0.000016s : 1: replace.updatestate_useless_node_eliminater ------[match.] 0.013461 566 0.08% : 0.000011s : 1: match.arithmetic_simplify 0.21% : 0.000028s : 6: match.cast_eliminate 0.02% : 0.000003s : 7: match.depend_value_elim 0.14% : 0.000019s : 3: match.environ_get_set_eliminate 8.71% : 0.001172s : 6: match.getattr_setattr_resolve 86.36% : 0.011624s : 313: match.inline 0.18% : 0.000025s : 1: match.merge_addn 0.49% : 0.000066s : 13: match.partial_eliminate 0.35% : 0.000047s : 10: match.replace_applicator 0.37% : 0.000050s : 24: match.switch_simplify 0.23% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 2.79% : 0.000376s : 175: match.tuple_list_get_item_eliminator 0.07% : 0.000009s : 1: match.updatestate_useless_node_eliminater ------[predicate.] 0.015532100237 0.85% : 0.000132s : 982: predicate.accumulaten_eliminater 0.19% : 0.000030s : 116: predicate.ad_related_special_op_eliminate 0.54% : 0.000084s : 621: predicate.addn_check_dump 0.85% : 0.000132s : 982: predicate.addn_zero_filter 0.84% : 0.000131s : 982: predicate.adjust_all_reduce_mul_add 1.82% : 0.000283s : 1604: predicate.arithmetic_simplify 0.85% : 0.000132s : 989: predicate.cast_eliminate 3.28% : 0.000509s : 2476: predicate.check_bprop_eliminate 0.54% : 0.000085s : 621: predicate.compare_switch_simplify 0.06% : 0.000009s : 122: predicate.const_output_eliminate 0.11% : 0.000017s : 116: predicate.convert_tensor_all_eliminate 1.29% : 0.000200s : 1173: predicate.convert_tensor_eliminate 0.56% : 0.000087s : 624: predicate.depend_value_elim 0.91% : 0.000142s : 992: predicate.dict_get_item_const_eliminator 0.93% : 0.000145s : 992: predicate.dict_get_item_eliminator 0.92% : 0.000144s : 992: predicate.dict_set_item_eliminator 0.06% : 0.000009s : 116: predicate.elim_not_effective 0.12% : 0.000019s : 116: predicate.elim_shapecalc_of_broadcastargs 0.95% : 0.000147s : 1111: predicate.environ_add_const_eliminate 0.97% : 0.000150s : 1114: predicate.environ_get_add_eliminate 0.93% : 0.000144s : 1111: predicate.environ_get_depend_swap 1.55% : 0.000241s : 1735: predicate.environ_get_eliminate 0.92% : 0.000142s : 1114: predicate.environ_get_set_eliminate 1.32% : 0.000205s : 1500: predicate.exchange_switch_depend_value 1.67% : 0.000259s : 1500: predicate.float_depend_g_call 0.54% : 0.000084s : 621: predicate.float_environ_get_switch 0.65% : 0.000101s : 743: predicate.float_tuple_getitem_switch 0.05% : 0.000008s : 116: predicate.fold_const_symbol 0.34% : 0.000052s : 366: predicate.get_grad_eliminate 0.09% : 0.000013s : 40: predicate.getattr_setattr_resolve 0.06% : 0.000010s : 116: predicate.graph_param_transform 0.55% : 0.000085s : 621: predicate.incorporate_call 0.54% : 0.000083s : 621: predicate.incorporate_call_switch 4.20% : 0.000653s : 3713: predicate.inline 1.44% : 0.000223s : 1014: predicate.inline_without_move 0.17% : 0.000027s : 366: predicate.j_node_and_user_rematch 0.41% : 0.000064s : 374: predicate.less_batch_normalization 1.26% : 0.000195s : 1411: predicate.list_to_tuple_eliminator_ 2.08% : 0.000323s : 2412: predicate.load_eliminater 0.22% : 0.000034s : 122: predicate.loop_unroll_after_grad 2.68% : 0.000417s : 2198: predicate.loop_unroll_before_grad 1.31% : 0.000203s : 1242: predicate.make_slice_get_slice_eliminator 0.55% : 0.000085s : 623: predicate.merge_addn 2.86% : 0.000445s : 2432: predicate.micro_step_allgather_replace 2.88% : 0.000448s : 2432: predicate.mini_step_allgather_replace 0.83% : 0.000128s : 983: predicate.minmaximum_grad 0.12% : 0.000019s : 116: predicate.mutable_eliminate 0.12% : 0.000018s : 116: predicate.opt_reshape 0.12% : 0.000019s : 122: predicate.parallel_virtual_node 2.43% : 0.000377s : 1500: predicate.partial_defer_inline 1.25% : 0.000194s : 1308: predicate.partial_eliminate 0.86% : 0.000134s : 982: predicate.print_const_string_wrapper 0.54% : 0.000083s : 610: predicate.reduce_all_const_elim 1.06% : 0.000165s : 983: predicate.reduce_eliminate 0.17% : 0.000026s : 366: predicate.remove_not_recompute_node 1.98% : 0.000308s : 3625: predicate.replace_applicator 0.49% : 0.000076s : 1014: predicate.replace_old_param 0.06% : 0.000010s : 122: predicate.reset_defer_inline 0.89% : 0.000138s : 983: predicate.reshape_eliminate 2.96% : 0.000459s : 2432: predicate.row_tensor_add_zeros_like 0.12% : 0.000019s : 122: predicate.row_tensor_eliminate 3.28% : 0.000510s : 2476: predicate.same_eliminate 0.21% : 0.000032s : 417: predicate.set_cell_output_no_recompute 0.36% : 0.000055s : 366: predicate.shard_identity_eliminate 1.32% : 0.000206s : 1136: predicate.special_op_eliminate 0.62% : 0.000096s : 623: predicate.specialize_transform 3.16% : 0.000491s : 2432: predicate.split_environ_get_set_with_tuple_value 0.98% : 0.000153s : 1014: predicate.stack_unstack_eliminate 2.04% : 0.000317s : 2412: predicate.stopgrad_eliminater 0.11% : 0.000017s : 122: predicate.switch_call_monad_eliminater 1.46% : 0.000226s : 1500: predicate.switch_defer_inline 4.39% : 0.000681s : 3976: predicate.switch_layer_defer_inline 4.84% : 0.000752s : 4369: predicate.switch_simplify 0.85% : 0.000131s : 983: predicate.tile_eliminate 0.83% : 0.000129s : 983: predicate.transpose_eliminate 1.18% : 0.000183s : 1230: predicate.tuple_list_convert_item_index_to_positive 1.20% : 0.000186s : 1236: predicate.tuple_list_get_item_const_eliminator 1.08% : 0.000168s : 1236: predicate.tuple_list_get_item_depend_reorder 2.06% : 0.000320s : 2032: predicate.tuple_list_get_item_eliminator 1.13% : 0.000175s : 1236: predicate.tuple_list_get_set_item_eliminator 1.74% : 0.000271s : 1857: predicate.tuple_list_set_item_eliminator 1.25% : 0.000193s : 1411: predicate.tuple_to_list_eliminator_ 2.12% : 0.000330s : 2412: predicate.updatestate_pure_node_eliminater 2.95% : 0.000458s : 3034: predicate.updatestate_useless_node_eliminater 0.12% : 0.000019s : 122: predicate.value_based_eliminate 0.34% : 0.000053s : 366: predicate.virtual_dataset_eliminate 0.34% : 0.000052s : 366: predicate.virtual_output_eliminate 0.12% : 0.000018s : 122: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.043212 649 63.73% : 0.027540s : 290: func_graph_cloner_run.FuncGraphClonerGraph 4.77% : 0.002060s : 27: func_graph_cloner_run.FuncGraphClonerNode 31.50% : 0.013611s : 332: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 14.509445 280 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000139s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000628s : 1: add_recomputation 0.00% : 0.000014s : 1: assign_add_opt 0.02% : 0.002367s : 1: auto_monad 0.00% : 0.000324s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.02% : 0.002224s : 1: bootstrap 0.00% : 0.000072s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000110s : 1: convert_after_rewriter 0.00% : 0.000269s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000391s : 1: distribtued_split 0.01% : 0.000961s : 1: eliminate_special_op_node 0.00% : 0.000084s : 1: environ_conv 0.00% : 0.000022s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000008s : 1: get_jit_bprop_graph 0.00% : 0.000047s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000010s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000006s : 1: label_micro_interleaved_index 0.01% : 0.000976s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000157s : 1: opt.transform.loop_unroll_optimizer 0.67% : 0.097129s : 162: opt.transform.opt_a 0.00% : 0.000703s : 1: opt.transform.opt_after_cconv 0.02% : 0.002857s : 27: opt.transform.opt_b 0.01% : 0.001515s : 4: opt.transform.opt_resolve 0.01% : 0.000888s : 1: opt.transform.opt_trans_graph 0.00% : 0.000405s : 3: opt.transform.special_op_eliminate 0.00% : 0.000681s : 4: opt.transform.symbol_engine_opt 1.94% : 0.281684s : 1: opt_a 0.01% : 0.001391s : 1: opt_after_cconv 0.02% : 0.003592s : 1: opt_b 2.04% : 0.296058s : 1: optimize 0.00% : 0.000124s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000076s : 1: order_py_execute_after_rewriter 0.00% : 0.000135s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000036s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000033s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000020s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.01% : 0.000816s : 1: pre_auto_parallel 0.01% : 0.000876s : 1: py_interpret_to_execute 0.00% : 0.000121s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.00% : 0.000529s : 1: remove_dup_value 0.52% : 0.074754s : 2: renormalize.infer 0.24% : 0.034452s : 2: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001026s : 1: rewriter_after_opt_a 0.01% : 0.002003s : 1: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000116s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000779s : 1: symbol_engine_optimizer 88.96% : 12.908224s : 1: task_emit 0.01% : 0.000922s : 1: tuple_transform 5.43% : 0.787183s : 1: type_inference 0.01% : 0.001344s : 1: validate TotalTime = 13.8801, [21] [bootstrap]: 0.00185591 [type_inference]: 0.762848 [auto_monad]: 0.00236066 [graph_reusing]: 4.00702e-05 [inline]: 1.61026e-06 [parallel-infer-symbol]: 1.78022e-06 [pre_auto_parallel]: 0.00079937 [insert-virtual-dataset]: 3.2899e-06 [parallel-infer-symbol-second]: 9.89996e-07 [dataset_repeat_opt]: 1.31037e-06 [pipeline_split]: 1.09989e-06 [optimize]: 0.285893, [52] [py_interpret_to_execute]: 0.00089299 [rewriter_before_opt_a]: 0.00194783 [opt_a]: 0.271818, [3] [Cycle 1]: 0.199937, [43] [expand_dump_flag]: 4.431e-05 [switch_simplify]: 0.00136369 [loop_unroll]: 0.00089851 [a_1]: 0.0260014 [recompute_prepare]: 0.00020139 [updatestate_depend_eliminate]: 0.0003883 [updatestate_assign_eliminate]: 0.00013474 [updatestate_loads_eliminate]: 0.00021606 [parameter_eliminate]: 1.16499e-05 [a_2]: 0.00387203 [accelerated_algorithm]: 0.00042232 [shard]: 1.95019e-06 [meta_shard_fg_expand]: 5.86403e-05 [shard_inline]: 0.00012219 [auto_parallel]: 8.19098e-05 [parallel]: 9.5102e-06 [flash_sp]: 4.90001e-05 [merge_comm]: 8.085e-05 [allreduce_fusion]: 7.07298e-05 [matmul_add_comm_reduction]: 0.00011751 [allreduce_slice_to_reducescatter]: 3.7998e-07 [virtual_shard_identity]: 0.00012875 [virtual_dataset]: 0.00012172 [get_grad_eliminate_]: 0.00012205 [virtual_output]: 0.00012067 [merge_forward]: 0.00010889 [cell_reuse_recompute_pass]: 2.03028e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022441 [before_grad]: 0.00021842 [inplace_validation]: 0.00014106 [meta_fg_expand]: 0.0484635 [inplace_validation_after_expand]: 0.00061003 [flash_sp_send_recv_attached]: 6.19981e-06 [receive_attached]: 2.71993e-06 [after_resolve]: 0.00094408 [a_after_grad]: 0.00149679 [special_op_eliminate]: 0.00076262 [renormalize]: 0.0907032 [add_forward_monad_depend]: 0.00028856 [auto_monad_grad]: 0.00010257 [auto_monad_eliminator]: 0.00118966 [cse]: 0.00308089 [a_3]: 0.0164612 [Cycle 2]: 0.0555211, [43] [expand_dump_flag]: 3.20598e-05 [switch_simplify]: 0.0010319 [loop_unroll]: 0.00101742 [a_1]: 0.0256927 [recompute_prepare]: 0.00014966 [updatestate_depend_eliminate]: 0.00015757 [updatestate_assign_eliminate]: 8.75299e-05 [updatestate_loads_eliminate]: 0.0001417 [parameter_eliminate]: 2.90014e-06 [a_2]: 0.00385627 [accelerated_algorithm]: 0.00014467 [shard]: 1.78022e-06 [meta_shard_fg_expand]: 5.57504e-05 [shard_inline]: 0.00012245 [auto_parallel]: 9.98001e-05 [parallel]: 9.32999e-06 [flash_sp]: 3.36999e-06 [merge_comm]: 9.06102e-05 [allreduce_fusion]: 8.17203e-05 [matmul_add_comm_reduction]: 0.0001064 [allreduce_slice_to_reducescatter]: 4.00003e-07 [virtual_shard_identity]: 0.00012448 [virtual_dataset]: 0.00011907 [get_grad_eliminate_]: 0.00011698 [virtual_output]: 0.00011837 [merge_forward]: 8.01003e-05 [cell_reuse_recompute_pass]: 2.08989e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021883 [before_grad]: 0.00021026 [inplace_validation]: 7.348e-05 [meta_fg_expand]: 0.00149691 [inplace_validation_after_expand]: 0.00076671 [flash_sp_send_recv_attached]: 1.93994e-06 [receive_attached]: 1.37975e-06 [after_resolve]: 0.00015647 [a_after_grad]: 0.00020279 [special_op_eliminate]: 0.00011982 [renormalize]: 0.0130631 [add_forward_monad_depend]: 4.09968e-06 [auto_monad_grad]: 2.2701e-06 [auto_monad_eliminator]: 0.00027323 [cse]: 0.00425439 [a_3]: 0.00086985 [Cycle 3]: 0.0104354, [43] [expand_dump_flag]: 1.82027e-06 [switch_simplify]: 0.0001212 [loop_unroll]: 0.00013564 [a_1]: 0.00377057 [recompute_prepare]: 0.00017711 [updatestate_depend_eliminate]: 0.0001351 [updatestate_assign_eliminate]: 8.31196e-05 [updatestate_loads_eliminate]: 8.22898e-05 [parameter_eliminate]: 2.2701e-06 [a_2]: 0.0018809 [accelerated_algorithm]: 0.00014337 [shard]: 1.29035e-06 [meta_shard_fg_expand]: 4.17801e-05 [shard_inline]: 0.00012209 [auto_parallel]: 9.67598e-05 [parallel]: 7.98982e-06 [flash_sp]: 2.02004e-06 [merge_comm]: 9.15197e-05 [allreduce_fusion]: 8.45301e-05 [matmul_add_comm_reduction]: 0.00010833 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 0.00012355 [virtual_dataset]: 0.00011818 [get_grad_eliminate_]: 0.00011523 [virtual_output]: 0.00011624 [merge_forward]: 8.49403e-05 [cell_reuse_recompute_pass]: 2.42004e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022028 [before_grad]: 0.00022145 [inplace_validation]: 8.10102e-05 [meta_fg_expand]: 9.46401e-05 [inplace_validation_after_expand]: 9.61903e-05 [flash_sp_send_recv_attached]: 1.24006e-06 [receive_attached]: 8.29808e-07 [after_resolve]: 0.00013664 [a_after_grad]: 0.00019749 [special_op_eliminate]: 0.00011864 [renormalize]: 7.96281e-08 [add_forward_monad_depend]: 1.69035e-06 [auto_monad_grad]: 1.74996e-06 [auto_monad_eliminator]: 0.00014911 [cse]: 0.00035661 [a_3]: 0.00085422 [py_interpret_to_execute_after_opt_a]: 0.0001112 [slice_cell_reuse_recomputed_activation]: 2.21003e-06 [rewriter_after_opt_a]: 0.00091548 [convert_after_rewriter]: 9.89302e-05 [order_py_execute_after_rewriter]: 7.31302e-05 [opt_b]: 0.00357608, [1] [Cycle 1]: 0.00356908, [7] [b_1]: 0.00280139 [b_2]: 0.00012299 [updatestate_depend_eliminate]: 8.584e-05 [updatestate_assign_eliminate]: 7.71997e-05 [updatestate_loads_eliminate]: 7.99401e-05 [renormalize]: 4.70318e-07 [cse]: 0.00035115 [optimize_parallel_all_gather_comm]: 0.00012192 [overlap_param_gather]: 2.759e-05 [cconv]: 5.84698e-05 [loop_unroll]: 0.00085317 [opt_after_cconv]: 0.00138594, [1] [Cycle 1]: 0.00137934, [7] [c_1]: 0.00070346 [parameter_eliminate]: 1.82027e-06 [updatestate_depend_eliminate]: 0.00010814 [updatestate_assign_eliminate]: 8.21399e-05 [updatestate_loads_eliminate]: 8.16598e-05 [cse]: 0.00035318 [renormalize]: 3.59956e-07 [remove_dup_value]: 0.00050195 [tuple_transform]: 0.00085448, [1] [Cycle 1]: 0.00084871, [2] [d_1]: 0.00083378 [renormalize]: 3.29688e-07 [partial_unused_args_eliminate]: 2.15974e-06 [add_cache_embedding]: 0.00013184 [add_recomputation]: 0.00060762 [cse_after_recomputation]: 0.00026675, [1] [Cycle 1]: 0.00025995, [1] [cse]: 0.00024946 [environ_conv]: 7.63303e-05 [swap_dp_allreduce_reducescatter]: 0.00011223 [bias_add_comm_swap]: 2.46009e-06 [label_micro_interleaved_index]: 1.50036e-06 [label_fine_grained_interleaved_index]: 1.33971e-06 [merge_cast_opt]: 7.79983e-07 [slice_recompute_activation]: 1.45007e-06 [micro_interleaved_order_control]: 1.14972e-06 [assign_add_opt]: 9.79006e-06 [ForceFp32Comm]: 5.99772e-07 [remove_cast_before_assign_add]: 6.3004e-07 [full_micro_interleaved_order_control]: 1.24983e-06 [reorder_send_recv_between_fp_bp]: 1.28988e-06 [comm_op_add_attrs]: 6.40284e-07 [add_comm_op_reuse_tag]: 5.89993e-07 [interleave_split_concat_branches]: 8.60076e-07 [interleave_parallel_branches]: 5.49946e-07 [overlap_opt_shard_in_pipeline]: 3.494e-05 [overlap_opt_shard_grad_in_pipeline]: 1.27964e-06 [control_data_broadcast_order]: 1.0198e-06 [grouped_pairwise_exchange_alltoall]: 9.80217e-07 [offloading_packed_experts]: 6.9011e-07 [overlap_recompute_and_grad_model_parallel]: 1.43982e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.20144e-07 [overlap_recompute_allgather_and_fa_grad]: 9.80217e-07 [overlap_grad_ring_attention]: 1.53016e-06 [overlap_grad_flash_sp]: 0.0001432 [begin_end_overlap_inline]: 5.89993e-07 [split_matmul_comm_elemetwise]: 1.64984e-06 [split_layernorm_comm]: 1.18976e-06 [handle_group_info]: 9.79751e-07 [symbol_engine_optimizer]: 0.00074928, [1] [Cycle 1]: 0.0007434, [6] [build]: 3.98397e-05 [elim_shapecalc]: 0.0001299 [elim_not_effective]: 0.00019813 [opt_reshape]: 0.00011777 [fold_const_symbol]: 0.00022133 [renormalize]: 3.7998e-07 [pipeline_parallel_scheduler]: 1.45007e-06 [auto_monad_reorder]: 0.00035213 [get_jit_bprop_graph]: 8.49832e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.00092304 [distribtued_split]: 0.00038136 [validate]: 0.00029384 [task_emit]: 12.823 [execute]: 1.04201e-05 Sums bootstrap : 0.001856s : 0.01% type_inference : 0.762848s : 5.50% auto_monad : 0.002361s : 0.02% graph_reusing : 0.000040s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000799s : 0.01% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000893s : 0.01% optimize.rewriter_before_opt_a : 0.001948s : 0.01% optimize.opt_a.expand_dump_flag : 0.000078s : 0.00% optimize.opt_a.switch_simplify : 0.002517s : 0.02% optimize.opt_a.loop_unroll : 0.002052s : 0.01% optimize.opt_a.a_1 : 0.055465s : 0.40% optimize.opt_a.recompute_prepare : 0.000528s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000681s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000305s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000440s : 0.00% optimize.opt_a.parameter_eliminate : 0.000017s : 0.00% optimize.opt_a.a_2 : 0.009609s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000710s : 0.01% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000156s : 0.00% optimize.opt_a.shard_inline : 0.000367s : 0.00% optimize.opt_a.auto_parallel : 0.000278s : 0.00% optimize.opt_a.parallel : 0.000027s : 0.00% optimize.opt_a.flash_sp : 0.000054s : 0.00% optimize.opt_a.merge_comm : 0.000263s : 0.00% optimize.opt_a.allreduce_fusion : 0.000237s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000332s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000377s : 0.00% optimize.opt_a.virtual_dataset : 0.000359s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000354s : 0.00% optimize.opt_a.virtual_output : 0.000355s : 0.00% optimize.opt_a.merge_forward : 0.000274s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000007s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000664s : 0.00% optimize.opt_a.before_grad : 0.000650s : 0.00% optimize.opt_a.inplace_validation : 0.000296s : 0.00% optimize.opt_a.meta_fg_expand : 0.050055s : 0.36% optimize.opt_a.inplace_validation_after_expand : 0.001473s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000009s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.00% optimize.opt_a.after_resolve : 0.001237s : 0.01% optimize.opt_a.a_after_grad : 0.001897s : 0.01% optimize.opt_a.special_op_eliminate : 0.001001s : 0.01% optimize.opt_a.renormalize : 0.103766s : 0.75% optimize.opt_a.add_forward_monad_depend : 0.000294s : 0.00% optimize.opt_a.auto_monad_grad : 0.000107s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.001612s : 0.01% optimize.opt_a.cse : 0.007692s : 0.06% optimize.opt_a.a_3 : 0.018185s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000111s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000915s : 0.01% optimize.convert_after_rewriter : 0.000099s : 0.00% optimize.order_py_execute_after_rewriter : 0.000073s : 0.00% optimize.opt_b.b_1 : 0.002801s : 0.02% optimize.opt_b.b_2 : 0.000123s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000086s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000077s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000080s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000351s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000122s : 0.00% optimize.overlap_param_gather : 0.000028s : 0.00% optimize.cconv : 0.000058s : 0.00% optimize.loop_unroll : 0.000853s : 0.01% optimize.opt_after_cconv.c_1 : 0.000703s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000108s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000082s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000082s : 0.00% optimize.opt_after_cconv.cse : 0.000353s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000502s : 0.00% optimize.tuple_transform.d_1 : 0.000834s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000132s : 0.00% optimize.add_recomputation : 0.000608s : 0.00% optimize.cse_after_recomputation.cse : 0.000249s : 0.00% optimize.environ_conv : 0.000076s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000112s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000010s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000035s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000143s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000040s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000130s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000198s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000118s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000221s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000352s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000923s : 0.01% distribtued_split : 0.000381s : 0.00% validate : 0.000294s : 0.00% task_emit : 12.823003s : 92.44% execute : 0.000010s : 0.00% Time group info: ------[substitution.] 0.016715 3191 0.11% : 0.000019s : 9: substitution.addn_check_dump 0.33% : 0.000056s : 8: substitution.addn_zero_filter 0.09% : 0.000015s : 8: substitution.adjust_all_reduce_mul_add 1.36% : 0.000227s : 59: substitution.arithmetic_simplify 0.45% : 0.000075s : 11: substitution.cast_eliminate 0.24% : 0.000040s : 35: substitution.depend_value_elim 0.17% : 0.000028s : 97: substitution.elim_not_effective 0.01% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.13% : 0.000022s : 6: substitution.environ_get_add_eliminate 0.03% : 0.000004s : 3: substitution.environ_get_depend_swap 0.09% : 0.000014s : 12: substitution.environ_get_eliminate 0.13% : 0.000022s : 6: substitution.environ_get_set_eliminate 0.24% : 0.000040s : 53: substitution.float_depend_g_call 0.03% : 0.000005s : 6: substitution.float_environ_get_switch 0.03% : 0.000005s : 4: substitution.float_tuple_getitem_switch 0.21% : 0.000035s : 97: substitution.fold_const_symbol 8.02% : 0.001341s : 8: substitution.getattr_setattr_resolve 0.45% : 0.000075s : 116: substitution.graph_param_transform 0.04% : 0.000007s : 10: substitution.incorporate_call 0.03% : 0.000005s : 10: substitution.incorporate_call_switch 67.48% : 0.011280s : 326: substitution.inline 1.30% : 0.000218s : 40: substitution.inline_without_move 0.74% : 0.000123s : 286: substitution.j_node_and_user_rematch 1.78% : 0.000297s : 40: substitution.less_batch_normalization 0.31% : 0.000051s : 66: substitution.load_eliminater 0.27% : 0.000044s : 10: substitution.merge_addn 0.39% : 0.000065s : 57: substitution.minmaximum_grad 0.01% : 0.000002s : 4: substitution.opt_reshape 0.22% : 0.000036s : 4: substitution.partial_defer_inline 0.58% : 0.000096s : 53: substitution.partial_eliminate 0.06% : 0.000011s : 15: substitution.reduce_all_const_elim 0.14% : 0.000024s : 11: substitution.reduce_eliminate 0.87% : 0.000146s : 286: substitution.remove_not_recompute_node 3.52% : 0.000588s : 326: substitution.replace_applicator 0.44% : 0.000074s : 162: substitution.replace_old_param 0.18% : 0.000029s : 8: substitution.reshape_eliminate 0.04% : 0.000007s : 5: substitution.set_cell_output_no_recompute 0.03% : 0.000005s : 2: substitution.specialize_transform 0.09% : 0.000015s : 12: substitution.split_environ_get_set_with_tuple_value 0.29% : 0.000048s : 24: substitution.switch_simplify 0.19% : 0.000032s : 14: substitution.tile_eliminate 0.89% : 0.000149s : 57: substitution.tuple_list_convert_item_index_to_positive 0.48% : 0.000080s : 63: substitution.tuple_list_get_item_const_eliminator 0.78% : 0.000131s : 63: substitution.tuple_list_get_item_depend_reorder 3.65% : 0.000610s : 242: substitution.tuple_list_get_item_eliminator 0.63% : 0.000105s : 63: substitution.tuple_list_get_set_item_eliminator 0.99% : 0.000165s : 178: substitution.updatestate_pure_node_eliminater 1.48% : 0.000247s : 215: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.762240 2 96.02% : 0.731895s : 1: type_inference.infer 3.98% : 0.030346s : 1: type_inference.specialize ------[replace.] 0.006004 566 0.10% : 0.000006s : 1: replace.arithmetic_simplify 0.57% : 0.000034s : 6: replace.cast_eliminate 0.84% : 0.000051s : 7: replace.depend_value_elim 0.64% : 0.000038s : 3: replace.environ_get_set_eliminate 1.66% : 0.000100s : 6: replace.getattr_setattr_resolve 49.42% : 0.002967s : 313: replace.inline 0.34% : 0.000020s : 1: replace.merge_addn 3.38% : 0.000203s : 13: replace.partial_eliminate 2.83% : 0.000170s : 10: replace.replace_applicator 4.07% : 0.000245s : 24: replace.switch_simplify 0.82% : 0.000049s : 6: replace.tuple_list_get_item_depend_reorder 35.08% : 0.002106s : 175: replace.tuple_list_get_item_eliminator 0.25% : 0.000015s : 1: replace.updatestate_useless_node_eliminater ------[match.] 0.012929 566 0.07% : 0.000009s : 1: match.arithmetic_simplify 0.36% : 0.000047s : 6: match.cast_eliminate 0.02% : 0.000003s : 7: match.depend_value_elim 0.12% : 0.000016s : 3: match.environ_get_set_eliminate 9.44% : 0.001220s : 6: match.getattr_setattr_resolve 85.60% : 0.011067s : 313: match.inline 0.16% : 0.000020s : 1: match.merge_addn 0.45% : 0.000058s : 13: match.partial_eliminate 0.35% : 0.000045s : 10: match.replace_applicator 0.29% : 0.000038s : 24: match.switch_simplify 0.23% : 0.000029s : 6: match.tuple_list_get_item_depend_reorder 2.86% : 0.000369s : 175: match.tuple_list_get_item_eliminator 0.05% : 0.000007s : 1: match.updatestate_useless_node_eliminater ------[predicate.] 0.015602100237 0.84% : 0.000131s : 982: predicate.accumulaten_eliminater 0.20% : 0.000031s : 116: predicate.ad_related_special_op_eliminate 0.55% : 0.000086s : 621: predicate.addn_check_dump 0.88% : 0.000138s : 982: predicate.addn_zero_filter 0.85% : 0.000132s : 982: predicate.adjust_all_reduce_mul_add 1.87% : 0.000292s : 1604: predicate.arithmetic_simplify 0.92% : 0.000143s : 989: predicate.cast_eliminate 2.88% : 0.000449s : 2476: predicate.check_bprop_eliminate 0.56% : 0.000087s : 621: predicate.compare_switch_simplify 0.06% : 0.000010s : 122: predicate.const_output_eliminate 0.11% : 0.000017s : 116: predicate.convert_tensor_all_eliminate 1.29% : 0.000202s : 1173: predicate.convert_tensor_eliminate 0.57% : 0.000089s : 624: predicate.depend_value_elim 1.00% : 0.000155s : 992: predicate.dict_get_item_const_eliminator 0.99% : 0.000155s : 992: predicate.dict_get_item_eliminator 1.00% : 0.000156s : 992: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 116: predicate.elim_not_effective 0.12% : 0.000019s : 116: predicate.elim_shapecalc_of_broadcastargs 0.95% : 0.000148s : 1111: predicate.environ_add_const_eliminate 0.97% : 0.000151s : 1114: predicate.environ_get_add_eliminate 0.94% : 0.000146s : 1111: predicate.environ_get_depend_swap 1.56% : 0.000243s : 1735: predicate.environ_get_eliminate 0.96% : 0.000150s : 1114: predicate.environ_get_set_eliminate 1.31% : 0.000205s : 1500: predicate.exchange_switch_depend_value 1.68% : 0.000263s : 1500: predicate.float_depend_g_call 0.55% : 0.000085s : 621: predicate.float_environ_get_switch 0.66% : 0.000103s : 743: predicate.float_tuple_getitem_switch 0.05% : 0.000008s : 116: predicate.fold_const_symbol 0.34% : 0.000053s : 366: predicate.get_grad_eliminate 0.08% : 0.000012s : 40: predicate.getattr_setattr_resolve 0.06% : 0.000010s : 116: predicate.graph_param_transform 0.55% : 0.000085s : 621: predicate.incorporate_call 0.54% : 0.000085s : 621: predicate.incorporate_call_switch 4.22% : 0.000659s : 3713: predicate.inline 1.44% : 0.000225s : 1014: predicate.inline_without_move 0.17% : 0.000027s : 366: predicate.j_node_and_user_rematch 0.41% : 0.000065s : 374: predicate.less_batch_normalization 1.31% : 0.000204s : 1411: predicate.list_to_tuple_eliminator_ 2.13% : 0.000332s : 2412: predicate.load_eliminater 0.23% : 0.000036s : 122: predicate.loop_unroll_after_grad 2.72% : 0.000424s : 2198: predicate.loop_unroll_before_grad 1.13% : 0.000177s : 1242: predicate.make_slice_get_slice_eliminator 0.57% : 0.000088s : 623: predicate.merge_addn 2.75% : 0.000430s : 2432: predicate.micro_step_allgather_replace 2.76% : 0.000431s : 2432: predicate.mini_step_allgather_replace 0.88% : 0.000138s : 983: predicate.minmaximum_grad 0.12% : 0.000019s : 116: predicate.mutable_eliminate 0.11% : 0.000017s : 116: predicate.opt_reshape 0.12% : 0.000019s : 122: predicate.parallel_virtual_node 2.44% : 0.000380s : 1500: predicate.partial_defer_inline 1.26% : 0.000197s : 1308: predicate.partial_eliminate 0.97% : 0.000151s : 982: predicate.print_const_string_wrapper 0.55% : 0.000086s : 610: predicate.reduce_all_const_elim 1.11% : 0.000173s : 983: predicate.reduce_eliminate 0.17% : 0.000027s : 366: predicate.remove_not_recompute_node 2.02% : 0.000315s : 3625: predicate.replace_applicator 0.48% : 0.000075s : 1014: predicate.replace_old_param 0.06% : 0.000010s : 122: predicate.reset_defer_inline 0.85% : 0.000133s : 983: predicate.reshape_eliminate 2.92% : 0.000455s : 2432: predicate.row_tensor_add_zeros_like 0.13% : 0.000020s : 122: predicate.row_tensor_eliminate 2.98% : 0.000465s : 2476: predicate.same_eliminate 0.21% : 0.000032s : 417: predicate.set_cell_output_no_recompute 0.35% : 0.000055s : 366: predicate.shard_identity_eliminate 1.52% : 0.000237s : 1136: predicate.special_op_eliminate 0.63% : 0.000099s : 623: predicate.specialize_transform 3.01% : 0.000470s : 2432: predicate.split_environ_get_set_with_tuple_value 1.00% : 0.000155s : 1014: predicate.stack_unstack_eliminate 2.08% : 0.000324s : 2412: predicate.stopgrad_eliminater 0.11% : 0.000017s : 122: predicate.switch_call_monad_eliminater 1.46% : 0.000228s : 1500: predicate.switch_defer_inline 4.29% : 0.000669s : 3976: predicate.switch_layer_defer_inline 4.79% : 0.000748s : 4369: predicate.switch_simplify 0.94% : 0.000147s : 983: predicate.tile_eliminate 0.84% : 0.000131s : 983: predicate.transpose_eliminate 1.20% : 0.000187s : 1230: predicate.tuple_list_convert_item_index_to_positive 1.21% : 0.000189s : 1236: predicate.tuple_list_get_item_const_eliminator 1.09% : 0.000171s : 1236: predicate.tuple_list_get_item_depend_reorder 2.12% : 0.000331s : 2032: predicate.tuple_list_get_item_eliminator 1.11% : 0.000173s : 1236: predicate.tuple_list_get_set_item_eliminator 1.77% : 0.000276s : 1857: predicate.tuple_list_set_item_eliminator 1.29% : 0.000202s : 1411: predicate.tuple_to_list_eliminator_ 2.09% : 0.000326s : 2412: predicate.updatestate_pure_node_eliminater 2.98% : 0.000465s : 3034: predicate.updatestate_useless_node_eliminater 0.12% : 0.000019s : 122: predicate.value_based_eliminate 0.34% : 0.000053s : 366: predicate.virtual_dataset_eliminate 0.34% : 0.000052s : 366: predicate.virtual_output_eliminate 0.12% : 0.000019s : 122: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.039533 649 63.10% : 0.024944s : 290: func_graph_cloner_run.FuncGraphClonerGraph 4.66% : 0.001843s : 27: func_graph_cloner_run.FuncGraphClonerNode 32.24% : 0.012746s : 332: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 14.372888 280 0.00% : 0.000003s : 1: ForceFp32Comm 0.00% : 0.000139s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.00% : 0.000618s : 1: add_recomputation 0.00% : 0.000013s : 1: assign_add_opt 0.02% : 0.002384s : 1: auto_monad 0.00% : 0.000366s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001901s : 1: bootstrap 0.00% : 0.000065s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000105s : 1: convert_after_rewriter 0.00% : 0.000311s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.00% : 0.000395s : 1: distribtued_split 0.01% : 0.000938s : 1: eliminate_special_op_node 0.00% : 0.000085s : 1: environ_conv 0.00% : 0.000019s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000049s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.01% : 0.000863s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.00% : 0.000198s : 1: opt.transform.loop_unroll_optimizer 0.67% : 0.096090s : 162: opt.transform.opt_a 0.00% : 0.000701s : 1: opt.transform.opt_after_cconv 0.02% : 0.002896s : 27: opt.transform.opt_b 0.01% : 0.001546s : 4: opt.transform.opt_resolve 0.01% : 0.000831s : 1: opt.transform.opt_trans_graph 0.00% : 0.000384s : 3: opt.transform.special_op_eliminate 0.00% : 0.000662s : 4: opt.transform.symbol_engine_opt 1.89% : 0.271823s : 1: opt_a 0.01% : 0.001391s : 1: opt_after_cconv 0.02% : 0.003581s : 1: opt_b 1.99% : 0.285904s : 1: optimize 0.00% : 0.000129s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000078s : 1: order_py_execute_after_rewriter 0.00% : 0.000149s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000040s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000033s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000007s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.01% : 0.000817s : 1: pre_auto_parallel 0.01% : 0.000944s : 1: py_interpret_to_execute 0.00% : 0.000119s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.00% : 0.000512s : 1: remove_dup_value 0.49% : 0.070931s : 2: renormalize.infer 0.23% : 0.032812s : 2: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000925s : 1: rewriter_after_opt_a 0.01% : 0.001960s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000118s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000753s : 1: symbol_engine_optimizer 89.22% : 12.823038s : 1: task_emit 0.01% : 0.000858s : 1: tuple_transform 5.31% : 0.762883s : 1: type_inference 0.01% : 0.001353s : 1: validate TotalTime = 13.8666, [21] [bootstrap]: 0.00204073 [type_inference]: 0.776117 [auto_monad]: 0.00243198 [graph_reusing]: 3.97102e-05 [inline]: 1.72015e-06 [parallel-infer-symbol]: 1.56402e-05 [pre_auto_parallel]: 0.00081482 [insert-virtual-dataset]: 4.84008e-06 [parallel-infer-symbol-second]: 1.17021e-06 [dataset_repeat_opt]: 1.55997e-06 [pipeline_split]: 1.95019e-06 [optimize]: 0.288462, [52] [py_interpret_to_execute]: 0.00083583 [rewriter_before_opt_a]: 0.00199937 [opt_a]: 0.274248, [3] [Cycle 1]: 0.202082, [43] [expand_dump_flag]: 4.48702e-05 [switch_simplify]: 0.00138968 [loop_unroll]: 0.00089683 [a_1]: 0.0256713 [recompute_prepare]: 0.00018328 [updatestate_depend_eliminate]: 0.0003847 [updatestate_assign_eliminate]: 0.00012181 [updatestate_loads_eliminate]: 0.00022312 [parameter_eliminate]: 1.37701e-05 [a_2]: 0.00388906 [accelerated_algorithm]: 0.00041963 [shard]: 2.71993e-06 [meta_shard_fg_expand]: 5.81401e-05 [shard_inline]: 0.00012321 [auto_parallel]: 8.34302e-05 [parallel]: 1.22199e-05 [flash_sp]: 4.39999e-05 [merge_comm]: 8.44602e-05 [allreduce_fusion]: 7.33701e-05 [matmul_add_comm_reduction]: 0.00012953 [allreduce_slice_to_reducescatter]: 5.80214e-07 [virtual_shard_identity]: 0.00012979 [virtual_dataset]: 0.00012185 [get_grad_eliminate_]: 0.00012083 [virtual_output]: 0.00011957 [merge_forward]: 0.00010332 [cell_reuse_recompute_pass]: 2.37022e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022975 [before_grad]: 0.00022035 [inplace_validation]: 0.0001448 [meta_fg_expand]: 0.0486633 [inplace_validation_after_expand]: 0.00063227 [flash_sp_send_recv_attached]: 8.42009e-06 [receive_attached]: 1.73901e-05 [after_resolve]: 0.00094684 [a_after_grad]: 0.00150174 [special_op_eliminate]: 0.00073196 [renormalize]: 0.0931945 [add_forward_monad_depend]: 0.00028105 [auto_monad_grad]: 9.976e-05 [auto_monad_eliminator]: 0.00117607 [cse]: 0.00314386 [a_3]: 0.0161057 [Cycle 2]: 0.0558742, [43] [expand_dump_flag]: 3.14401e-05 [switch_simplify]: 0.00102709 [loop_unroll]: 0.00101752 [a_1]: 0.0259375 [recompute_prepare]: 0.00014795 [updatestate_depend_eliminate]: 0.00016299 [updatestate_assign_eliminate]: 9.05101e-05 [updatestate_loads_eliminate]: 0.00015133 [parameter_eliminate]: 3.77977e-06 [a_2]: 0.00385784 [accelerated_algorithm]: 0.00015366 [shard]: 1.66986e-06 [meta_shard_fg_expand]: 5.47497e-05 [shard_inline]: 0.0001231 [auto_parallel]: 0.00010144 [parallel]: 1.08397e-05 [flash_sp]: 4.00981e-06 [merge_comm]: 9.38401e-05 [allreduce_fusion]: 8.35201e-05 [matmul_add_comm_reduction]: 0.00010761 [allreduce_slice_to_reducescatter]: 4.69852e-07 [virtual_shard_identity]: 0.00012343 [virtual_dataset]: 0.00011879 [get_grad_eliminate_]: 0.00011676 [virtual_output]: 0.00011771 [merge_forward]: 7.92299e-05 [cell_reuse_recompute_pass]: 2.2701e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.0002238 [before_grad]: 0.00023177 [inplace_validation]: 7.786e-05 [meta_fg_expand]: 0.00150779 [inplace_validation_after_expand]: 0.00069767 [flash_sp_send_recv_attached]: 1.95019e-06 [receive_attached]: 1.81003e-06 [after_resolve]: 0.00015695 [a_after_grad]: 0.00021237 [special_op_eliminate]: 0.00011964 [renormalize]: 0.0132222 [add_forward_monad_depend]: 4.35021e-06 [auto_monad_grad]: 2.10013e-06 [auto_monad_eliminator]: 0.00027868 [cse]: 0.00416066 [a_3]: 0.00086402 [Cycle 3]: 0.0104114, [43] [expand_dump_flag]: 1.62004e-06 [switch_simplify]: 0.00011945 [loop_unroll]: 0.00011582 [a_1]: 0.00380259 [recompute_prepare]: 0.00012244 [updatestate_depend_eliminate]: 0.00013482 [updatestate_assign_eliminate]: 8.55201e-05 [updatestate_loads_eliminate]: 8.504e-05 [parameter_eliminate]: 2.84007e-06 [a_2]: 0.00186879 [accelerated_algorithm]: 0.00016032 [shard]: 1.26986e-06 [meta_shard_fg_expand]: 4.18196e-05 [shard_inline]: 0.00012016 [auto_parallel]: 0.00010251 [parallel]: 7.79983e-06 [flash_sp]: 2.04006e-06 [merge_comm]: 9.47001e-05 [allreduce_fusion]: 8.561e-05 [matmul_add_comm_reduction]: 0.00010624 [allreduce_slice_to_reducescatter]: 5.0012e-07 [virtual_shard_identity]: 0.0001251 [virtual_dataset]: 0.00011787 [get_grad_eliminate_]: 0.0001138 [virtual_output]: 0.00011525 [merge_forward]: 8.27103e-05 [cell_reuse_recompute_pass]: 2.44984e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022152 [before_grad]: 0.00021191 [inplace_validation]: 7.98097e-05 [meta_fg_expand]: 9.41199e-05 [inplace_validation_after_expand]: 9.68901e-05 [flash_sp_send_recv_attached]: 1.43982e-06 [receive_attached]: 8.30274e-07 [after_resolve]: 0.00013657 [a_after_grad]: 0.00019697 [special_op_eliminate]: 0.00011645 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 2.2999e-06 [auto_monad_grad]: 1.53016e-06 [auto_monad_eliminator]: 0.00015698 [cse]: 0.00035754 [a_3]: 0.00084301 [py_interpret_to_execute_after_opt_a]: 0.00012453 [slice_cell_reuse_recomputed_activation]: 3.2098e-06 [rewriter_after_opt_a]: 0.00094647 [convert_after_rewriter]: 0.00010317 [order_py_execute_after_rewriter]: 7.41798e-05 [opt_b]: 0.00358232, [1] [Cycle 1]: 0.00357369, [7] [b_1]: 0.00279053 [b_2]: 0.00012568 [updatestate_depend_eliminate]: 8.906e-05 [updatestate_assign_eliminate]: 7.83601e-05 [updatestate_loads_eliminate]: 8.388e-05 [renormalize]: 4.69852e-07 [cse]: 0.0003512 [optimize_parallel_all_gather_comm]: 0.00011736 [overlap_param_gather]: 2.5e-05 [cconv]: 6.33998e-05 [loop_unroll]: 0.00084216 [opt_after_cconv]: 0.00139454, [1] [Cycle 1]: 0.00138726, [7] [c_1]: 0.00070239 [parameter_eliminate]: 2.7502e-06 [updatestate_depend_eliminate]: 0.00011136 [updatestate_assign_eliminate]: 8.31001e-05 [updatestate_loads_eliminate]: 8.55201e-05 [cse]: 0.00034972 [renormalize]: 5.50412e-07 [remove_dup_value]: 0.00050477 [tuple_transform]: 0.00090478, [1] [Cycle 1]: 0.00089787, [2] [d_1]: 0.00087856 [renormalize]: 4.80097e-07 [partial_unused_args_eliminate]: 3.18e-06 [add_cache_embedding]: 0.00013154 [add_recomputation]: 0.00061944 [cse_after_recomputation]: 0.00026812, [1] [Cycle 1]: 0.00026037, [1] [cse]: 0.00024776 [environ_conv]: 7.32099e-05 [swap_dp_allreduce_reducescatter]: 0.0001137 [bias_add_comm_swap]: 3.02028e-06 [label_micro_interleaved_index]: 2.12993e-06 [label_fine_grained_interleaved_index]: 2.56998e-06 [merge_cast_opt]: 1.42027e-06 [slice_recompute_activation]: 1.87987e-06 [micro_interleaved_order_control]: 1.97999e-06 [assign_add_opt]: 1.09598e-05 [ForceFp32Comm]: 8.89879e-07 [remove_cast_before_assign_add]: 1.09011e-06 [full_micro_interleaved_order_control]: 2.18023e-06 [reorder_send_recv_between_fp_bp]: 2.04984e-06 [comm_op_add_attrs]: 1.10967e-06 [add_comm_op_reuse_tag]: 1.04029e-06 [interleave_split_concat_branches]: 8.40053e-07 [interleave_parallel_branches]: 9.60194e-07 [overlap_opt_shard_in_pipeline]: 2.744e-05 [overlap_opt_shard_grad_in_pipeline]: 2.16998e-06 [control_data_broadcast_order]: 1.09011e-06 [grouped_pairwise_exchange_alltoall]: 1.32993e-06 [offloading_packed_experts]: 1.09989e-06 [overlap_recompute_and_grad_model_parallel]: 1.76998e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.49832e-07 [overlap_recompute_allgather_and_fa_grad]: 1.24006e-06 [overlap_grad_ring_attention]: 1.6503e-06 [overlap_grad_flash_sp]: 0.00015532 [begin_end_overlap_inline]: 8.00006e-07 [split_matmul_comm_elemetwise]: 2.16998e-06 [split_layernorm_comm]: 1.91014e-06 [handle_group_info]: 9.30391e-07 [symbol_engine_optimizer]: 0.00078314, [1] [Cycle 1]: 0.00077643, [6] [build]: 4.05498e-05 [elim_shapecalc]: 0.00013252 [elim_not_effective]: 0.00020568 [opt_reshape]: 0.00013359 [fold_const_symbol]: 0.00022537 [renormalize]: 4.70318e-07 [pipeline_parallel_scheduler]: 2.09035e-06 [auto_monad_reorder]: 0.00034702 [get_jit_bprop_graph]: 5.49946e-07 [rewriter_after_jit_bprop_graph]: 4.80097e-07 [eliminate_special_op_node]: 0.00097429 [distribtued_split]: 0.00037947 [validate]: 0.00029829 [task_emit]: 12.7932 [execute]: 1.38902e-05 Sums bootstrap : 0.002041s : 0.01% type_inference : 0.776117s : 5.60% auto_monad : 0.002432s : 0.02% graph_reusing : 0.000040s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000016s : 0.00% pre_auto_parallel : 0.000815s : 0.01% insert-virtual-dataset : 0.000005s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000836s : 0.01% optimize.rewriter_before_opt_a : 0.001999s : 0.01% optimize.opt_a.expand_dump_flag : 0.000078s : 0.00% optimize.opt_a.switch_simplify : 0.002536s : 0.02% optimize.opt_a.loop_unroll : 0.002030s : 0.01% optimize.opt_a.a_1 : 0.055411s : 0.40% optimize.opt_a.recompute_prepare : 0.000454s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000683s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000298s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000459s : 0.00% optimize.opt_a.parameter_eliminate : 0.000020s : 0.00% optimize.opt_a.a_2 : 0.009616s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000734s : 0.01% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000155s : 0.00% optimize.opt_a.shard_inline : 0.000366s : 0.00% optimize.opt_a.auto_parallel : 0.000287s : 0.00% optimize.opt_a.parallel : 0.000031s : 0.00% optimize.opt_a.flash_sp : 0.000050s : 0.00% optimize.opt_a.merge_comm : 0.000273s : 0.00% optimize.opt_a.allreduce_fusion : 0.000243s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000343s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000378s : 0.00% optimize.opt_a.virtual_dataset : 0.000359s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000351s : 0.00% optimize.opt_a.virtual_output : 0.000353s : 0.00% optimize.opt_a.merge_forward : 0.000265s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000007s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000675s : 0.00% optimize.opt_a.before_grad : 0.000664s : 0.00% optimize.opt_a.inplace_validation : 0.000302s : 0.00% optimize.opt_a.meta_fg_expand : 0.050265s : 0.36% optimize.opt_a.inplace_validation_after_expand : 0.001427s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000012s : 0.00% optimize.opt_a.receive_attached : 0.000020s : 0.00% optimize.opt_a.after_resolve : 0.001240s : 0.01% optimize.opt_a.a_after_grad : 0.001911s : 0.01% optimize.opt_a.special_op_eliminate : 0.000968s : 0.01% optimize.opt_a.renormalize : 0.106417s : 0.77% optimize.opt_a.add_forward_monad_depend : 0.000288s : 0.00% optimize.opt_a.auto_monad_grad : 0.000103s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.001612s : 0.01% optimize.opt_a.cse : 0.007662s : 0.06% optimize.opt_a.a_3 : 0.017813s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000125s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000946s : 0.01% optimize.convert_after_rewriter : 0.000103s : 0.00% optimize.order_py_execute_after_rewriter : 0.000074s : 0.00% optimize.opt_b.b_1 : 0.002791s : 0.02% optimize.opt_b.b_2 : 0.000126s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000089s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000078s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000084s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000351s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000117s : 0.00% optimize.overlap_param_gather : 0.000025s : 0.00% optimize.cconv : 0.000063s : 0.00% optimize.loop_unroll : 0.000842s : 0.01% optimize.opt_after_cconv.c_1 : 0.000702s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000111s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000083s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000086s : 0.00% optimize.opt_after_cconv.cse : 0.000350s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000505s : 0.00% optimize.tuple_transform.d_1 : 0.000879s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000132s : 0.00% optimize.add_recomputation : 0.000619s : 0.00% optimize.cse_after_recomputation.cse : 0.000248s : 0.00% optimize.environ_conv : 0.000073s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000114s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000011s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000027s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000155s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000041s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000133s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000206s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000134s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000225s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000347s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000974s : 0.01% distribtued_split : 0.000379s : 0.00% validate : 0.000298s : 0.00% task_emit : 12.793221s : 92.32% execute : 0.000014s : 0.00% Time group info: ------[substitution.] 0.017040 3191 0.13% : 0.000022s : 9: substitution.addn_check_dump 0.37% : 0.000064s : 8: substitution.addn_zero_filter 0.10% : 0.000017s : 8: substitution.adjust_all_reduce_mul_add 1.47% : 0.000251s : 59: substitution.arithmetic_simplify 0.44% : 0.000075s : 11: substitution.cast_eliminate 0.23% : 0.000040s : 35: substitution.depend_value_elim 0.17% : 0.000028s : 97: substitution.elim_not_effective 0.01% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.06% : 0.000011s : 6: substitution.environ_get_add_eliminate 0.03% : 0.000005s : 3: substitution.environ_get_depend_swap 0.09% : 0.000015s : 12: substitution.environ_get_eliminate 0.13% : 0.000021s : 6: substitution.environ_get_set_eliminate 0.23% : 0.000039s : 53: substitution.float_depend_g_call 0.03% : 0.000006s : 6: substitution.float_environ_get_switch 0.04% : 0.000006s : 4: substitution.float_tuple_getitem_switch 0.21% : 0.000036s : 97: substitution.fold_const_symbol 7.99% : 0.001361s : 8: substitution.getattr_setattr_resolve 0.45% : 0.000077s : 116: substitution.graph_param_transform 0.04% : 0.000008s : 10: substitution.incorporate_call 0.03% : 0.000005s : 10: substitution.incorporate_call_switch 67.02% : 0.011420s : 326: substitution.inline 1.29% : 0.000220s : 40: substitution.inline_without_move 0.67% : 0.000115s : 286: substitution.j_node_and_user_rematch 1.78% : 0.000304s : 40: substitution.less_batch_normalization 0.21% : 0.000036s : 66: substitution.load_eliminater 0.28% : 0.000049s : 10: substitution.merge_addn 0.39% : 0.000066s : 57: substitution.minmaximum_grad 0.01% : 0.000002s : 4: substitution.opt_reshape 0.18% : 0.000031s : 4: substitution.partial_defer_inline 0.69% : 0.000118s : 53: substitution.partial_eliminate 0.07% : 0.000012s : 15: substitution.reduce_all_const_elim 0.16% : 0.000027s : 11: substitution.reduce_eliminate 0.92% : 0.000157s : 286: substitution.remove_not_recompute_node 3.52% : 0.000599s : 326: substitution.replace_applicator 0.43% : 0.000073s : 162: substitution.replace_old_param 0.19% : 0.000032s : 8: substitution.reshape_eliminate 0.04% : 0.000007s : 5: substitution.set_cell_output_no_recompute 0.04% : 0.000007s : 2: substitution.specialize_transform 0.09% : 0.000016s : 12: substitution.split_environ_get_set_with_tuple_value 0.31% : 0.000053s : 24: substitution.switch_simplify 0.20% : 0.000034s : 14: substitution.tile_eliminate 0.89% : 0.000152s : 57: substitution.tuple_list_convert_item_index_to_positive 0.48% : 0.000082s : 63: substitution.tuple_list_get_item_const_eliminator 0.80% : 0.000136s : 63: substitution.tuple_list_get_item_depend_reorder 3.87% : 0.000660s : 242: substitution.tuple_list_get_item_eliminator 0.66% : 0.000112s : 63: substitution.tuple_list_get_set_item_eliminator 0.96% : 0.000164s : 178: substitution.updatestate_pure_node_eliminater 1.55% : 0.000263s : 215: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.775543 2 96.06% : 0.745005s : 1: type_inference.infer 3.94% : 0.030538s : 1: type_inference.specialize ------[replace.] 0.005958 566 0.11% : 0.000006s : 1: replace.arithmetic_simplify 0.58% : 0.000035s : 6: replace.cast_eliminate 0.88% : 0.000052s : 7: replace.depend_value_elim 0.64% : 0.000038s : 3: replace.environ_get_set_eliminate 1.87% : 0.000111s : 6: replace.getattr_setattr_resolve 50.28% : 0.002996s : 313: replace.inline 0.36% : 0.000022s : 1: replace.merge_addn 3.42% : 0.000204s : 13: replace.partial_eliminate 2.96% : 0.000176s : 10: replace.replace_applicator 4.09% : 0.000244s : 24: replace.switch_simplify 0.86% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 33.69% : 0.002007s : 175: replace.tuple_list_get_item_eliminator 0.27% : 0.000016s : 1: replace.updatestate_useless_node_eliminater ------[match.] 0.013148 566 0.09% : 0.000012s : 1: match.arithmetic_simplify 0.33% : 0.000043s : 6: match.cast_eliminate 0.02% : 0.000003s : 7: match.depend_value_elim 0.12% : 0.000016s : 3: match.environ_get_set_eliminate 9.45% : 0.001242s : 6: match.getattr_setattr_resolve 85.29% : 0.011214s : 313: match.inline 0.17% : 0.000022s : 1: match.merge_addn 0.51% : 0.000067s : 13: match.partial_eliminate 0.35% : 0.000047s : 10: match.replace_applicator 0.32% : 0.000042s : 24: match.switch_simplify 0.23% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 3.06% : 0.000403s : 175: match.tuple_list_get_item_eliminator 0.06% : 0.000007s : 1: match.updatestate_useless_node_eliminater ------[predicate.] 0.015349100237 0.85% : 0.000131s : 982: predicate.accumulaten_eliminater 0.21% : 0.000031s : 116: predicate.ad_related_special_op_eliminate 0.56% : 0.000085s : 621: predicate.addn_check_dump 1.00% : 0.000154s : 982: predicate.addn_zero_filter 0.82% : 0.000126s : 982: predicate.adjust_all_reduce_mul_add 1.90% : 0.000292s : 1604: predicate.arithmetic_simplify 0.92% : 0.000140s : 989: predicate.cast_eliminate 2.85% : 0.000437s : 2476: predicate.check_bprop_eliminate 0.56% : 0.000086s : 621: predicate.compare_switch_simplify 0.06% : 0.000010s : 122: predicate.const_output_eliminate 0.11% : 0.000017s : 116: predicate.convert_tensor_all_eliminate 1.47% : 0.000226s : 1173: predicate.convert_tensor_eliminate 0.57% : 0.000088s : 624: predicate.depend_value_elim 0.91% : 0.000140s : 992: predicate.dict_get_item_const_eliminator 0.97% : 0.000148s : 992: predicate.dict_get_item_eliminator 0.92% : 0.000141s : 992: predicate.dict_set_item_eliminator 0.06% : 0.000009s : 116: predicate.elim_not_effective 0.12% : 0.000019s : 116: predicate.elim_shapecalc_of_broadcastargs 0.97% : 0.000149s : 1111: predicate.environ_add_const_eliminate 0.96% : 0.000147s : 1114: predicate.environ_get_add_eliminate 0.94% : 0.000144s : 1111: predicate.environ_get_depend_swap 1.62% : 0.000249s : 1735: predicate.environ_get_eliminate 0.94% : 0.000145s : 1114: predicate.environ_get_set_eliminate 1.33% : 0.000203s : 1500: predicate.exchange_switch_depend_value 1.70% : 0.000261s : 1500: predicate.float_depend_g_call 0.56% : 0.000086s : 621: predicate.float_environ_get_switch 0.66% : 0.000102s : 743: predicate.float_tuple_getitem_switch 0.05% : 0.000008s : 116: predicate.fold_const_symbol 0.34% : 0.000052s : 366: predicate.get_grad_eliminate 0.08% : 0.000013s : 40: predicate.getattr_setattr_resolve 0.07% : 0.000010s : 116: predicate.graph_param_transform 0.56% : 0.000086s : 621: predicate.incorporate_call 0.60% : 0.000092s : 621: predicate.incorporate_call_switch 4.41% : 0.000677s : 3713: predicate.inline 1.45% : 0.000223s : 1014: predicate.inline_without_move 0.18% : 0.000027s : 366: predicate.j_node_and_user_rematch 0.42% : 0.000064s : 374: predicate.less_batch_normalization 1.26% : 0.000194s : 1411: predicate.list_to_tuple_eliminator_ 2.14% : 0.000328s : 2412: predicate.load_eliminater 0.23% : 0.000035s : 122: predicate.loop_unroll_after_grad 2.60% : 0.000399s : 2198: predicate.loop_unroll_before_grad 1.14% : 0.000174s : 1242: predicate.make_slice_get_slice_eliminator 0.57% : 0.000087s : 623: predicate.merge_addn 2.88% : 0.000443s : 2432: predicate.micro_step_allgather_replace 2.78% : 0.000427s : 2432: predicate.mini_step_allgather_replace 0.84% : 0.000129s : 983: predicate.minmaximum_grad 0.12% : 0.000019s : 116: predicate.mutable_eliminate 0.11% : 0.000017s : 116: predicate.opt_reshape 0.12% : 0.000019s : 122: predicate.parallel_virtual_node 2.47% : 0.000380s : 1500: predicate.partial_defer_inline 1.25% : 0.000192s : 1308: predicate.partial_eliminate 0.86% : 0.000132s : 982: predicate.print_const_string_wrapper 0.56% : 0.000086s : 610: predicate.reduce_all_const_elim 1.11% : 0.000170s : 983: predicate.reduce_eliminate 0.17% : 0.000026s : 366: predicate.remove_not_recompute_node 2.02% : 0.000309s : 3625: predicate.replace_applicator 0.49% : 0.000075s : 1014: predicate.replace_old_param 0.06% : 0.000010s : 122: predicate.reset_defer_inline 0.89% : 0.000136s : 983: predicate.reshape_eliminate 2.84% : 0.000435s : 2432: predicate.row_tensor_add_zeros_like 0.13% : 0.000020s : 122: predicate.row_tensor_eliminate 2.97% : 0.000456s : 2476: predicate.same_eliminate 0.21% : 0.000032s : 417: predicate.set_cell_output_no_recompute 0.36% : 0.000056s : 366: predicate.shard_identity_eliminate 1.34% : 0.000205s : 1136: predicate.special_op_eliminate 0.64% : 0.000099s : 623: predicate.specialize_transform 2.95% : 0.000453s : 2432: predicate.split_environ_get_set_with_tuple_value 1.00% : 0.000153s : 1014: predicate.stack_unstack_eliminate 2.07% : 0.000318s : 2412: predicate.stopgrad_eliminater 0.11% : 0.000017s : 122: predicate.switch_call_monad_eliminater 1.54% : 0.000236s : 1500: predicate.switch_defer_inline 4.28% : 0.000656s : 3976: predicate.switch_layer_defer_inline 4.75% : 0.000729s : 4369: predicate.switch_simplify 0.89% : 0.000137s : 983: predicate.tile_eliminate 0.88% : 0.000135s : 983: predicate.transpose_eliminate 1.19% : 0.000183s : 1230: predicate.tuple_list_convert_item_index_to_positive 1.22% : 0.000187s : 1236: predicate.tuple_list_get_item_const_eliminator 1.08% : 0.000165s : 1236: predicate.tuple_list_get_item_depend_reorder 2.13% : 0.000327s : 2032: predicate.tuple_list_get_item_eliminator 1.11% : 0.000171s : 1236: predicate.tuple_list_get_set_item_eliminator 1.81% : 0.000277s : 1857: predicate.tuple_list_set_item_eliminator 1.25% : 0.000192s : 1411: predicate.tuple_to_list_eliminator_ 2.17% : 0.000332s : 2412: predicate.updatestate_pure_node_eliminater 2.79% : 0.000429s : 3034: predicate.updatestate_useless_node_eliminater 0.12% : 0.000019s : 122: predicate.value_based_eliminate 0.35% : 0.000053s : 366: predicate.virtual_dataset_eliminate 0.34% : 0.000052s : 366: predicate.virtual_output_eliminate 0.13% : 0.000020s : 122: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.041517 649 64.18% : 0.026645s : 290: func_graph_cloner_run.FuncGraphClonerGraph 4.70% : 0.001953s : 27: func_graph_cloner_run.FuncGraphClonerNode 31.12% : 0.012919s : 332: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 14.364122 280 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000138s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000630s : 1: add_recomputation 0.00% : 0.000014s : 1: assign_add_opt 0.02% : 0.002458s : 1: auto_monad 0.00% : 0.000362s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.002099s : 1: bootstrap 0.00% : 0.000069s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000110s : 1: convert_after_rewriter 0.00% : 0.000272s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000396s : 1: distribtued_split 0.01% : 0.000990s : 1: eliminate_special_op_node 0.00% : 0.000081s : 1: environ_conv 0.00% : 0.000024s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000009s : 1: get_jit_bprop_graph 0.00% : 0.000049s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000011s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000853s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000159s : 1: opt.transform.loop_unroll_optimizer 0.67% : 0.095611s : 162: opt.transform.opt_a 0.00% : 0.000700s : 1: opt.transform.opt_after_cconv 0.02% : 0.002886s : 27: opt.transform.opt_b 0.01% : 0.001586s : 4: opt.transform.opt_resolve 0.01% : 0.000875s : 1: opt.transform.opt_trans_graph 0.00% : 0.000390s : 3: opt.transform.special_op_eliminate 0.00% : 0.000691s : 4: opt.transform.symbol_engine_opt 1.91% : 0.274254s : 1: opt_a 0.01% : 0.001401s : 1: opt_after_cconv 0.02% : 0.003587s : 1: opt_b 2.01% : 0.288473s : 1: optimize 0.00% : 0.000124s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000080s : 1: order_py_execute_after_rewriter 0.00% : 0.000161s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000033s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000031s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000022s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000007s : 1: pipeline_split 0.01% : 0.000835s : 1: pre_auto_parallel 0.01% : 0.000889s : 1: py_interpret_to_execute 0.00% : 0.000133s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000517s : 1: remove_dup_value 0.51% : 0.073459s : 2: renormalize.infer 0.23% : 0.032932s : 2: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000958s : 1: rewriter_after_opt_a 0.01% : 0.002015s : 1: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000120s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000787s : 1: symbol_engine_optimizer 89.06% : 12.793264s : 1: task_emit 0.01% : 0.000909s : 1: tuple_transform 5.40% : 0.776154s : 1: type_inference 0.01% : 0.001347s : 1: validate TotalTime = 13.8815, [21] [bootstrap]: 0.00151813 [type_inference]: 0.785357 [auto_monad]: 0.00255549 [graph_reusing]: 3.91803e-05 [inline]: 2.10013e-06 [parallel-infer-symbol]: 1.70898e-05 [pre_auto_parallel]: 0.00083495 [insert-virtual-dataset]: 4.40003e-06 [parallel-infer-symbol-second]: 1.09011e-06 [dataset_repeat_opt]: 1.70013e-06 [pipeline_split]: 1.89012e-06 [optimize]: 0.301164, [52] [py_interpret_to_execute]: 0.00083045 [rewriter_before_opt_a]: 0.0019998 [opt_a]: 0.286878, [3] [Cycle 1]: 0.213708, [43] [expand_dump_flag]: 4.67901e-05 [switch_simplify]: 0.0013916 [loop_unroll]: 0.00089574 [a_1]: 0.0260773 [recompute_prepare]: 0.00018502 [updatestate_depend_eliminate]: 0.00037211 [updatestate_assign_eliminate]: 0.00013615 [updatestate_loads_eliminate]: 0.00022332 [parameter_eliminate]: 1.54101e-05 [a_2]: 0.00392556 [accelerated_algorithm]: 0.00042749 [shard]: 2.23005e-06 [meta_shard_fg_expand]: 5.82603e-05 [shard_inline]: 0.0001255 [auto_parallel]: 8.77203e-05 [parallel]: 1.30096e-05 [flash_sp]: 4.38797e-05 [merge_comm]: 8.58502e-05 [allreduce_fusion]: 7.38301e-05 [matmul_add_comm_reduction]: 0.00012937 [allreduce_slice_to_reducescatter]: 7.89762e-07 [virtual_shard_identity]: 0.00013165 [virtual_dataset]: 0.00012355 [get_grad_eliminate_]: 0.00012366 [virtual_output]: 0.00012388 [merge_forward]: 0.00010774 [cell_reuse_recompute_pass]: 2.52994e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022606 [before_grad]: 0.00021917 [inplace_validation]: 0.00014731 [meta_fg_expand]: 0.0495487 [inplace_validation_after_expand]: 0.00062001 [flash_sp_send_recv_attached]: 7.58981e-06 [receive_attached]: 3.85009e-06 [after_resolve]: 0.00094864 [a_after_grad]: 0.0015523 [special_op_eliminate]: 0.00075161 [renormalize]: 0.10321 [add_forward_monad_depend]: 0.00028825 [auto_monad_grad]: 0.00010277 [auto_monad_eliminator]: 0.00118476 [cse]: 0.00314885 [a_3]: 0.0162578 [Cycle 2]: 0.0566923, [43] [expand_dump_flag]: 3.30098e-05 [switch_simplify]: 0.00103908 [loop_unroll]: 0.00104009 [a_1]: 0.0262699 [recompute_prepare]: 0.00015351 [updatestate_depend_eliminate]: 0.00016103 [updatestate_assign_eliminate]: 9.04198e-05 [updatestate_loads_eliminate]: 0.00014887 [parameter_eliminate]: 3.62983e-06 [a_2]: 0.00386614 [accelerated_algorithm]: 0.00014804 [shard]: 2.21003e-06 [meta_shard_fg_expand]: 5.541e-05 [shard_inline]: 0.00012393 [auto_parallel]: 0.0001042 [parallel]: 1.26199e-05 [flash_sp]: 4.34974e-06 [merge_comm]: 9.57502e-05 [allreduce_fusion]: 8.39899e-05 [matmul_add_comm_reduction]: 0.000106 [allreduce_slice_to_reducescatter]: 5.10365e-07 [virtual_shard_identity]: 0.00012644 [virtual_dataset]: 0.00012 [get_grad_eliminate_]: 0.00011908 [virtual_output]: 0.00011854 [merge_forward]: 8.28099e-05 [cell_reuse_recompute_pass]: 2.12016e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022024 [before_grad]: 0.00023038 [inplace_validation]: 7.769e-05 [meta_fg_expand]: 0.00163874 [inplace_validation_after_expand]: 0.00070628 [flash_sp_send_recv_attached]: 2.42004e-06 [receive_attached]: 1.58977e-06 [after_resolve]: 0.00016055 [a_after_grad]: 0.00020939 [special_op_eliminate]: 0.00012128 [renormalize]: 0.0134151 [add_forward_monad_depend]: 4.33018e-06 [auto_monad_grad]: 1.76998e-06 [auto_monad_eliminator]: 0.00027812 [cse]: 0.00422308 [a_3]: 0.00087823 [Cycle 3]: 0.0106003, [43] [expand_dump_flag]: 2.71015e-06 [switch_simplify]: 0.00012301 [loop_unroll]: 0.00011845 [a_1]: 0.00385682 [recompute_prepare]: 0.0001276 [updatestate_depend_eliminate]: 0.00013499 [updatestate_assign_eliminate]: 8.73199e-05 [updatestate_loads_eliminate]: 8.47704e-05 [parameter_eliminate]: 2.62028e-06 [a_2]: 0.00191778 [accelerated_algorithm]: 0.00014556 [shard]: 1.2801e-06 [meta_shard_fg_expand]: 4.176e-05 [shard_inline]: 0.00012318 [auto_parallel]: 0.00010234 [parallel]: 8.17981e-06 [flash_sp]: 2.31992e-06 [merge_comm]: 9.44999e-05 [allreduce_fusion]: 8.58796e-05 [matmul_add_comm_reduction]: 0.00010737 [allreduce_slice_to_reducescatter]: 2.89641e-07 [virtual_shard_identity]: 0.00012575 [virtual_dataset]: 0.00012058 [get_grad_eliminate_]: 0.00011745 [virtual_output]: 0.00011822 [merge_forward]: 8.38102e-05 [cell_reuse_recompute_pass]: 2.31992e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022018 [before_grad]: 0.00021226 [inplace_validation]: 8.05901e-05 [meta_fg_expand]: 9.53898e-05 [inplace_validation_after_expand]: 0.00010154 [flash_sp_send_recv_attached]: 1.26008e-06 [receive_attached]: 8.79634e-07 [after_resolve]: 0.00013782 [a_after_grad]: 0.00020044 [special_op_eliminate]: 0.00012666 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 2.41026e-06 [auto_monad_grad]: 1.45007e-06 [auto_monad_eliminator]: 0.00015914 [cse]: 0.00035212 [a_3]: 0.00088019 [py_interpret_to_execute_after_opt_a]: 0.00011274 [slice_cell_reuse_recomputed_activation]: 3.16044e-06 [rewriter_after_opt_a]: 0.00095375 [convert_after_rewriter]: 0.0001056 [order_py_execute_after_rewriter]: 7.45999e-05 [opt_b]: 0.00362426, [1] [Cycle 1]: 0.00361619, [7] [b_1]: 0.00283131 [b_2]: 0.00012568 [updatestate_depend_eliminate]: 8.82498e-05 [updatestate_assign_eliminate]: 7.97198e-05 [updatestate_loads_eliminate]: 8.264e-05 [renormalize]: 4.49829e-07 [cse]: 0.00035042 [optimize_parallel_all_gather_comm]: 0.0001167 [overlap_param_gather]: 2.70302e-05 [cconv]: 6.42398e-05 [loop_unroll]: 0.00084618 [opt_after_cconv]: 0.0014217, [1] [Cycle 1]: 0.00141404, [7] [c_1]: 0.00070851 [parameter_eliminate]: 2.39024e-06 [updatestate_depend_eliminate]: 0.00011388 [updatestate_assign_eliminate]: 8.33096e-05 [updatestate_loads_eliminate]: 8.39499e-05 [cse]: 0.00036377 [renormalize]: 6.20261e-07 [remove_dup_value]: 0.00052993 [tuple_transform]: 0.00087619, [1] [Cycle 1]: 0.00086923, [2] [d_1]: 0.0008514 [renormalize]: 4.4005e-07 [partial_unused_args_eliminate]: 3.2098e-06 [add_cache_embedding]: 0.00013407 [add_recomputation]: 0.00061781 [cse_after_recomputation]: 0.00026426, [1] [Cycle 1]: 0.00025587, [1] [cse]: 0.00024289 [environ_conv]: 7.55601e-05 [swap_dp_allreduce_reducescatter]: 0.00011584 [bias_add_comm_swap]: 2.58023e-06 [label_micro_interleaved_index]: 1.87987e-06 [label_fine_grained_interleaved_index]: 2.53972e-06 [merge_cast_opt]: 1.68011e-06 [slice_recompute_activation]: 1.98977e-06 [micro_interleaved_order_control]: 1.95019e-06 [assign_add_opt]: 1.116e-05 [ForceFp32Comm]: 1.0198e-06 [remove_cast_before_assign_add]: 1.03982e-06 [full_micro_interleaved_order_control]: 2.41958e-06 [reorder_send_recv_between_fp_bp]: 2.04006e-06 [comm_op_add_attrs]: 1.07009e-06 [add_comm_op_reuse_tag]: 1.21025e-06 [interleave_split_concat_branches]: 8.49832e-07 [interleave_parallel_branches]: 9.49949e-07 [overlap_opt_shard_in_pipeline]: 2.92896e-05 [overlap_opt_shard_grad_in_pipeline]: 2.63983e-06 [control_data_broadcast_order]: 1.26008e-06 [grouped_pairwise_exchange_alltoall]: 1.24983e-06 [offloading_packed_experts]: 1.07987e-06 [overlap_recompute_and_grad_model_parallel]: 1.77976e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.13994e-06 [overlap_recompute_allgather_and_fa_grad]: 1.20979e-06 [overlap_grad_ring_attention]: 2.00002e-06 [overlap_grad_flash_sp]: 0.00016302 [begin_end_overlap_inline]: 9.20147e-07 [split_matmul_comm_elemetwise]: 2.2999e-06 [split_layernorm_comm]: 2.14996e-06 [handle_group_info]: 1.05985e-06 [symbol_engine_optimizer]: 0.00073952, [1] [Cycle 1]: 0.00073272, [6] [build]: 4.16702e-05 [elim_shapecalc]: 0.0001345 [elim_not_effective]: 0.00020278 [opt_reshape]: 0.0001203 [fold_const_symbol]: 0.00019274 [renormalize]: 4.09782e-07 [pipeline_parallel_scheduler]: 2.42982e-06 [auto_monad_reorder]: 0.00033142 [get_jit_bprop_graph]: 5.60191e-07 [rewriter_after_jit_bprop_graph]: 4.49829e-07 [eliminate_special_op_node]: 0.00093198 [distribtued_split]: 0.0003597 [validate]: 0.00029644 [task_emit]: 12.7867 [execute]: 1.434e-05 Sums bootstrap : 0.001518s : 0.01% type_inference : 0.785357s : 5.66% auto_monad : 0.002555s : 0.02% graph_reusing : 0.000039s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000017s : 0.00% pre_auto_parallel : 0.000835s : 0.01% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000830s : 0.01% optimize.rewriter_before_opt_a : 0.002000s : 0.01% optimize.opt_a.expand_dump_flag : 0.000083s : 0.00% optimize.opt_a.switch_simplify : 0.002554s : 0.02% optimize.opt_a.loop_unroll : 0.002054s : 0.01% optimize.opt_a.a_1 : 0.056204s : 0.41% optimize.opt_a.recompute_prepare : 0.000466s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000668s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000314s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000457s : 0.00% optimize.opt_a.parameter_eliminate : 0.000022s : 0.00% optimize.opt_a.a_2 : 0.009709s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000721s : 0.01% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000155s : 0.00% optimize.opt_a.shard_inline : 0.000373s : 0.00% optimize.opt_a.auto_parallel : 0.000294s : 0.00% optimize.opt_a.parallel : 0.000034s : 0.00% optimize.opt_a.flash_sp : 0.000051s : 0.00% optimize.opt_a.merge_comm : 0.000276s : 0.00% optimize.opt_a.allreduce_fusion : 0.000244s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000343s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000384s : 0.00% optimize.opt_a.virtual_dataset : 0.000364s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000360s : 0.00% optimize.opt_a.virtual_output : 0.000361s : 0.00% optimize.opt_a.merge_forward : 0.000274s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000007s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000666s : 0.00% optimize.opt_a.before_grad : 0.000662s : 0.00% optimize.opt_a.inplace_validation : 0.000306s : 0.00% optimize.opt_a.meta_fg_expand : 0.051283s : 0.37% optimize.opt_a.inplace_validation_after_expand : 0.001428s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000011s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.001247s : 0.01% optimize.opt_a.a_after_grad : 0.001962s : 0.01% optimize.opt_a.special_op_eliminate : 0.001000s : 0.01% optimize.opt_a.renormalize : 0.116626s : 0.84% optimize.opt_a.add_forward_monad_depend : 0.000295s : 0.00% optimize.opt_a.auto_monad_grad : 0.000106s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.001622s : 0.01% optimize.opt_a.cse : 0.007724s : 0.06% optimize.opt_a.a_3 : 0.018016s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000113s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000954s : 0.01% optimize.convert_after_rewriter : 0.000106s : 0.00% optimize.order_py_execute_after_rewriter : 0.000075s : 0.00% optimize.opt_b.b_1 : 0.002831s : 0.02% optimize.opt_b.b_2 : 0.000126s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000088s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000080s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000083s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000350s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000117s : 0.00% optimize.overlap_param_gather : 0.000027s : 0.00% optimize.cconv : 0.000064s : 0.00% optimize.loop_unroll : 0.000846s : 0.01% optimize.opt_after_cconv.c_1 : 0.000709s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000114s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000083s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000084s : 0.00% optimize.opt_after_cconv.cse : 0.000364s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000530s : 0.00% optimize.tuple_transform.d_1 : 0.000851s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000134s : 0.00% optimize.add_recomputation : 0.000618s : 0.00% optimize.cse_after_recomputation.cse : 0.000243s : 0.00% optimize.environ_conv : 0.000076s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000116s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000011s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000029s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000163s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000042s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000135s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000203s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000120s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000193s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000331s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000932s : 0.01% distribtued_split : 0.000360s : 0.00% validate : 0.000296s : 0.00% task_emit : 12.786698s : 92.17% execute : 0.000014s : 0.00% Time group info: ------[substitution.] 0.017172 3191 0.13% : 0.000022s : 9: substitution.addn_check_dump 0.37% : 0.000064s : 8: substitution.addn_zero_filter 0.10% : 0.000017s : 8: substitution.adjust_all_reduce_mul_add 1.48% : 0.000255s : 59: substitution.arithmetic_simplify 0.47% : 0.000081s : 11: substitution.cast_eliminate 0.24% : 0.000040s : 35: substitution.depend_value_elim 0.16% : 0.000028s : 97: substitution.elim_not_effective 0.02% : 0.000003s : 1: substitution.elim_shapecalc_of_broadcastargs 0.06% : 0.000011s : 6: substitution.environ_get_add_eliminate 0.03% : 0.000006s : 3: substitution.environ_get_depend_swap 0.09% : 0.000016s : 12: substitution.environ_get_eliminate 0.14% : 0.000024s : 6: substitution.environ_get_set_eliminate 0.23% : 0.000039s : 53: substitution.float_depend_g_call 0.04% : 0.000006s : 6: substitution.float_environ_get_switch 0.04% : 0.000006s : 4: substitution.float_tuple_getitem_switch 0.16% : 0.000027s : 97: substitution.fold_const_symbol 7.88% : 0.001352s : 8: substitution.getattr_setattr_resolve 0.44% : 0.000075s : 116: substitution.graph_param_transform 0.04% : 0.000008s : 10: substitution.incorporate_call 0.03% : 0.000005s : 10: substitution.incorporate_call_switch 67.28% : 0.011554s : 326: substitution.inline 1.30% : 0.000223s : 40: substitution.inline_without_move 0.77% : 0.000132s : 286: substitution.j_node_and_user_rematch 1.73% : 0.000297s : 40: substitution.less_batch_normalization 0.21% : 0.000037s : 66: substitution.load_eliminater 0.29% : 0.000050s : 10: substitution.merge_addn 0.41% : 0.000071s : 57: substitution.minmaximum_grad 0.01% : 0.000002s : 4: substitution.opt_reshape 0.20% : 0.000034s : 4: substitution.partial_defer_inline 0.72% : 0.000124s : 53: substitution.partial_eliminate 0.07% : 0.000012s : 15: substitution.reduce_all_const_elim 0.15% : 0.000026s : 11: substitution.reduce_eliminate 0.85% : 0.000147s : 286: substitution.remove_not_recompute_node 3.46% : 0.000594s : 326: substitution.replace_applicator 0.44% : 0.000075s : 162: substitution.replace_old_param 0.18% : 0.000031s : 8: substitution.reshape_eliminate 0.04% : 0.000007s : 5: substitution.set_cell_output_no_recompute 0.04% : 0.000006s : 2: substitution.specialize_transform 0.10% : 0.000016s : 12: substitution.split_environ_get_set_with_tuple_value 0.30% : 0.000052s : 24: substitution.switch_simplify 0.21% : 0.000036s : 14: substitution.tile_eliminate 0.91% : 0.000156s : 57: substitution.tuple_list_convert_item_index_to_positive 0.48% : 0.000082s : 63: substitution.tuple_list_get_item_const_eliminator 0.79% : 0.000135s : 63: substitution.tuple_list_get_item_depend_reorder 3.73% : 0.000640s : 242: substitution.tuple_list_get_item_eliminator 0.65% : 0.000112s : 63: substitution.tuple_list_get_set_item_eliminator 0.96% : 0.000165s : 178: substitution.updatestate_pure_node_eliminater 1.57% : 0.000269s : 215: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.784783 2 95.98% : 0.753248s : 1: type_inference.infer 4.02% : 0.031535s : 1: type_inference.specialize ------[replace.] 0.005959 566 0.10% : 0.000006s : 1: replace.arithmetic_simplify 0.59% : 0.000035s : 6: replace.cast_eliminate 0.85% : 0.000051s : 7: replace.depend_value_elim 0.64% : 0.000038s : 3: replace.environ_get_set_eliminate 1.86% : 0.000111s : 6: replace.getattr_setattr_resolve 50.43% : 0.003005s : 313: replace.inline 0.35% : 0.000021s : 1: replace.merge_addn 3.36% : 0.000200s : 13: replace.partial_eliminate 2.95% : 0.000176s : 10: replace.replace_applicator 4.03% : 0.000240s : 24: replace.switch_simplify 0.88% : 0.000053s : 6: replace.tuple_list_get_item_depend_reorder 33.69% : 0.002008s : 175: replace.tuple_list_get_item_eliminator 0.27% : 0.000016s : 1: replace.updatestate_useless_node_eliminater ------[match.] 0.013244 566 0.09% : 0.000012s : 1: match.arithmetic_simplify 0.36% : 0.000048s : 6: match.cast_eliminate 0.02% : 0.000003s : 7: match.depend_value_elim 0.13% : 0.000018s : 3: match.environ_get_set_eliminate 9.28% : 0.001229s : 6: match.getattr_setattr_resolve 85.51% : 0.011326s : 313: match.inline 0.18% : 0.000024s : 1: match.merge_addn 0.52% : 0.000069s : 13: match.partial_eliminate 0.35% : 0.000046s : 10: match.replace_applicator 0.31% : 0.000041s : 24: match.switch_simplify 0.24% : 0.000031s : 6: match.tuple_list_get_item_depend_reorder 2.94% : 0.000389s : 175: match.tuple_list_get_item_eliminator 0.06% : 0.000008s : 1: match.updatestate_useless_node_eliminater ------[predicate.] 0.015416100237 1.14% : 0.000176s : 982: predicate.accumulaten_eliminater 0.20% : 0.000032s : 116: predicate.ad_related_special_op_eliminate 0.55% : 0.000085s : 621: predicate.addn_check_dump 0.91% : 0.000140s : 982: predicate.addn_zero_filter 0.86% : 0.000133s : 982: predicate.adjust_all_reduce_mul_add 1.90% : 0.000294s : 1604: predicate.arithmetic_simplify 0.87% : 0.000134s : 989: predicate.cast_eliminate 2.81% : 0.000433s : 2476: predicate.check_bprop_eliminate 0.56% : 0.000086s : 621: predicate.compare_switch_simplify 0.06% : 0.000010s : 122: predicate.const_output_eliminate 0.11% : 0.000017s : 116: predicate.convert_tensor_all_eliminate 1.33% : 0.000205s : 1173: predicate.convert_tensor_eliminate 0.57% : 0.000087s : 624: predicate.depend_value_elim 0.93% : 0.000144s : 992: predicate.dict_get_item_const_eliminator 1.03% : 0.000159s : 992: predicate.dict_get_item_eliminator 0.95% : 0.000147s : 992: predicate.dict_set_item_eliminator 0.06% : 0.000008s : 116: predicate.elim_not_effective 0.13% : 0.000020s : 116: predicate.elim_shapecalc_of_broadcastargs 0.98% : 0.000151s : 1111: predicate.environ_add_const_eliminate 0.95% : 0.000147s : 1114: predicate.environ_get_add_eliminate 1.01% : 0.000155s : 1111: predicate.environ_get_depend_swap 1.56% : 0.000240s : 1735: predicate.environ_get_eliminate 1.00% : 0.000154s : 1114: predicate.environ_get_set_eliminate 1.37% : 0.000210s : 1500: predicate.exchange_switch_depend_value 1.64% : 0.000253s : 1500: predicate.float_depend_g_call 0.55% : 0.000085s : 621: predicate.float_environ_get_switch 0.66% : 0.000102s : 743: predicate.float_tuple_getitem_switch 0.05% : 0.000008s : 116: predicate.fold_const_symbol 0.35% : 0.000053s : 366: predicate.get_grad_eliminate 0.08% : 0.000013s : 40: predicate.getattr_setattr_resolve 0.07% : 0.000010s : 116: predicate.graph_param_transform 0.56% : 0.000086s : 621: predicate.incorporate_call 0.54% : 0.000084s : 621: predicate.incorporate_call_switch 4.28% : 0.000659s : 3713: predicate.inline 1.47% : 0.000227s : 1014: predicate.inline_without_move 0.17% : 0.000027s : 366: predicate.j_node_and_user_rematch 0.42% : 0.000065s : 374: predicate.less_batch_normalization 1.23% : 0.000190s : 1411: predicate.list_to_tuple_eliminator_ 2.18% : 0.000336s : 2412: predicate.load_eliminater 0.23% : 0.000035s : 122: predicate.loop_unroll_after_grad 2.62% : 0.000403s : 2198: predicate.loop_unroll_before_grad 1.10% : 0.000170s : 1242: predicate.make_slice_get_slice_eliminator 0.56% : 0.000086s : 623: predicate.merge_addn 2.82% : 0.000434s : 2432: predicate.micro_step_allgather_replace 2.74% : 0.000423s : 2432: predicate.mini_step_allgather_replace 0.85% : 0.000131s : 983: predicate.minmaximum_grad 0.12% : 0.000018s : 116: predicate.mutable_eliminate 0.11% : 0.000017s : 116: predicate.opt_reshape 0.12% : 0.000019s : 122: predicate.parallel_virtual_node 2.47% : 0.000381s : 1500: predicate.partial_defer_inline 1.26% : 0.000195s : 1308: predicate.partial_eliminate 0.85% : 0.000131s : 982: predicate.print_const_string_wrapper 0.54% : 0.000083s : 610: predicate.reduce_all_const_elim 1.05% : 0.000162s : 983: predicate.reduce_eliminate 0.17% : 0.000026s : 366: predicate.remove_not_recompute_node 2.00% : 0.000309s : 3625: predicate.replace_applicator 0.49% : 0.000076s : 1014: predicate.replace_old_param 0.06% : 0.000010s : 122: predicate.reset_defer_inline 0.87% : 0.000134s : 983: predicate.reshape_eliminate 2.80% : 0.000431s : 2432: predicate.row_tensor_add_zeros_like 0.13% : 0.000020s : 122: predicate.row_tensor_eliminate 2.95% : 0.000454s : 2476: predicate.same_eliminate 0.21% : 0.000033s : 417: predicate.set_cell_output_no_recompute 0.36% : 0.000056s : 366: predicate.shard_identity_eliminate 1.38% : 0.000213s : 1136: predicate.special_op_eliminate 0.64% : 0.000099s : 623: predicate.specialize_transform 3.00% : 0.000462s : 2432: predicate.split_environ_get_set_with_tuple_value 1.00% : 0.000154s : 1014: predicate.stack_unstack_eliminate 2.10% : 0.000324s : 2412: predicate.stopgrad_eliminater 0.11% : 0.000017s : 122: predicate.switch_call_monad_eliminater 1.47% : 0.000226s : 1500: predicate.switch_defer_inline 4.32% : 0.000665s : 3976: predicate.switch_layer_defer_inline 4.80% : 0.000739s : 4369: predicate.switch_simplify 1.20% : 0.000185s : 983: predicate.tile_eliminate 0.85% : 0.000131s : 983: predicate.transpose_eliminate 1.21% : 0.000186s : 1230: predicate.tuple_list_convert_item_index_to_positive 1.24% : 0.000192s : 1236: predicate.tuple_list_get_item_const_eliminator 1.05% : 0.000162s : 1236: predicate.tuple_list_get_item_depend_reorder 2.07% : 0.000319s : 2032: predicate.tuple_list_get_item_eliminator 1.13% : 0.000174s : 1236: predicate.tuple_list_get_set_item_eliminator 1.83% : 0.000282s : 1857: predicate.tuple_list_set_item_eliminator 1.23% : 0.000190s : 1411: predicate.tuple_to_list_eliminator_ 2.11% : 0.000326s : 2412: predicate.updatestate_pure_node_eliminater 2.75% : 0.000424s : 3034: predicate.updatestate_useless_node_eliminater 0.12% : 0.000019s : 122: predicate.value_based_eliminate 0.35% : 0.000054s : 366: predicate.virtual_dataset_eliminate 0.34% : 0.000053s : 366: predicate.virtual_output_eliminate 0.13% : 0.000020s : 122: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.042184 649 64.21% : 0.027086s : 290: func_graph_cloner_run.FuncGraphClonerGraph 4.67% : 0.001969s : 27: func_graph_cloner_run.FuncGraphClonerNode 31.12% : 0.013129s : 332: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 14.403193 280 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000141s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000630s : 1: add_recomputation 0.00% : 0.000015s : 1: assign_add_opt 0.02% : 0.002581s : 1: auto_monad 0.00% : 0.000346s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.001577s : 1: bootstrap 0.00% : 0.000071s : 1: cconv 0.00% : 0.000005s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000114s : 1: convert_after_rewriter 0.00% : 0.000269s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000375s : 1: distribtued_split 0.01% : 0.000947s : 1: eliminate_special_op_node 0.00% : 0.000084s : 1: environ_conv 0.00% : 0.000025s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000049s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000008s : 1: inline 0.00% : 0.000011s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000007s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000858s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000160s : 1: opt.transform.loop_unroll_optimizer 0.67% : 0.096862s : 162: opt.transform.opt_a 0.00% : 0.000706s : 1: opt.transform.opt_after_cconv 0.02% : 0.002926s : 27: opt.transform.opt_b 0.01% : 0.001579s : 4: opt.transform.opt_resolve 0.01% : 0.000848s : 1: opt.transform.opt_trans_graph 0.00% : 0.000378s : 3: opt.transform.special_op_eliminate 0.00% : 0.000645s : 4: opt.transform.symbol_engine_opt 1.99% : 0.286884s : 1: opt_a 0.01% : 0.001428s : 1: opt_after_cconv 0.03% : 0.003629s : 1: opt_b 2.09% : 0.301178s : 1: optimize 0.00% : 0.000124s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000081s : 1: order_py_execute_after_rewriter 0.00% : 0.000169s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000034s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000033s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000023s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.01% : 0.000856s : 1: pre_auto_parallel 0.01% : 0.000884s : 1: py_interpret_to_execute 0.00% : 0.000121s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000543s : 1: remove_dup_value 0.57% : 0.081457s : 2: renormalize.infer 0.24% : 0.035143s : 2: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000966s : 1: rewriter_after_opt_a 0.01% : 0.002016s : 1: rewriter_before_opt_a 0.00% : 0.000008s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000123s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000744s : 1: symbol_engine_optimizer 88.78% : 12.786745s : 1: task_emit 0.01% : 0.000881s : 1: tuple_transform 5.45% : 0.785393s : 1: type_inference 0.01% : 0.001353s : 1: validate TotalTime = 14.2815, [21] [bootstrap]: 0.00185445 [type_inference]: 0.781893 [auto_monad]: 0.00243025 [graph_reusing]: 3.78801e-05 [inline]: 2.52994e-06 [parallel-infer-symbol]: 2.25008e-06 [pre_auto_parallel]: 0.00077124 [insert-virtual-dataset]: 4.01028e-06 [parallel-infer-symbol-second]: 9.4017e-07 [dataset_repeat_opt]: 1.24006e-06 [pipeline_split]: 1.51992e-06 [optimize]: 0.303562, [52] [py_interpret_to_execute]: 0.00082882 [rewriter_before_opt_a]: 0.00199387 [opt_a]: 0.288823, [3] [Cycle 1]: 0.209226, [43] [expand_dump_flag]: 4.745e-05 [switch_simplify]: 0.00138004 [loop_unroll]: 0.00093393 [a_1]: 0.0272587 [recompute_prepare]: 0.00019032 [updatestate_depend_eliminate]: 0.00037112 [updatestate_assign_eliminate]: 0.00012606 [updatestate_loads_eliminate]: 0.00022055 [parameter_eliminate]: 1.32602e-05 [a_2]: 0.00394391 [accelerated_algorithm]: 0.00042597 [shard]: 2.57976e-06 [meta_shard_fg_expand]: 6.54403e-05 [shard_inline]: 0.00012498 [auto_parallel]: 8.66298e-05 [parallel]: 1.249e-05 [flash_sp]: 5.43497e-05 [merge_comm]: 8.468e-05 [allreduce_fusion]: 7.354e-05 [matmul_add_comm_reduction]: 0.00012316 [allreduce_slice_to_reducescatter]: 5.89993e-07 [virtual_shard_identity]: 0.00013079 [virtual_dataset]: 0.00012331 [get_grad_eliminate_]: 0.00012093 [virtual_output]: 0.00011919 [merge_forward]: 0.00010395 [cell_reuse_recompute_pass]: 2.48989e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022518 [before_grad]: 0.00021789 [inplace_validation]: 0.00014444 [meta_fg_expand]: 0.0481653 [inplace_validation_after_expand]: 0.00063481 [flash_sp_send_recv_attached]: 8.82987e-06 [receive_attached]: 1.81599e-05 [after_resolve]: 0.00094196 [a_after_grad]: 0.00149145 [special_op_eliminate]: 0.00074016 [renormalize]: 0.0983317 [add_forward_monad_depend]: 0.00030238 [auto_monad_grad]: 0.00011514 [auto_monad_eliminator]: 0.00124089 [cse]: 0.00313565 [a_3]: 0.0168464 [Cycle 2]: 0.0574635, [43] [expand_dump_flag]: 3.42401e-05 [switch_simplify]: 0.00113585 [loop_unroll]: 0.00106198 [a_1]: 0.0262655 [recompute_prepare]: 0.00015575 [updatestate_depend_eliminate]: 0.00016611 [updatestate_assign_eliminate]: 8.89902e-05 [updatestate_loads_eliminate]: 0.0001465 [parameter_eliminate]: 3.70992e-06 [a_2]: 0.00388453 [accelerated_algorithm]: 0.00014717 [shard]: 1.99024e-06 [meta_shard_fg_expand]: 5.64996e-05 [shard_inline]: 0.0001226 [auto_parallel]: 0.00010397 [parallel]: 1.17202e-05 [flash_sp]: 4.21982e-06 [merge_comm]: 9.20002e-05 [allreduce_fusion]: 8.11098e-05 [matmul_add_comm_reduction]: 0.00010327 [allreduce_slice_to_reducescatter]: 5.0012e-07 [virtual_shard_identity]: 0.00012466 [virtual_dataset]: 0.00011864 [get_grad_eliminate_]: 0.00011737 [virtual_output]: 0.00011756 [merge_forward]: 8.03703e-05 [cell_reuse_recompute_pass]: 2.23005e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022099 [before_grad]: 0.00021096 [inplace_validation]: 7.447e-05 [meta_fg_expand]: 0.00173945 [inplace_validation_after_expand]: 0.00068039 [flash_sp_send_recv_attached]: 2.25008e-06 [receive_attached]: 1.51014e-06 [after_resolve]: 0.00016035 [a_after_grad]: 0.000205 [special_op_eliminate]: 0.00012188 [renormalize]: 0.013781 [add_forward_monad_depend]: 4.80982e-06 [auto_monad_grad]: 2.29012e-06 [auto_monad_eliminator]: 0.00027742 [cse]: 0.00450761 [a_3]: 0.00086681 [Cycle 3]: 0.0162607, [43] [expand_dump_flag]: 2.77022e-06 [switch_simplify]: 0.00012173 [loop_unroll]: 0.00011645 [a_1]: 0.00387299 [recompute_prepare]: 0.00012577 [updatestate_depend_eliminate]: 0.0001331 [updatestate_assign_eliminate]: 8.61599e-05 [updatestate_loads_eliminate]: 8.607e-05 [parameter_eliminate]: 3.78024e-06 [a_2]: 0.00189038 [accelerated_algorithm]: 0.00014608 [shard]: 2.40002e-06 [meta_shard_fg_expand]: 4.45298e-05 [shard_inline]: 0.00012158 [auto_parallel]: 0.00564094 [parallel]: 1.85296e-05 [flash_sp]: 3.39979e-06 [merge_comm]: 0.00012103 [allreduce_fusion]: 8.845e-05 [matmul_add_comm_reduction]: 0.00011574 [allreduce_slice_to_reducescatter]: 7.30157e-07 [virtual_shard_identity]: 0.00013712 [virtual_dataset]: 0.00011971 [get_grad_eliminate_]: 0.00011704 [virtual_output]: 0.00011628 [merge_forward]: 8.82898e-05 [cell_reuse_recompute_pass]: 4.37023e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00023178 [before_grad]: 0.00021159 [inplace_validation]: 8.325e-05 [meta_fg_expand]: 0.00010436 [inplace_validation_after_expand]: 0.00010733 [flash_sp_send_recv_attached]: 2.01026e-06 [receive_attached]: 1.64006e-06 [after_resolve]: 0.00014191 [a_after_grad]: 0.00019898 [special_op_eliminate]: 0.00012145 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 3.78024e-06 [auto_monad_grad]: 3.58e-06 [auto_monad_eliminator]: 0.00018772 [cse]: 0.00039041 [a_3]: 0.00085018 [py_interpret_to_execute_after_opt_a]: 0.00012982 [slice_cell_reuse_recomputed_activation]: 2.44007e-06 [rewriter_after_opt_a]: 0.00096323 [convert_after_rewriter]: 0.00010852 [order_py_execute_after_rewriter]: 7.56499e-05 [opt_b]: 0.00360756, [1] [Cycle 1]: 0.00359615, [7] [b_1]: 0.00281259 [b_2]: 0.000124 [updatestate_depend_eliminate]: 9.083e-05 [updatestate_assign_eliminate]: 7.99401e-05 [updatestate_loads_eliminate]: 8.39601e-05 [renormalize]: 3.7998e-07 [cse]: 0.00035296 [optimize_parallel_all_gather_comm]: 0.00012221 [overlap_param_gather]: 2.21399e-05 [cconv]: 7.72499e-05 [loop_unroll]: 0.00122004 [opt_after_cconv]: 0.00146952, [1] [Cycle 1]: 0.00146249, [7] [c_1]: 0.00076771 [parameter_eliminate]: 3.32016e-06 [updatestate_depend_eliminate]: 0.00011639 [updatestate_assign_eliminate]: 8.27899e-05 [updatestate_loads_eliminate]: 8.39601e-05 [cse]: 0.00035563 [renormalize]: 5.0012e-07 [remove_dup_value]: 0.0005401 [tuple_transform]: 0.00087171, [1] [Cycle 1]: 0.00086507, [2] [d_1]: 0.0008491 [renormalize]: 4.50294e-07 [partial_unused_args_eliminate]: 3.03006e-06 [add_cache_embedding]: 0.00014035 [add_recomputation]: 0.0006502 [cse_after_recomputation]: 0.00027035, [1] [Cycle 1]: 0.00026275, [1] [cse]: 0.00025103 [environ_conv]: 8.41902e-05 [swap_dp_allreduce_reducescatter]: 0.00011186 [bias_add_comm_swap]: 2.43997e-05 [label_micro_interleaved_index]: 2.01957e-06 [label_fine_grained_interleaved_index]: 1.89012e-06 [merge_cast_opt]: 1.34017e-06 [slice_recompute_activation]: 2.02004e-06 [micro_interleaved_order_control]: 1.66008e-06 [assign_add_opt]: 1.23e-05 [ForceFp32Comm]: 8.89879e-07 [remove_cast_before_assign_add]: 9.69972e-07 [full_micro_interleaved_order_control]: 2.00002e-06 [reorder_send_recv_between_fp_bp]: 2.04006e-06 [comm_op_add_attrs]: 1.0198e-06 [add_comm_op_reuse_tag]: 1.36998e-06 [interleave_split_concat_branches]: 8.30274e-07 [interleave_parallel_branches]: 8.09785e-07 [overlap_opt_shard_in_pipeline]: 2.79797e-05 [overlap_opt_shard_grad_in_pipeline]: 2.2701e-06 [control_data_broadcast_order]: 1.07009e-06 [grouped_pairwise_exchange_alltoall]: 1.23028e-06 [offloading_packed_experts]: 1.11014e-06 [overlap_recompute_and_grad_model_parallel]: 1.83005e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.10036e-06 [overlap_recompute_allgather_and_fa_grad]: 1.32015e-06 [overlap_grad_ring_attention]: 1.57999e-06 [overlap_grad_flash_sp]: 0.00014831 [begin_end_overlap_inline]: 7.69738e-07 [split_matmul_comm_elemetwise]: 1.93994e-06 [split_layernorm_comm]: 1.97021e-06 [handle_group_info]: 9.29926e-07 [symbol_engine_optimizer]: 0.00073736, [1] [Cycle 1]: 0.00073122, [6] [build]: 4.63799e-05 [elim_shapecalc]: 0.00013462 [elim_not_effective]: 0.00019946 [opt_reshape]: 0.00011928 [fold_const_symbol]: 0.00019507 [renormalize]: 4.29805e-07 [pipeline_parallel_scheduler]: 1.90968e-06 [auto_monad_reorder]: 0.00033463 [get_jit_bprop_graph]: 7.5018e-07 [rewriter_after_jit_bprop_graph]: 6.49597e-07 [eliminate_special_op_node]: 0.00092863 [distribtued_split]: 0.00041387 [validate]: 0.00029989 [task_emit]: 13.1875 [execute]: 1.32401e-05 Sums bootstrap : 0.001854s : 0.01% type_inference : 0.781893s : 5.48% auto_monad : 0.002430s : 0.02% graph_reusing : 0.000038s : 0.00% inline : 0.000003s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000771s : 0.01% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000829s : 0.01% optimize.rewriter_before_opt_a : 0.001994s : 0.01% optimize.opt_a.expand_dump_flag : 0.000084s : 0.00% optimize.opt_a.switch_simplify : 0.002638s : 0.02% optimize.opt_a.loop_unroll : 0.002112s : 0.01% optimize.opt_a.a_1 : 0.057397s : 0.40% optimize.opt_a.recompute_prepare : 0.000472s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000670s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000301s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000453s : 0.00% optimize.opt_a.parameter_eliminate : 0.000021s : 0.00% optimize.opt_a.a_2 : 0.009719s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000719s : 0.01% optimize.opt_a.shard : 0.000007s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000166s : 0.00% optimize.opt_a.shard_inline : 0.000369s : 0.00% optimize.opt_a.auto_parallel : 0.005832s : 0.04% optimize.opt_a.parallel : 0.000043s : 0.00% optimize.opt_a.flash_sp : 0.000062s : 0.00% optimize.opt_a.merge_comm : 0.000298s : 0.00% optimize.opt_a.allreduce_fusion : 0.000243s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000342s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000393s : 0.00% optimize.opt_a.virtual_dataset : 0.000362s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000355s : 0.00% optimize.opt_a.virtual_output : 0.000353s : 0.00% optimize.opt_a.merge_forward : 0.000273s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000678s : 0.00% optimize.opt_a.before_grad : 0.000640s : 0.00% optimize.opt_a.inplace_validation : 0.000302s : 0.00% optimize.opt_a.meta_fg_expand : 0.050009s : 0.35% optimize.opt_a.inplace_validation_after_expand : 0.001423s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000013s : 0.00% optimize.opt_a.receive_attached : 0.000021s : 0.00% optimize.opt_a.after_resolve : 0.001244s : 0.01% optimize.opt_a.a_after_grad : 0.001895s : 0.01% optimize.opt_a.special_op_eliminate : 0.000983s : 0.01% optimize.opt_a.renormalize : 0.112113s : 0.79% optimize.opt_a.add_forward_monad_depend : 0.000311s : 0.00% optimize.opt_a.auto_monad_grad : 0.000121s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.001706s : 0.01% optimize.opt_a.cse : 0.008034s : 0.06% optimize.opt_a.a_3 : 0.018563s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000130s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000963s : 0.01% optimize.convert_after_rewriter : 0.000109s : 0.00% optimize.order_py_execute_after_rewriter : 0.000076s : 0.00% optimize.opt_b.b_1 : 0.002813s : 0.02% optimize.opt_b.b_2 : 0.000124s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000091s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000080s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000084s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000353s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000122s : 0.00% optimize.overlap_param_gather : 0.000022s : 0.00% optimize.cconv : 0.000077s : 0.00% optimize.loop_unroll : 0.001220s : 0.01% optimize.opt_after_cconv.c_1 : 0.000768s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000116s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000083s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000084s : 0.00% optimize.opt_after_cconv.cse : 0.000356s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000540s : 0.00% optimize.tuple_transform.d_1 : 0.000849s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000140s : 0.00% optimize.add_recomputation : 0.000650s : 0.00% optimize.cse_after_recomputation.cse : 0.000251s : 0.00% optimize.environ_conv : 0.000084s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000112s : 0.00% optimize.bias_add_comm_swap : 0.000024s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000012s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000028s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000148s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000046s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000135s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000199s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000119s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000195s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000335s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000929s : 0.01% distribtued_split : 0.000414s : 0.00% validate : 0.000300s : 0.00% task_emit : 13.187482s : 92.40% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.017278 3191 0.13% : 0.000022s : 9: substitution.addn_check_dump 0.37% : 0.000063s : 8: substitution.addn_zero_filter 0.10% : 0.000017s : 8: substitution.adjust_all_reduce_mul_add 1.46% : 0.000252s : 59: substitution.arithmetic_simplify 0.44% : 0.000076s : 11: substitution.cast_eliminate 0.24% : 0.000042s : 35: substitution.depend_value_elim 0.16% : 0.000028s : 97: substitution.elim_not_effective 0.01% : 0.000003s : 1: substitution.elim_shapecalc_of_broadcastargs 0.13% : 0.000022s : 6: substitution.environ_get_add_eliminate 0.03% : 0.000005s : 3: substitution.environ_get_depend_swap 0.09% : 0.000015s : 12: substitution.environ_get_eliminate 0.13% : 0.000023s : 6: substitution.environ_get_set_eliminate 0.23% : 0.000040s : 53: substitution.float_depend_g_call 0.04% : 0.000006s : 6: substitution.float_environ_get_switch 0.04% : 0.000006s : 4: substitution.float_tuple_getitem_switch 0.16% : 0.000028s : 97: substitution.fold_const_symbol 7.63% : 0.001318s : 8: substitution.getattr_setattr_resolve 0.44% : 0.000076s : 116: substitution.graph_param_transform 0.05% : 0.000008s : 10: substitution.incorporate_call 0.03% : 0.000005s : 10: substitution.incorporate_call_switch 68.05% : 0.011758s : 326: substitution.inline 1.25% : 0.000216s : 40: substitution.inline_without_move 0.67% : 0.000116s : 286: substitution.j_node_and_user_rematch 1.72% : 0.000297s : 40: substitution.less_batch_normalization 0.21% : 0.000036s : 66: substitution.load_eliminater 0.29% : 0.000050s : 10: substitution.merge_addn 0.38% : 0.000065s : 57: substitution.minmaximum_grad 0.01% : 0.000003s : 4: substitution.opt_reshape 0.21% : 0.000035s : 4: substitution.partial_defer_inline 0.62% : 0.000106s : 53: substitution.partial_eliminate 0.07% : 0.000012s : 15: substitution.reduce_all_const_elim 0.15% : 0.000026s : 11: substitution.reduce_eliminate 0.85% : 0.000147s : 286: substitution.remove_not_recompute_node 3.65% : 0.000630s : 326: substitution.replace_applicator 0.44% : 0.000077s : 162: substitution.replace_old_param 0.18% : 0.000031s : 8: substitution.reshape_eliminate 0.04% : 0.000007s : 5: substitution.set_cell_output_no_recompute 0.04% : 0.000006s : 2: substitution.specialize_transform 0.09% : 0.000015s : 12: substitution.split_environ_get_set_with_tuple_value 0.29% : 0.000050s : 24: substitution.switch_simplify 0.20% : 0.000035s : 14: substitution.tile_eliminate 0.87% : 0.000151s : 57: substitution.tuple_list_convert_item_index_to_positive 0.46% : 0.000079s : 63: substitution.tuple_list_get_item_const_eliminator 0.75% : 0.000129s : 63: substitution.tuple_list_get_item_depend_reorder 3.53% : 0.000610s : 242: substitution.tuple_list_get_item_eliminator 0.60% : 0.000105s : 63: substitution.tuple_list_get_set_item_eliminator 0.97% : 0.000167s : 178: substitution.updatestate_pure_node_eliminater 1.52% : 0.000263s : 215: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.781316 2 95.90% : 0.749265s : 1: type_inference.infer 4.10% : 0.032051s : 1: type_inference.specialize ------[replace.] 0.005990 566 0.10% : 0.000006s : 1: replace.arithmetic_simplify 0.61% : 0.000036s : 6: replace.cast_eliminate 0.89% : 0.000053s : 7: replace.depend_value_elim 0.62% : 0.000037s : 3: replace.environ_get_set_eliminate 1.87% : 0.000112s : 6: replace.getattr_setattr_resolve 50.05% : 0.002998s : 313: replace.inline 0.35% : 0.000021s : 1: replace.merge_addn 3.49% : 0.000209s : 13: replace.partial_eliminate 2.97% : 0.000178s : 10: replace.replace_applicator 4.25% : 0.000254s : 24: replace.switch_simplify 0.89% : 0.000053s : 6: replace.tuple_list_get_item_depend_reorder 33.66% : 0.002016s : 175: replace.tuple_list_get_item_eliminator 0.25% : 0.000015s : 1: replace.updatestate_useless_node_eliminater ------[match.] 0.013409 566 0.08% : 0.000011s : 1: match.arithmetic_simplify 0.32% : 0.000043s : 6: match.cast_eliminate 0.02% : 0.000003s : 7: match.depend_value_elim 0.13% : 0.000017s : 3: match.environ_get_set_eliminate 8.97% : 0.001202s : 6: match.getattr_setattr_resolve 86.14% : 0.011550s : 313: match.inline 0.17% : 0.000023s : 1: match.merge_addn 0.50% : 0.000067s : 13: match.partial_eliminate 0.35% : 0.000047s : 10: match.replace_applicator 0.29% : 0.000039s : 24: match.switch_simplify 0.23% : 0.000031s : 6: match.tuple_list_get_item_depend_reorder 2.75% : 0.000369s : 175: match.tuple_list_get_item_eliminator 0.06% : 0.000008s : 1: match.updatestate_useless_node_eliminater ------[predicate.] 0.016754100237 0.82% : 0.000137s : 982: predicate.accumulaten_eliminater 0.20% : 0.000033s : 116: predicate.ad_related_special_op_eliminate 0.51% : 0.000085s : 621: predicate.addn_check_dump 0.79% : 0.000132s : 982: predicate.addn_zero_filter 0.76% : 0.000127s : 982: predicate.adjust_all_reduce_mul_add 1.77% : 0.000297s : 1604: predicate.arithmetic_simplify 0.81% : 0.000136s : 989: predicate.cast_eliminate 2.75% : 0.000460s : 2476: predicate.check_bprop_eliminate 0.51% : 0.000086s : 621: predicate.compare_switch_simplify 0.06% : 0.000010s : 122: predicate.const_output_eliminate 0.10% : 0.000017s : 116: predicate.convert_tensor_all_eliminate 1.33% : 0.000223s : 1173: predicate.convert_tensor_eliminate 0.52% : 0.000088s : 624: predicate.depend_value_elim 0.85% : 0.000143s : 992: predicate.dict_get_item_const_eliminator 0.88% : 0.000147s : 992: predicate.dict_get_item_eliminator 0.84% : 0.000140s : 992: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 116: predicate.elim_not_effective 0.12% : 0.000019s : 116: predicate.elim_shapecalc_of_broadcastargs 0.88% : 0.000148s : 1111: predicate.environ_add_const_eliminate 0.89% : 0.000150s : 1114: predicate.environ_get_add_eliminate 0.88% : 0.000148s : 1111: predicate.environ_get_depend_swap 1.46% : 0.000244s : 1735: predicate.environ_get_eliminate 0.88% : 0.000148s : 1114: predicate.environ_get_set_eliminate 1.24% : 0.000208s : 1500: predicate.exchange_switch_depend_value 1.53% : 0.000257s : 1500: predicate.float_depend_g_call 0.51% : 0.000085s : 621: predicate.float_environ_get_switch 0.61% : 0.000102s : 743: predicate.float_tuple_getitem_switch 0.05% : 0.000008s : 116: predicate.fold_const_symbol 0.31% : 0.000052s : 366: predicate.get_grad_eliminate 0.08% : 0.000013s : 40: predicate.getattr_setattr_resolve 0.06% : 0.000010s : 116: predicate.graph_param_transform 0.51% : 0.000085s : 621: predicate.incorporate_call 0.50% : 0.000084s : 621: predicate.incorporate_call_switch 4.09% : 0.000685s : 3713: predicate.inline 1.31% : 0.000220s : 1014: predicate.inline_without_move 0.16% : 0.000027s : 366: predicate.j_node_and_user_rematch 0.39% : 0.000065s : 374: predicate.less_batch_normalization 1.17% : 0.000196s : 1411: predicate.list_to_tuple_eliminator_ 2.00% : 0.000336s : 2412: predicate.load_eliminater 0.23% : 0.000039s : 122: predicate.loop_unroll_after_grad 2.48% : 0.000415s : 2198: predicate.loop_unroll_before_grad 1.04% : 0.000174s : 1242: predicate.make_slice_get_slice_eliminator 0.52% : 0.000087s : 623: predicate.merge_addn 2.66% : 0.000446s : 2432: predicate.micro_step_allgather_replace 2.69% : 0.000451s : 2432: predicate.mini_step_allgather_replace 0.79% : 0.000132s : 983: predicate.minmaximum_grad 0.11% : 0.000019s : 116: predicate.mutable_eliminate 0.10% : 0.000017s : 116: predicate.opt_reshape 0.12% : 0.000019s : 122: predicate.parallel_virtual_node 2.35% : 0.000393s : 1500: predicate.partial_defer_inline 1.17% : 0.000195s : 1308: predicate.partial_eliminate 0.85% : 0.000143s : 982: predicate.print_const_string_wrapper 0.50% : 0.000085s : 610: predicate.reduce_all_const_elim 1.05% : 0.000177s : 983: predicate.reduce_eliminate 0.16% : 0.000026s : 366: predicate.remove_not_recompute_node 1.84% : 0.000309s : 3625: predicate.replace_applicator 0.45% : 0.000075s : 1014: predicate.replace_old_param 0.06% : 0.000010s : 122: predicate.reset_defer_inline 0.82% : 0.000137s : 983: predicate.reshape_eliminate 3.06% : 0.000512s : 2432: predicate.row_tensor_add_zeros_like 0.12% : 0.000019s : 122: predicate.row_tensor_eliminate 2.87% : 0.000481s : 2476: predicate.same_eliminate 0.19% : 0.000032s : 417: predicate.set_cell_output_no_recompute 0.33% : 0.000055s : 366: predicate.shard_identity_eliminate 1.22% : 0.000205s : 1136: predicate.special_op_eliminate 0.59% : 0.000099s : 623: predicate.specialize_transform 2.93% : 0.000491s : 2432: predicate.split_environ_get_set_with_tuple_value 0.91% : 0.000153s : 1014: predicate.stack_unstack_eliminate 1.93% : 0.000323s : 2412: predicate.stopgrad_eliminater 0.10% : 0.000017s : 122: predicate.switch_call_monad_eliminater 1.38% : 0.000231s : 1500: predicate.switch_defer_inline 4.08% : 0.000684s : 3976: predicate.switch_layer_defer_inline 4.45% : 0.000746s : 4369: predicate.switch_simplify 0.82% : 0.000138s : 983: predicate.tile_eliminate 0.79% : 0.000132s : 983: predicate.transpose_eliminate 1.11% : 0.000187s : 1230: predicate.tuple_list_convert_item_index_to_positive 1.12% : 0.000188s : 1236: predicate.tuple_list_get_item_const_eliminator 0.98% : 0.000164s : 1236: predicate.tuple_list_get_item_depend_reorder 1.95% : 0.000327s : 2032: predicate.tuple_list_get_item_eliminator 1.03% : 0.000172s : 1236: predicate.tuple_list_get_set_item_eliminator 8.21% : 0.001375s : 1857: predicate.tuple_list_set_item_eliminator 1.16% : 0.000195s : 1411: predicate.tuple_to_list_eliminator_ 1.95% : 0.000327s : 2412: predicate.updatestate_pure_node_eliminater 2.70% : 0.000453s : 3034: predicate.updatestate_useless_node_eliminater 0.11% : 0.000018s : 122: predicate.value_based_eliminate 0.32% : 0.000053s : 366: predicate.virtual_dataset_eliminate 0.31% : 0.000052s : 366: predicate.virtual_output_eliminate 0.12% : 0.000020s : 122: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.045328 649 65.44% : 0.029662s : 290: func_graph_cloner_run.FuncGraphClonerGraph 4.45% : 0.002016s : 27: func_graph_cloner_run.FuncGraphClonerNode 30.11% : 0.013650s : 332: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 14.802850 280 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000147s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000661s : 1: add_recomputation 0.00% : 0.000015s : 1: assign_add_opt 0.02% : 0.002454s : 1: auto_monad 0.00% : 0.000350s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000029s : 1: bias_add_comm_swap 0.01% : 0.001891s : 1: bootstrap 0.00% : 0.000083s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000116s : 1: convert_after_rewriter 0.00% : 0.000275s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000429s : 1: distribtued_split 0.01% : 0.000943s : 1: eliminate_special_op_node 0.00% : 0.000092s : 1: environ_conv 0.00% : 0.000023s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000047s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000008s : 1: inline 0.00% : 0.000010s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.001230s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000166s : 1: opt.transform.loop_unroll_optimizer 0.67% : 0.098638s : 162: opt.transform.opt_a 0.01% : 0.000765s : 1: opt.transform.opt_after_cconv 0.02% : 0.002903s : 27: opt.transform.opt_b 0.01% : 0.001544s : 4: opt.transform.opt_resolve 0.01% : 0.000846s : 1: opt.transform.opt_trans_graph 0.00% : 0.000391s : 3: opt.transform.special_op_eliminate 0.00% : 0.000643s : 4: opt.transform.symbol_engine_opt 1.95% : 0.288829s : 1: opt_a 0.01% : 0.001475s : 1: opt_after_cconv 0.02% : 0.003612s : 1: opt_b 2.05% : 0.303578s : 1: optimize 0.00% : 0.000129s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000081s : 1: order_py_execute_after_rewriter 0.00% : 0.000154s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000033s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000028s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000010s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.01% : 0.000789s : 1: pre_auto_parallel 0.01% : 0.000880s : 1: py_interpret_to_execute 0.00% : 0.000137s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000552s : 1: remove_dup_value 0.52% : 0.077420s : 2: renormalize.infer 0.23% : 0.034667s : 2: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000974s : 1: rewriter_after_opt_a 0.01% : 0.002008s : 1: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000118s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000741s : 1: symbol_engine_optimizer 89.09% : 13.187531s : 1: task_emit 0.01% : 0.000876s : 1: tuple_transform 5.28% : 0.781932s : 1: type_inference 0.01% : 0.001434s : 1: validate TotalTime = 14.2755, [21] [bootstrap]: 0.00212062 [type_inference]: 0.811376 [auto_monad]: 0.00255724 [graph_reusing]: 4.36497e-05 [inline]: 2.00002e-06 [parallel-infer-symbol]: 1.632e-05 [pre_auto_parallel]: 0.00084387 [insert-virtual-dataset]: 4.11971e-06 [parallel-infer-symbol-second]: 1.22981e-06 [dataset_repeat_opt]: 1.45007e-06 [pipeline_split]: 1.91014e-06 [optimize]: 0.323847, [52] [py_interpret_to_execute]: 0.00088957 [rewriter_before_opt_a]: 0.00227077 [opt_a]: 0.307767, [3] [Cycle 1]: 0.223862, [43] [expand_dump_flag]: 4.93801e-05 [switch_simplify]: 0.00161772 [loop_unroll]: 0.00107351 [a_1]: 0.031187 [recompute_prepare]: 0.00022838 [updatestate_depend_eliminate]: 0.00040621 [updatestate_assign_eliminate]: 0.00012298 [updatestate_loads_eliminate]: 0.00023543 [parameter_eliminate]: 1.59396e-05 [a_2]: 0.00475054 [accelerated_algorithm]: 0.00046725 [shard]: 2.6999e-06 [meta_shard_fg_expand]: 8.51303e-05 [shard_inline]: 0.00015118 [auto_parallel]: 9.22401e-05 [parallel]: 1.255e-05 [flash_sp]: 4.44199e-05 [merge_comm]: 9.22703e-05 [allreduce_fusion]: 8.25496e-05 [matmul_add_comm_reduction]: 0.00013322 [allreduce_slice_to_reducescatter]: 5.99772e-07 [virtual_shard_identity]: 0.00015547 [virtual_dataset]: 0.00014607 [get_grad_eliminate_]: 0.00014727 [virtual_output]: 0.00014427 [merge_forward]: 0.00011179 [cell_reuse_recompute_pass]: 2.73995e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00028906 [before_grad]: 0.00027432 [inplace_validation]: 0.0001566 [meta_fg_expand]: 0.0497694 [inplace_validation_after_expand]: 0.00067833 [flash_sp_send_recv_attached]: 8.92999e-06 [receive_attached]: 1.54399e-05 [after_resolve]: 0.00114733 [a_after_grad]: 0.00188732 [special_op_eliminate]: 0.00089439 [renormalize]: 0.101336 [add_forward_monad_depend]: 0.00031625 [auto_monad_grad]: 0.00012382 [auto_monad_eliminator]: 0.00142008 [cse]: 0.00335431 [a_3]: 0.0200379 [Cycle 2]: 0.0653355, [43] [expand_dump_flag]: 3.81297e-05 [switch_simplify]: 0.00128677 [loop_unroll]: 0.00126963 [a_1]: 0.0311028 [recompute_prepare]: 0.00018548 [updatestate_depend_eliminate]: 0.00017822 [updatestate_assign_eliminate]: 9.71598e-05 [updatestate_loads_eliminate]: 0.00015876 [parameter_eliminate]: 3.99025e-06 [a_2]: 0.00464898 [accelerated_algorithm]: 0.00018006 [shard]: 2.08011e-06 [meta_shard_fg_expand]: 7.41598e-05 [shard_inline]: 0.00016149 [auto_parallel]: 0.00011665 [parallel]: 1.28099e-05 [flash_sp]: 4.54998e-06 [merge_comm]: 0.00010806 [allreduce_fusion]: 9.41399e-05 [matmul_add_comm_reduction]: 0.00011488 [allreduce_slice_to_reducescatter]: 5.49946e-07 [virtual_shard_identity]: 0.00015121 [virtual_dataset]: 0.00014403 [get_grad_eliminate_]: 0.00014233 [virtual_output]: 0.0001422 [merge_forward]: 8.78e-05 [cell_reuse_recompute_pass]: 2.78978e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00028899 [before_grad]: 0.00027488 [inplace_validation]: 8.25101e-05 [meta_fg_expand]: 0.00178717 [inplace_validation_after_expand]: 0.00075911 [flash_sp_send_recv_attached]: 2.98023e-06 [receive_attached]: 1.85007e-06 [after_resolve]: 0.00019373 [a_after_grad]: 0.00025383 [special_op_eliminate]: 0.00014519 [renormalize]: 0.0145786 [add_forward_monad_depend]: 4.63007e-06 [auto_monad_grad]: 2.7502e-06 [auto_monad_eliminator]: 0.00033485 [cse]: 0.00464535 [a_3]: 0.00104261 [Cycle 3]: 0.0126667, [43] [expand_dump_flag]: 2.10013e-06 [switch_simplify]: 0.00014607 [loop_unroll]: 0.00014105 [a_1]: 0.0047955 [recompute_prepare]: 0.00014724 [updatestate_depend_eliminate]: 0.00014133 [updatestate_assign_eliminate]: 9.15099e-05 [updatestate_loads_eliminate]: 8.83401e-05 [parameter_eliminate]: 3.01003e-06 [a_2]: 0.00228159 [accelerated_algorithm]: 0.00017634 [shard]: 1.89012e-06 [meta_shard_fg_expand]: 5.246e-05 [shard_inline]: 0.00014886 [auto_parallel]: 0.00010818 [parallel]: 1.02799e-05 [flash_sp]: 2.35997e-06 [merge_comm]: 0.00010345 [allreduce_fusion]: 9.44301e-05 [matmul_add_comm_reduction]: 0.00011506 [allreduce_slice_to_reducescatter]: 5.50412e-07 [virtual_shard_identity]: 0.00015006 [virtual_dataset]: 0.000142 [get_grad_eliminate_]: 0.0001401 [virtual_output]: 0.00013972 [merge_forward]: 8.97702e-05 [cell_reuse_recompute_pass]: 2.89036e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00028687 [before_grad]: 0.00026729 [inplace_validation]: 0.00013605 [meta_fg_expand]: 0.00011772 [inplace_validation_after_expand]: 0.00011236 [flash_sp_send_recv_attached]: 1.69035e-06 [receive_attached]: 1.63028e-06 [after_resolve]: 0.00016887 [a_after_grad]: 0.00025329 [special_op_eliminate]: 0.0001432 [renormalize]: 1.09896e-07 [add_forward_monad_depend]: 1.90036e-06 [auto_monad_grad]: 1.90968e-06 [auto_monad_eliminator]: 0.00016521 [cse]: 0.00037519 [a_3]: 0.00102146 [py_interpret_to_execute_after_opt_a]: 0.00012493 [slice_cell_reuse_recomputed_activation]: 2.92994e-06 [rewriter_after_opt_a]: 0.00098052 [convert_after_rewriter]: 0.00010792 [order_py_execute_after_rewriter]: 7.83503e-05 [opt_b]: 0.00421511, [1] [Cycle 1]: 0.00420704, [7] [b_1]: 0.0033472 [b_2]: 0.00015165 [updatestate_depend_eliminate]: 9.283e-05 [updatestate_assign_eliminate]: 8.42204e-05 [updatestate_loads_eliminate]: 8.65296e-05 [renormalize]: 6.50063e-07 [cse]: 0.00038621 [optimize_parallel_all_gather_comm]: 0.00012658 [overlap_param_gather]: 5.72996e-06 [cconv]: 6.56699e-05 [loop_unroll]: 0.00099913 [opt_after_cconv]: 0.0016149, [1] [Cycle 1]: 0.00160731, [7] [c_1]: 0.00088183 [parameter_eliminate]: 2.40002e-06 [updatestate_depend_eliminate]: 0.00011756 [updatestate_assign_eliminate]: 8.784e-05 [updatestate_loads_eliminate]: 8.94698e-05 [cse]: 0.0003702 [renormalize]: 5.0012e-07 [remove_dup_value]: 0.0005596 [tuple_transform]: 0.00113849, [1] [Cycle 1]: 0.0011308, [2] [d_1]: 0.00110968 [renormalize]: 5.40167e-07 [partial_unused_args_eliminate]: 3.31039e-06 [add_cache_embedding]: 0.00014668 [add_recomputation]: 0.00069404 [cse_after_recomputation]: 0.00028668, [1] [Cycle 1]: 0.00027807, [1] [cse]: 0.00026444 [environ_conv]: 8.79699e-05 [swap_dp_allreduce_reducescatter]: 0.0001173 [bias_add_comm_swap]: 3.09013e-06 [label_micro_interleaved_index]: 2.48989e-06 [label_fine_grained_interleaved_index]: 2.16998e-06 [merge_cast_opt]: 1.4999e-06 [slice_recompute_activation]: 2.31992e-06 [micro_interleaved_order_control]: 1.91992e-06 [assign_add_opt]: 1.18101e-05 [ForceFp32Comm]: 1.08965e-06 [remove_cast_before_assign_add]: 1.13994e-06 [full_micro_interleaved_order_control]: 2.66964e-06 [reorder_send_recv_between_fp_bp]: 2.16998e-06 [comm_op_add_attrs]: 1.13994e-06 [add_comm_op_reuse_tag]: 1.13994e-06 [interleave_split_concat_branches]: 8.79634e-07 [interleave_parallel_branches]: 9.19681e-07 [overlap_opt_shard_in_pipeline]: 1.31e-05 [overlap_opt_shard_grad_in_pipeline]: 2.25985e-06 [control_data_broadcast_order]: 1.22981e-06 [grouped_pairwise_exchange_alltoall]: 1.45007e-06 [offloading_packed_experts]: 1.22003e-06 [overlap_recompute_and_grad_model_parallel]: 2.33995e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.801e-07 [overlap_recompute_allgather_and_fa_grad]: 1.24006e-06 [overlap_grad_ring_attention]: 1.156e-05 [overlap_grad_flash_sp]: 0.00016186 [begin_end_overlap_inline]: 9.00123e-07 [split_matmul_comm_elemetwise]: 2.35019e-06 [split_layernorm_comm]: 2.50991e-06 [handle_group_info]: 1.02026e-06 [symbol_engine_optimizer]: 0.00088947, [1] [Cycle 1]: 0.00088256, [6] [build]: 4.47603e-05 [elim_shapecalc]: 0.00015605 [elim_not_effective]: 0.000253 [opt_reshape]: 0.00014417 [fold_const_symbol]: 0.00024274 [renormalize]: 4.09782e-07 [pipeline_parallel_scheduler]: 2.38977e-06 [auto_monad_reorder]: 0.00032659 [get_jit_bprop_graph]: 6.70087e-07 [rewriter_after_jit_bprop_graph]: 4.80097e-07 [eliminate_special_op_node]: 0.00111953 [distribtued_split]: 0.00039808 [validate]: 0.00030703 [task_emit]: 13.131 [execute]: 1.59303e-05 Sums bootstrap : 0.002121s : 0.01% type_inference : 0.811376s : 5.69% auto_monad : 0.002557s : 0.02% graph_reusing : 0.000044s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000016s : 0.00% pre_auto_parallel : 0.000844s : 0.01% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000890s : 0.01% optimize.rewriter_before_opt_a : 0.002271s : 0.02% optimize.opt_a.expand_dump_flag : 0.000090s : 0.00% optimize.opt_a.switch_simplify : 0.003051s : 0.02% optimize.opt_a.loop_unroll : 0.002484s : 0.02% optimize.opt_a.a_1 : 0.067085s : 0.47% optimize.opt_a.recompute_prepare : 0.000561s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000726s : 0.01% optimize.opt_a.updatestate_assign_eliminate : 0.000312s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000483s : 0.00% optimize.opt_a.parameter_eliminate : 0.000023s : 0.00% optimize.opt_a.a_2 : 0.011681s : 0.08% optimize.opt_a.accelerated_algorithm : 0.000824s : 0.01% optimize.opt_a.shard : 0.000007s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000212s : 0.00% optimize.opt_a.shard_inline : 0.000462s : 0.00% optimize.opt_a.auto_parallel : 0.000317s : 0.00% optimize.opt_a.parallel : 0.000036s : 0.00% optimize.opt_a.flash_sp : 0.000051s : 0.00% optimize.opt_a.merge_comm : 0.000304s : 0.00% optimize.opt_a.allreduce_fusion : 0.000271s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000363s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000457s : 0.00% optimize.opt_a.virtual_dataset : 0.000432s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000430s : 0.00% optimize.opt_a.virtual_output : 0.000426s : 0.00% optimize.opt_a.merge_forward : 0.000289s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000865s : 0.01% optimize.opt_a.before_grad : 0.000816s : 0.01% optimize.opt_a.inplace_validation : 0.000375s : 0.00% optimize.opt_a.meta_fg_expand : 0.051674s : 0.36% optimize.opt_a.inplace_validation_after_expand : 0.001550s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000014s : 0.00% optimize.opt_a.receive_attached : 0.000019s : 0.00% optimize.opt_a.after_resolve : 0.001510s : 0.01% optimize.opt_a.a_after_grad : 0.002394s : 0.02% optimize.opt_a.special_op_eliminate : 0.001183s : 0.01% optimize.opt_a.renormalize : 0.115914s : 0.81% optimize.opt_a.add_forward_monad_depend : 0.000323s : 0.00% optimize.opt_a.auto_monad_grad : 0.000128s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.001920s : 0.01% optimize.opt_a.cse : 0.008375s : 0.06% optimize.opt_a.a_3 : 0.022102s : 0.15% optimize.py_interpret_to_execute_after_opt_a : 0.000125s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000981s : 0.01% optimize.convert_after_rewriter : 0.000108s : 0.00% optimize.order_py_execute_after_rewriter : 0.000078s : 0.00% optimize.opt_b.b_1 : 0.003347s : 0.02% optimize.opt_b.b_2 : 0.000152s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000093s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000084s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000087s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000386s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000127s : 0.00% optimize.overlap_param_gather : 0.000006s : 0.00% optimize.cconv : 0.000066s : 0.00% optimize.loop_unroll : 0.000999s : 0.01% optimize.opt_after_cconv.c_1 : 0.000882s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000118s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000088s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000089s : 0.00% optimize.opt_after_cconv.cse : 0.000370s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000560s : 0.00% optimize.tuple_transform.d_1 : 0.001110s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000147s : 0.00% optimize.add_recomputation : 0.000694s : 0.00% optimize.cse_after_recomputation.cse : 0.000264s : 0.00% optimize.environ_conv : 0.000088s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000117s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000012s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000013s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000012s : 0.00% optimize.overlap_grad_flash_sp : 0.000162s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000003s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000045s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000156s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000253s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000144s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000243s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000327s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001120s : 0.01% distribtued_split : 0.000398s : 0.00% validate : 0.000307s : 0.00% task_emit : 13.130951s : 92.04% execute : 0.000016s : 0.00% Time group info: ------[substitution.] 0.018965 3191 0.13% : 0.000025s : 9: substitution.addn_check_dump 0.36% : 0.000067s : 8: substitution.addn_zero_filter 0.11% : 0.000020s : 8: substitution.adjust_all_reduce_mul_add 1.58% : 0.000300s : 59: substitution.arithmetic_simplify 0.34% : 0.000064s : 11: substitution.cast_eliminate 0.27% : 0.000052s : 35: substitution.depend_value_elim 0.21% : 0.000040s : 97: substitution.elim_not_effective 0.02% : 0.000003s : 1: substitution.elim_shapecalc_of_broadcastargs 0.06% : 0.000012s : 6: substitution.environ_get_add_eliminate 0.03% : 0.000006s : 3: substitution.environ_get_depend_swap 0.10% : 0.000019s : 12: substitution.environ_get_eliminate 0.13% : 0.000025s : 6: substitution.environ_get_set_eliminate 0.25% : 0.000048s : 53: substitution.float_depend_g_call 0.04% : 0.000007s : 6: substitution.float_environ_get_switch 0.04% : 0.000007s : 4: substitution.float_tuple_getitem_switch 0.21% : 0.000039s : 97: substitution.fold_const_symbol 7.13% : 0.001352s : 8: substitution.getattr_setattr_resolve 0.52% : 0.000098s : 116: substitution.graph_param_transform 0.05% : 0.000009s : 10: substitution.incorporate_call 0.04% : 0.000007s : 10: substitution.incorporate_call_switch 66.29% : 0.012571s : 326: substitution.inline 1.33% : 0.000252s : 40: substitution.inline_without_move 0.84% : 0.000159s : 286: substitution.j_node_and_user_rematch 1.66% : 0.000315s : 40: substitution.less_batch_normalization 0.25% : 0.000047s : 66: substitution.load_eliminater 0.28% : 0.000053s : 10: substitution.merge_addn 0.40% : 0.000076s : 57: substitution.minmaximum_grad 0.02% : 0.000003s : 4: substitution.opt_reshape 0.08% : 0.000015s : 4: substitution.partial_defer_inline 0.68% : 0.000129s : 53: substitution.partial_eliminate 0.08% : 0.000015s : 15: substitution.reduce_all_const_elim 0.15% : 0.000028s : 11: substitution.reduce_eliminate 1.16% : 0.000219s : 286: substitution.remove_not_recompute_node 4.14% : 0.000786s : 326: substitution.replace_applicator 0.53% : 0.000100s : 162: substitution.replace_old_param 0.18% : 0.000034s : 8: substitution.reshape_eliminate 0.05% : 0.000010s : 5: substitution.set_cell_output_no_recompute 0.04% : 0.000007s : 2: substitution.specialize_transform 0.10% : 0.000020s : 12: substitution.split_environ_get_set_with_tuple_value 0.32% : 0.000061s : 24: substitution.switch_simplify 0.21% : 0.000039s : 14: substitution.tile_eliminate 0.91% : 0.000173s : 57: substitution.tuple_list_convert_item_index_to_positive 0.49% : 0.000093s : 63: substitution.tuple_list_get_item_const_eliminator 1.16% : 0.000221s : 63: substitution.tuple_list_get_item_depend_reorder 3.74% : 0.000709s : 242: substitution.tuple_list_get_item_eliminator 0.66% : 0.000126s : 63: substitution.tuple_list_get_set_item_eliminator 1.07% : 0.000202s : 178: substitution.updatestate_pure_node_eliminater 1.60% : 0.000303s : 215: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.810784 2 95.78% : 0.776536s : 1: type_inference.infer 4.22% : 0.034248s : 1: type_inference.specialize ------[replace.] 0.006364 566 0.09% : 0.000006s : 1: replace.arithmetic_simplify 0.55% : 0.000035s : 6: replace.cast_eliminate 0.86% : 0.000054s : 7: replace.depend_value_elim 0.63% : 0.000040s : 3: replace.environ_get_set_eliminate 1.84% : 0.000117s : 6: replace.getattr_setattr_resolve 49.63% : 0.003158s : 313: replace.inline 0.35% : 0.000022s : 1: replace.merge_addn 3.29% : 0.000209s : 13: replace.partial_eliminate 2.94% : 0.000187s : 10: replace.replace_applicator 4.17% : 0.000265s : 24: replace.switch_simplify 0.86% : 0.000055s : 6: replace.tuple_list_get_item_depend_reorder 34.54% : 0.002198s : 175: replace.tuple_list_get_item_eliminator 0.25% : 0.000016s : 1: replace.updatestate_useless_node_eliminater ------[match.] 0.014272 566 0.09% : 0.000012s : 1: match.arithmetic_simplify 0.20% : 0.000029s : 6: match.cast_eliminate 0.03% : 0.000004s : 7: match.depend_value_elim 0.12% : 0.000017s : 3: match.environ_get_set_eliminate 8.61% : 0.001229s : 6: match.getattr_setattr_resolve 86.34% : 0.012322s : 313: match.inline 0.17% : 0.000025s : 1: match.merge_addn 0.48% : 0.000069s : 13: match.partial_eliminate 0.38% : 0.000054s : 10: match.replace_applicator 0.33% : 0.000046s : 24: match.switch_simplify 0.25% : 0.000036s : 6: match.tuple_list_get_item_depend_reorder 2.95% : 0.000421s : 175: match.tuple_list_get_item_eliminator 0.05% : 0.000008s : 1: match.updatestate_useless_node_eliminater ------[predicate.] 0.018351100237 0.83% : 0.000153s : 982: predicate.accumulaten_eliminater 0.19% : 0.000034s : 116: predicate.ad_related_special_op_eliminate 0.55% : 0.000101s : 621: predicate.addn_check_dump 0.86% : 0.000158s : 982: predicate.addn_zero_filter 0.82% : 0.000151s : 982: predicate.adjust_all_reduce_mul_add 1.86% : 0.000341s : 1604: predicate.arithmetic_simplify 0.84% : 0.000155s : 989: predicate.cast_eliminate 2.90% : 0.000533s : 2476: predicate.check_bprop_eliminate 0.61% : 0.000111s : 621: predicate.compare_switch_simplify 0.06% : 0.000012s : 122: predicate.const_output_eliminate 0.11% : 0.000020s : 116: predicate.convert_tensor_all_eliminate 1.35% : 0.000248s : 1173: predicate.convert_tensor_eliminate 0.57% : 0.000104s : 624: predicate.depend_value_elim 0.91% : 0.000167s : 992: predicate.dict_get_item_const_eliminator 0.96% : 0.000177s : 992: predicate.dict_get_item_eliminator 0.90% : 0.000165s : 992: predicate.dict_set_item_eliminator 0.06% : 0.000011s : 116: predicate.elim_not_effective 0.12% : 0.000022s : 116: predicate.elim_shapecalc_of_broadcastargs 0.96% : 0.000176s : 1111: predicate.environ_add_const_eliminate 0.98% : 0.000179s : 1114: predicate.environ_get_add_eliminate 0.92% : 0.000170s : 1111: predicate.environ_get_depend_swap 1.53% : 0.000281s : 1735: predicate.environ_get_eliminate 0.93% : 0.000171s : 1114: predicate.environ_get_set_eliminate 1.36% : 0.000250s : 1500: predicate.exchange_switch_depend_value 1.73% : 0.000318s : 1500: predicate.float_depend_g_call 0.55% : 0.000102s : 621: predicate.float_environ_get_switch 0.67% : 0.000122s : 743: predicate.float_tuple_getitem_switch 0.06% : 0.000011s : 116: predicate.fold_const_symbol 0.34% : 0.000062s : 366: predicate.get_grad_eliminate 0.08% : 0.000015s : 40: predicate.getattr_setattr_resolve 0.07% : 0.000012s : 116: predicate.graph_param_transform 0.57% : 0.000104s : 621: predicate.incorporate_call 0.56% : 0.000103s : 621: predicate.incorporate_call_switch 4.66% : 0.000856s : 3713: predicate.inline 1.50% : 0.000274s : 1014: predicate.inline_without_move 0.18% : 0.000033s : 366: predicate.j_node_and_user_rematch 0.40% : 0.000073s : 374: predicate.less_batch_normalization 1.29% : 0.000237s : 1411: predicate.list_to_tuple_eliminator_ 2.10% : 0.000386s : 2412: predicate.load_eliminater 0.21% : 0.000039s : 122: predicate.loop_unroll_after_grad 2.62% : 0.000481s : 2198: predicate.loop_unroll_before_grad 1.11% : 0.000204s : 1242: predicate.make_slice_get_slice_eliminator 0.57% : 0.000104s : 623: predicate.merge_addn 2.84% : 0.000522s : 2432: predicate.micro_step_allgather_replace 2.85% : 0.000524s : 2432: predicate.mini_step_allgather_replace 0.84% : 0.000155s : 983: predicate.minmaximum_grad 0.12% : 0.000022s : 116: predicate.mutable_eliminate 0.11% : 0.000020s : 116: predicate.opt_reshape 0.12% : 0.000022s : 122: predicate.parallel_virtual_node 2.38% : 0.000438s : 1500: predicate.partial_defer_inline 1.34% : 0.000247s : 1308: predicate.partial_eliminate 0.86% : 0.000158s : 982: predicate.print_const_string_wrapper 0.55% : 0.000100s : 610: predicate.reduce_all_const_elim 1.07% : 0.000196s : 983: predicate.reduce_eliminate 0.18% : 0.000033s : 366: predicate.remove_not_recompute_node 2.03% : 0.000373s : 3625: predicate.replace_applicator 0.51% : 0.000094s : 1014: predicate.replace_old_param 0.07% : 0.000012s : 122: predicate.reset_defer_inline 0.85% : 0.000157s : 983: predicate.reshape_eliminate 2.90% : 0.000532s : 2432: predicate.row_tensor_add_zeros_like 0.12% : 0.000023s : 122: predicate.row_tensor_eliminate 3.00% : 0.000550s : 2476: predicate.same_eliminate 0.22% : 0.000040s : 417: predicate.set_cell_output_no_recompute 0.35% : 0.000065s : 366: predicate.shard_identity_eliminate 1.34% : 0.000246s : 1136: predicate.special_op_eliminate 0.69% : 0.000127s : 623: predicate.specialize_transform 3.05% : 0.000561s : 2432: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000181s : 1014: predicate.stack_unstack_eliminate 2.12% : 0.000390s : 2412: predicate.stopgrad_eliminater 0.12% : 0.000022s : 122: predicate.switch_call_monad_eliminater 1.46% : 0.000268s : 1500: predicate.switch_defer_inline 4.31% : 0.000791s : 3976: predicate.switch_layer_defer_inline 4.77% : 0.000875s : 4369: predicate.switch_simplify 0.89% : 0.000164s : 983: predicate.tile_eliminate 0.84% : 0.000154s : 983: predicate.transpose_eliminate 1.18% : 0.000216s : 1230: predicate.tuple_list_convert_item_index_to_positive 1.24% : 0.000227s : 1236: predicate.tuple_list_get_item_const_eliminator 1.09% : 0.000201s : 1236: predicate.tuple_list_get_item_depend_reorder 2.11% : 0.000387s : 2032: predicate.tuple_list_get_item_eliminator 1.13% : 0.000208s : 1236: predicate.tuple_list_get_set_item_eliminator 1.81% : 0.000332s : 1857: predicate.tuple_list_set_item_eliminator 1.29% : 0.000237s : 1411: predicate.tuple_to_list_eliminator_ 2.11% : 0.000387s : 2412: predicate.updatestate_pure_node_eliminater 2.75% : 0.000505s : 3034: predicate.updatestate_useless_node_eliminater 0.12% : 0.000022s : 122: predicate.value_based_eliminate 0.34% : 0.000062s : 366: predicate.virtual_dataset_eliminate 0.33% : 0.000061s : 366: predicate.virtual_output_eliminate 0.12% : 0.000022s : 122: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.045104 649 63.62% : 0.028697s : 290: func_graph_cloner_run.FuncGraphClonerGraph 4.57% : 0.002060s : 27: func_graph_cloner_run.FuncGraphClonerNode 31.81% : 0.014347s : 332: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 14.839912 280 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000154s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000706s : 1: add_recomputation 0.00% : 0.000015s : 1: assign_add_opt 0.02% : 0.002582s : 1: auto_monad 0.00% : 0.000343s : 1: auto_monad_reorder 0.00% : 0.000006s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.002164s : 1: bootstrap 0.00% : 0.000072s : 1: cconv 0.00% : 0.000005s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000116s : 1: convert_after_rewriter 0.00% : 0.000292s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000413s : 1: distribtued_split 0.01% : 0.001136s : 1: eliminate_special_op_node 0.00% : 0.000096s : 1: environ_conv 0.00% : 0.000028s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000053s : 1: graph_reusing 0.00% : 0.000006s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.00% : 0.000008s : 1: inline 0.00% : 0.000011s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000006s : 1: label_micro_interleaved_index 0.01% : 0.001010s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000185s : 1: opt.transform.loop_unroll_optimizer 0.78% : 0.116455s : 162: opt.transform.opt_a 0.01% : 0.000879s : 1: opt.transform.opt_after_cconv 0.02% : 0.003463s : 27: opt.transform.opt_b 0.01% : 0.001598s : 4: opt.transform.opt_resolve 0.01% : 0.001106s : 1: opt.transform.opt_trans_graph 0.00% : 0.000457s : 3: opt.transform.special_op_eliminate 0.01% : 0.000790s : 4: opt.transform.symbol_engine_opt 2.07% : 0.307774s : 1: opt_a 0.01% : 0.001621s : 1: opt_after_cconv 0.03% : 0.004220s : 1: opt_b 2.18% : 0.323860s : 1: optimize 0.00% : 0.000135s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000084s : 1: order_py_execute_after_rewriter 0.00% : 0.000167s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000015s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000017s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000011s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000023s : 1: parallel-infer-symbol 0.00% : 0.000007s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.01% : 0.000865s : 1: pre_auto_parallel 0.01% : 0.000944s : 1: py_interpret_to_execute 0.00% : 0.000134s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000573s : 1: remove_dup_value 0.53% : 0.078129s : 2: renormalize.infer 0.25% : 0.037757s : 2: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000992s : 1: rewriter_after_opt_a 0.02% : 0.002286s : 1: rewriter_before_opt_a 0.00% : 0.000008s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000006s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000124s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000896s : 1: symbol_engine_optimizer 88.48% : 13.131035s : 1: task_emit 0.01% : 0.001144s : 1: tuple_transform 5.47% : 0.811413s : 1: type_inference 0.01% : 0.001384s : 1: validate TotalTime = 14.0613, [21] [bootstrap]: 0.00135132 [type_inference]: 0.783907 [auto_monad]: 0.00233042 [graph_reusing]: 3.957e-05 [inline]: 1.9297e-06 [parallel-infer-symbol]: 1.61999e-05 [pre_auto_parallel]: 0.00077837 [insert-virtual-dataset]: 3.95998e-06 [parallel-infer-symbol-second]: 1.07987e-06 [dataset_repeat_opt]: 1.68988e-06 [pipeline_split]: 1.7602e-06 [optimize]: 0.303435, [52] [py_interpret_to_execute]: 0.00083737 [rewriter_before_opt_a]: 0.00196067 [opt_a]: 0.288904, [3] [Cycle 1]: 0.211712, [43] [expand_dump_flag]: 4.70099e-05 [switch_simplify]: 0.00144067 [loop_unroll]: 0.0009034 [a_1]: 0.0266699 [recompute_prepare]: 0.00019077 [updatestate_depend_eliminate]: 0.00037944 [updatestate_assign_eliminate]: 0.00011164 [updatestate_loads_eliminate]: 0.00022303 [parameter_eliminate]: 1.49901e-05 [a_2]: 0.00394301 [accelerated_algorithm]: 0.00042166 [shard]: 2.36975e-06 [meta_shard_fg_expand]: 6.56098e-05 [shard_inline]: 0.00012587 [auto_parallel]: 8.57296e-05 [parallel]: 1.20997e-05 [flash_sp]: 4.07901e-05 [merge_comm]: 8.409e-05 [allreduce_fusion]: 7.22301e-05 [matmul_add_comm_reduction]: 0.0001239 [allreduce_slice_to_reducescatter]: 6.10016e-07 [virtual_shard_identity]: 0.00013287 [virtual_dataset]: 0.00012459 [get_grad_eliminate_]: 0.00012396 [virtual_output]: 0.0001227 [merge_forward]: 0.00010477 [cell_reuse_recompute_pass]: 2.65986e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022864 [before_grad]: 0.0002643 [inplace_validation]: 0.00015121 [meta_fg_expand]: 0.0501188 [inplace_validation_after_expand]: 0.00062519 [flash_sp_send_recv_attached]: 8.72975e-06 [receive_attached]: 3.13995e-06 [after_resolve]: 0.00102923 [a_after_grad]: 0.00153371 [special_op_eliminate]: 0.00074073 [renormalize]: 0.0988522 [add_forward_monad_depend]: 0.00032591 [auto_monad_grad]: 0.00012767 [auto_monad_eliminator]: 0.00128764 [cse]: 0.00334445 [a_3]: 0.0169492 [Cycle 2]: 0.0607308, [43] [expand_dump_flag]: 3.83398e-05 [switch_simplify]: 0.00113484 [loop_unroll]: 0.00108012 [a_1]: 0.0273592 [recompute_prepare]: 0.00016792 [updatestate_depend_eliminate]: 0.00018533 [updatestate_assign_eliminate]: 9.36501e-05 [updatestate_loads_eliminate]: 0.00016457 [parameter_eliminate]: 5.11995e-06 [a_2]: 0.00391598 [accelerated_algorithm]: 0.00015187 [shard]: 2.56021e-06 [meta_shard_fg_expand]: 8.11801e-05 [shard_inline]: 0.00012764 [auto_parallel]: 0.00010947 [parallel]: 1.34697e-05 [flash_sp]: 4.77955e-06 [merge_comm]: 9.62596e-05 [allreduce_fusion]: 8.39797e-05 [matmul_add_comm_reduction]: 0.00011316 [allreduce_slice_to_reducescatter]: 5.59725e-07 [virtual_shard_identity]: 0.00012838 [virtual_dataset]: 0.00012167 [get_grad_eliminate_]: 0.0001191 [virtual_output]: 0.00012079 [merge_forward]: 8.41198e-05 [cell_reuse_recompute_pass]: 3.43006e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022607 [before_grad]: 0.00021511 [inplace_validation]: 7.788e-05 [meta_fg_expand]: 0.00187301 [inplace_validation_after_expand]: 0.00076942 [flash_sp_send_recv_attached]: 3.28012e-06 [receive_attached]: 1.72015e-06 [after_resolve]: 0.00017419 [a_after_grad]: 0.00020992 [special_op_eliminate]: 0.00012218 [renormalize]: 0.015049 [add_forward_monad_depend]: 6.51972e-06 [auto_monad_grad]: 3.05986e-06 [auto_monad_eliminator]: 0.00029624 [cse]: 0.00485899 [a_3]: 0.00091426 [Cycle 3]: 0.0106317, [43] [expand_dump_flag]: 3.23029e-06 [switch_simplify]: 0.00012291 [loop_unroll]: 0.00011729 [a_1]: 0.00395757 [recompute_prepare]: 0.00012716 [updatestate_depend_eliminate]: 0.00014156 [updatestate_assign_eliminate]: 8.226e-05 [updatestate_loads_eliminate]: 8.04998e-05 [parameter_eliminate]: 3.70014e-06 [a_2]: 0.00187362 [accelerated_algorithm]: 0.00014409 [shard]: 2.17045e-06 [meta_shard_fg_expand]: 4.71999e-05 [shard_inline]: 0.00012181 [auto_parallel]: 0.00010079 [parallel]: 1.22599e-05 [flash_sp]: 2.91038e-06 [merge_comm]: 9.23197e-05 [allreduce_fusion]: 8.26702e-05 [matmul_add_comm_reduction]: 0.00010864 [allreduce_slice_to_reducescatter]: 9.69972e-07 [virtual_shard_identity]: 0.00012557 [virtual_dataset]: 0.00011978 [get_grad_eliminate_]: 0.000117 [virtual_output]: 0.00013686 [merge_forward]: 8.641e-05 [cell_reuse_recompute_pass]: 3.67034e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022466 [before_grad]: 0.0002109 [inplace_validation]: 7.85599e-05 [meta_fg_expand]: 9.917e-05 [inplace_validation_after_expand]: 9.719e-05 [flash_sp_send_recv_attached]: 1.68011e-06 [receive_attached]: 1.57999e-06 [after_resolve]: 0.00013982 [a_after_grad]: 0.00020156 [special_op_eliminate]: 0.0001184 [renormalize]: 1.20141e-07 [add_forward_monad_depend]: 2.99001e-06 [auto_monad_grad]: 1.72993e-06 [auto_monad_eliminator]: 0.000161 [cse]: 0.00036427 [a_3]: 0.00084938 [py_interpret_to_execute_after_opt_a]: 0.00012647 [slice_cell_reuse_recomputed_activation]: 2.8098e-06 [rewriter_after_opt_a]: 0.00095065 [convert_after_rewriter]: 0.0001473 [order_py_execute_after_rewriter]: 7.45803e-05 [opt_b]: 0.00355231, [1] [Cycle 1]: 0.00354343, [7] [b_1]: 0.00276908 [b_2]: 0.00012549 [updatestate_depend_eliminate]: 8.60197e-05 [updatestate_assign_eliminate]: 7.73198e-05 [updatestate_loads_eliminate]: 7.92299e-05 [renormalize]: 6.00237e-07 [cse]: 0.00035371 [optimize_parallel_all_gather_comm]: 0.00012313 [overlap_param_gather]: 2.44696e-05 [cconv]: 0.00010509 [loop_unroll]: 0.00095501 [opt_after_cconv]: 0.0014115, [1] [Cycle 1]: 0.00140348, [7] [c_1]: 0.00071656 [parameter_eliminate]: 2.90992e-06 [updatestate_depend_eliminate]: 0.00011404 [updatestate_assign_eliminate]: 7.97897e-05 [updatestate_loads_eliminate]: 8.21501e-05 [cse]: 0.00035421 [renormalize]: 5.09899e-07 [remove_dup_value]: 0.00054433 [tuple_transform]: 0.00088657, [1] [Cycle 1]: 0.00087936, [2] [d_1]: 0.0008622 [renormalize]: 3.7998e-07 [partial_unused_args_eliminate]: 3.18978e-06 [add_cache_embedding]: 0.00016123 [add_recomputation]: 0.00064885 [cse_after_recomputation]: 0.00027045, [1] [Cycle 1]: 0.00026277, [1] [cse]: 0.00025018 [environ_conv]: 8.57003e-05 [swap_dp_allreduce_reducescatter]: 0.00011203 [bias_add_comm_swap]: 2.78e-06 [label_micro_interleaved_index]: 2.64961e-06 [label_fine_grained_interleaved_index]: 2.54996e-06 [merge_cast_opt]: 1.27964e-06 [slice_recompute_activation]: 2.21003e-06 [micro_interleaved_order_control]: 1.93017e-06 [assign_add_opt]: 1.49403e-05 [ForceFp32Comm]: 9.69972e-07 [remove_cast_before_assign_add]: 1.50036e-06 [full_micro_interleaved_order_control]: 2.22027e-06 [reorder_send_recv_between_fp_bp]: 2.89967e-06 [comm_op_add_attrs]: 1.09989e-06 [add_comm_op_reuse_tag]: 1.37975e-06 [interleave_split_concat_branches]: 1.15996e-06 [interleave_parallel_branches]: 1.06962e-06 [overlap_opt_shard_in_pipeline]: 2.95197e-05 [overlap_opt_shard_grad_in_pipeline]: 2.45031e-06 [control_data_broadcast_order]: 1.37975e-06 [grouped_pairwise_exchange_alltoall]: 1.45007e-06 [offloading_packed_experts]: 1.53994e-06 [overlap_recompute_and_grad_model_parallel]: 2.08989e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.89762e-07 [overlap_recompute_allgather_and_fa_grad]: 1.21025e-06 [overlap_grad_ring_attention]: 2.08989e-06 [overlap_grad_flash_sp]: 0.00013333 [begin_end_overlap_inline]: 8.40053e-07 [split_matmul_comm_elemetwise]: 2.38977e-06 [split_layernorm_comm]: 2.04006e-06 [handle_group_info]: 1.0198e-06 [symbol_engine_optimizer]: 0.00077904, [1] [Cycle 1]: 0.0007722, [6] [build]: 4.62998e-05 [elim_shapecalc]: 0.00013534 [elim_not_effective]: 0.00019997 [opt_reshape]: 0.00011912 [fold_const_symbol]: 0.00023191 [renormalize]: 5.80214e-07 [pipeline_parallel_scheduler]: 2.31992e-06 [auto_monad_reorder]: 0.00033204 [get_jit_bprop_graph]: 6.40284e-07 [rewriter_after_jit_bprop_graph]: 5.20144e-07 [eliminate_special_op_node]: 0.00096215 [distribtued_split]: 0.00039371 [validate]: 0.00029005 [task_emit]: 12.9661 [execute]: 1.28499e-05 Sums bootstrap : 0.001351s : 0.01% type_inference : 0.783907s : 5.58% auto_monad : 0.002330s : 0.02% graph_reusing : 0.000040s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000016s : 0.00% pre_auto_parallel : 0.000778s : 0.01% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000837s : 0.01% optimize.rewriter_before_opt_a : 0.001961s : 0.01% optimize.opt_a.expand_dump_flag : 0.000089s : 0.00% optimize.opt_a.switch_simplify : 0.002698s : 0.02% optimize.opt_a.loop_unroll : 0.002101s : 0.01% optimize.opt_a.a_1 : 0.057987s : 0.41% optimize.opt_a.recompute_prepare : 0.000486s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000706s : 0.01% optimize.opt_a.updatestate_assign_eliminate : 0.000288s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000468s : 0.00% optimize.opt_a.parameter_eliminate : 0.000024s : 0.00% optimize.opt_a.a_2 : 0.009733s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000718s : 0.01% optimize.opt_a.shard : 0.000007s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000194s : 0.00% optimize.opt_a.shard_inline : 0.000375s : 0.00% optimize.opt_a.auto_parallel : 0.000296s : 0.00% optimize.opt_a.parallel : 0.000038s : 0.00% optimize.opt_a.flash_sp : 0.000048s : 0.00% optimize.opt_a.merge_comm : 0.000273s : 0.00% optimize.opt_a.allreduce_fusion : 0.000239s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000346s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000387s : 0.00% optimize.opt_a.virtual_dataset : 0.000366s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000360s : 0.00% optimize.opt_a.virtual_output : 0.000380s : 0.00% optimize.opt_a.merge_forward : 0.000275s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000010s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000679s : 0.00% optimize.opt_a.before_grad : 0.000690s : 0.00% optimize.opt_a.inplace_validation : 0.000308s : 0.00% optimize.opt_a.meta_fg_expand : 0.052091s : 0.37% optimize.opt_a.inplace_validation_after_expand : 0.001492s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000014s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.001343s : 0.01% optimize.opt_a.a_after_grad : 0.001945s : 0.01% optimize.opt_a.special_op_eliminate : 0.000981s : 0.01% optimize.opt_a.renormalize : 0.113901s : 0.81% optimize.opt_a.add_forward_monad_depend : 0.000335s : 0.00% optimize.opt_a.auto_monad_grad : 0.000132s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.001745s : 0.01% optimize.opt_a.cse : 0.008568s : 0.06% optimize.opt_a.a_3 : 0.018713s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000126s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000951s : 0.01% optimize.convert_after_rewriter : 0.000147s : 0.00% optimize.order_py_execute_after_rewriter : 0.000075s : 0.00% optimize.opt_b.b_1 : 0.002769s : 0.02% optimize.opt_b.b_2 : 0.000125s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000086s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000077s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000079s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000354s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000123s : 0.00% optimize.overlap_param_gather : 0.000024s : 0.00% optimize.cconv : 0.000105s : 0.00% optimize.loop_unroll : 0.000955s : 0.01% optimize.opt_after_cconv.c_1 : 0.000717s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000114s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000080s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000082s : 0.00% optimize.opt_after_cconv.cse : 0.000354s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000544s : 0.00% optimize.tuple_transform.d_1 : 0.000862s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000161s : 0.00% optimize.add_recomputation : 0.000649s : 0.00% optimize.cse_after_recomputation.cse : 0.000250s : 0.00% optimize.environ_conv : 0.000086s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000112s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000003s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000015s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000002s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000030s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000133s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000046s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000135s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000200s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000119s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000232s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000332s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000962s : 0.01% distribtued_split : 0.000394s : 0.00% validate : 0.000290s : 0.00% task_emit : 12.966083s : 92.27% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.018078 3191 0.13% : 0.000023s : 9: substitution.addn_check_dump 0.39% : 0.000070s : 8: substitution.addn_zero_filter 0.11% : 0.000020s : 8: substitution.adjust_all_reduce_mul_add 1.46% : 0.000264s : 59: substitution.arithmetic_simplify 0.37% : 0.000067s : 11: substitution.cast_eliminate 0.23% : 0.000042s : 35: substitution.depend_value_elim 0.15% : 0.000028s : 97: substitution.elim_not_effective 0.01% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.06% : 0.000011s : 6: substitution.environ_get_add_eliminate 0.03% : 0.000005s : 3: substitution.environ_get_depend_swap 0.09% : 0.000016s : 12: substitution.environ_get_eliminate 0.14% : 0.000026s : 6: substitution.environ_get_set_eliminate 0.23% : 0.000041s : 53: substitution.float_depend_g_call 0.03% : 0.000006s : 6: substitution.float_environ_get_switch 0.03% : 0.000006s : 4: substitution.float_tuple_getitem_switch 0.21% : 0.000038s : 97: substitution.fold_const_symbol 8.17% : 0.001477s : 8: substitution.getattr_setattr_resolve 0.42% : 0.000075s : 116: substitution.graph_param_transform 0.04% : 0.000008s : 10: substitution.incorporate_call 0.03% : 0.000005s : 10: substitution.incorporate_call_switch 67.56% : 0.012214s : 326: substitution.inline 1.22% : 0.000221s : 40: substitution.inline_without_move 0.87% : 0.000157s : 286: substitution.j_node_and_user_rematch 1.59% : 0.000288s : 40: substitution.less_batch_normalization 0.19% : 0.000035s : 66: substitution.load_eliminater 0.31% : 0.000056s : 10: substitution.merge_addn 0.37% : 0.000067s : 57: substitution.minmaximum_grad 0.01% : 0.000002s : 4: substitution.opt_reshape 0.21% : 0.000038s : 4: substitution.partial_defer_inline 0.65% : 0.000118s : 53: substitution.partial_eliminate 0.07% : 0.000012s : 15: substitution.reduce_all_const_elim 0.15% : 0.000027s : 11: substitution.reduce_eliminate 0.81% : 0.000147s : 286: substitution.remove_not_recompute_node 3.60% : 0.000652s : 326: substitution.replace_applicator 0.43% : 0.000078s : 162: substitution.replace_old_param 0.19% : 0.000034s : 8: substitution.reshape_eliminate 0.04% : 0.000007s : 5: substitution.set_cell_output_no_recompute 0.03% : 0.000006s : 2: substitution.specialize_transform 0.10% : 0.000018s : 12: substitution.split_environ_get_set_with_tuple_value 0.29% : 0.000052s : 24: substitution.switch_simplify 0.21% : 0.000037s : 14: substitution.tile_eliminate 0.88% : 0.000159s : 57: substitution.tuple_list_convert_item_index_to_positive 0.45% : 0.000082s : 63: substitution.tuple_list_get_item_const_eliminator 0.76% : 0.000138s : 63: substitution.tuple_list_get_item_depend_reorder 3.48% : 0.000630s : 242: substitution.tuple_list_get_item_eliminator 0.61% : 0.000111s : 63: substitution.tuple_list_get_set_item_eliminator 0.96% : 0.000174s : 178: substitution.updatestate_pure_node_eliminater 1.57% : 0.000283s : 215: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.783333 2 95.85% : 0.750805s : 1: type_inference.infer 4.15% : 0.032527s : 1: type_inference.specialize ------[replace.] 0.006362 566 0.09% : 0.000006s : 1: replace.arithmetic_simplify 0.55% : 0.000035s : 6: replace.cast_eliminate 0.86% : 0.000055s : 7: replace.depend_value_elim 0.66% : 0.000042s : 3: replace.environ_get_set_eliminate 1.87% : 0.000119s : 6: replace.getattr_setattr_resolve 48.82% : 0.003106s : 313: replace.inline 0.38% : 0.000024s : 1: replace.merge_addn 3.33% : 0.000212s : 13: replace.partial_eliminate 3.12% : 0.000198s : 10: replace.replace_applicator 3.99% : 0.000254s : 24: replace.switch_simplify 0.81% : 0.000052s : 6: replace.tuple_list_get_item_depend_reorder 35.25% : 0.002243s : 175: replace.tuple_list_get_item_eliminator 0.27% : 0.000017s : 1: replace.updatestate_useless_node_eliminater ------[match.] 0.014043 566 0.07% : 0.000010s : 1: match.arithmetic_simplify 0.20% : 0.000028s : 6: match.cast_eliminate 0.02% : 0.000003s : 7: match.depend_value_elim 0.14% : 0.000020s : 3: match.environ_get_set_eliminate 9.62% : 0.001350s : 6: match.getattr_setattr_resolve 85.58% : 0.012017s : 313: match.inline 0.21% : 0.000029s : 1: match.merge_addn 0.45% : 0.000063s : 13: match.partial_eliminate 0.38% : 0.000054s : 10: match.replace_applicator 0.29% : 0.000040s : 24: match.switch_simplify 0.22% : 0.000031s : 6: match.tuple_list_get_item_depend_reorder 2.74% : 0.000385s : 175: match.tuple_list_get_item_eliminator 0.08% : 0.000012s : 1: match.updatestate_useless_node_eliminater ------[predicate.] 0.015669100237 0.86% : 0.000134s : 982: predicate.accumulaten_eliminater 0.21% : 0.000032s : 116: predicate.ad_related_special_op_eliminate 0.54% : 0.000084s : 621: predicate.addn_check_dump 0.86% : 0.000135s : 982: predicate.addn_zero_filter 0.81% : 0.000127s : 982: predicate.adjust_all_reduce_mul_add 1.88% : 0.000294s : 1604: predicate.arithmetic_simplify 0.90% : 0.000141s : 989: predicate.cast_eliminate 3.02% : 0.000474s : 2476: predicate.check_bprop_eliminate 0.55% : 0.000086s : 621: predicate.compare_switch_simplify 0.06% : 0.000009s : 122: predicate.const_output_eliminate 0.11% : 0.000017s : 116: predicate.convert_tensor_all_eliminate 1.29% : 0.000202s : 1173: predicate.convert_tensor_eliminate 0.56% : 0.000087s : 624: predicate.depend_value_elim 0.92% : 0.000144s : 992: predicate.dict_get_item_const_eliminator 0.94% : 0.000148s : 992: predicate.dict_get_item_eliminator 0.91% : 0.000142s : 992: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 116: predicate.elim_not_effective 0.12% : 0.000019s : 116: predicate.elim_shapecalc_of_broadcastargs 0.92% : 0.000145s : 1111: predicate.environ_add_const_eliminate 0.93% : 0.000146s : 1114: predicate.environ_get_add_eliminate 0.93% : 0.000145s : 1111: predicate.environ_get_depend_swap 1.51% : 0.000237s : 1735: predicate.environ_get_eliminate 0.94% : 0.000147s : 1114: predicate.environ_get_set_eliminate 1.31% : 0.000205s : 1500: predicate.exchange_switch_depend_value 1.66% : 0.000260s : 1500: predicate.float_depend_g_call 0.54% : 0.000085s : 621: predicate.float_environ_get_switch 0.64% : 0.000101s : 743: predicate.float_tuple_getitem_switch 0.05% : 0.000008s : 116: predicate.fold_const_symbol 0.34% : 0.000053s : 366: predicate.get_grad_eliminate 0.09% : 0.000014s : 40: predicate.getattr_setattr_resolve 0.06% : 0.000010s : 116: predicate.graph_param_transform 0.54% : 0.000085s : 621: predicate.incorporate_call 0.53% : 0.000083s : 621: predicate.incorporate_call_switch 4.21% : 0.000659s : 3713: predicate.inline 1.47% : 0.000231s : 1014: predicate.inline_without_move 0.17% : 0.000027s : 366: predicate.j_node_and_user_rematch 0.42% : 0.000066s : 374: predicate.less_batch_normalization 1.26% : 0.000197s : 1411: predicate.list_to_tuple_eliminator_ 2.09% : 0.000328s : 2412: predicate.load_eliminater 0.23% : 0.000036s : 122: predicate.loop_unroll_after_grad 2.67% : 0.000418s : 2198: predicate.loop_unroll_before_grad 1.09% : 0.000171s : 1242: predicate.make_slice_get_slice_eliminator 0.56% : 0.000088s : 623: predicate.merge_addn 3.13% : 0.000490s : 2432: predicate.micro_step_allgather_replace 2.87% : 0.000449s : 2432: predicate.mini_step_allgather_replace 0.84% : 0.000132s : 983: predicate.minmaximum_grad 0.12% : 0.000018s : 116: predicate.mutable_eliminate 0.11% : 0.000017s : 116: predicate.opt_reshape 0.12% : 0.000018s : 122: predicate.parallel_virtual_node 2.58% : 0.000405s : 1500: predicate.partial_defer_inline 1.24% : 0.000195s : 1308: predicate.partial_eliminate 0.88% : 0.000138s : 982: predicate.print_const_string_wrapper 0.53% : 0.000084s : 610: predicate.reduce_all_const_elim 1.07% : 0.000168s : 983: predicate.reduce_eliminate 0.17% : 0.000026s : 366: predicate.remove_not_recompute_node 1.98% : 0.000311s : 3625: predicate.replace_applicator 0.48% : 0.000076s : 1014: predicate.replace_old_param 0.06% : 0.000010s : 122: predicate.reset_defer_inline 0.91% : 0.000143s : 983: predicate.reshape_eliminate 2.95% : 0.000462s : 2432: predicate.row_tensor_add_zeros_like 0.12% : 0.000019s : 122: predicate.row_tensor_eliminate 3.05% : 0.000479s : 2476: predicate.same_eliminate 0.21% : 0.000033s : 417: predicate.set_cell_output_no_recompute 0.36% : 0.000056s : 366: predicate.shard_identity_eliminate 1.35% : 0.000212s : 1136: predicate.special_op_eliminate 0.63% : 0.000099s : 623: predicate.specialize_transform 3.13% : 0.000491s : 2432: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000155s : 1014: predicate.stack_unstack_eliminate 2.04% : 0.000320s : 2412: predicate.stopgrad_eliminater 0.11% : 0.000017s : 122: predicate.switch_call_monad_eliminater 1.49% : 0.000233s : 1500: predicate.switch_defer_inline 4.38% : 0.000686s : 3976: predicate.switch_layer_defer_inline 5.03% : 0.000788s : 4369: predicate.switch_simplify 0.85% : 0.000134s : 983: predicate.tile_eliminate 0.89% : 0.000140s : 983: predicate.transpose_eliminate 1.20% : 0.000189s : 1230: predicate.tuple_list_convert_item_index_to_positive 1.18% : 0.000185s : 1236: predicate.tuple_list_get_item_const_eliminator 1.05% : 0.000164s : 1236: predicate.tuple_list_get_item_depend_reorder 2.06% : 0.000323s : 2032: predicate.tuple_list_get_item_eliminator 1.12% : 0.000175s : 1236: predicate.tuple_list_get_set_item_eliminator 1.82% : 0.000286s : 1857: predicate.tuple_list_set_item_eliminator 1.26% : 0.000197s : 1411: predicate.tuple_to_list_eliminator_ 2.12% : 0.000332s : 2412: predicate.updatestate_pure_node_eliminater 2.69% : 0.000422s : 3034: predicate.updatestate_useless_node_eliminater 0.12% : 0.000018s : 122: predicate.value_based_eliminate 0.34% : 0.000053s : 366: predicate.virtual_dataset_eliminate 0.45% : 0.000070s : 366: predicate.virtual_output_eliminate 0.12% : 0.000019s : 122: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.043505 649 63.06% : 0.027434s : 290: func_graph_cloner_run.FuncGraphClonerGraph 4.76% : 0.002070s : 27: func_graph_cloner_run.FuncGraphClonerNode 32.18% : 0.014000s : 332: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 14.585418 280 0.00% : 0.000004s : 1: ForceFp32Comm 0.00% : 0.000170s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.00% : 0.000660s : 1: add_recomputation 0.00% : 0.000018s : 1: assign_add_opt 0.02% : 0.002353s : 1: auto_monad 0.00% : 0.000349s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001391s : 1: bootstrap 0.00% : 0.000113s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000155s : 1: convert_after_rewriter 0.00% : 0.000275s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000411s : 1: distribtued_split 0.01% : 0.000978s : 1: eliminate_special_op_node 0.00% : 0.000094s : 1: environ_conv 0.00% : 0.000022s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000048s : 1: graph_reusing 0.00% : 0.000006s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000010s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000006s : 1: label_micro_interleaved_index 0.01% : 0.000966s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.00% : 0.000161s : 1: opt.transform.loop_unroll_optimizer 0.68% : 0.099675s : 162: opt.transform.opt_a 0.00% : 0.000714s : 1: opt.transform.opt_after_cconv 0.02% : 0.002865s : 27: opt.transform.opt_b 0.01% : 0.001717s : 4: opt.transform.opt_resolve 0.01% : 0.000859s : 1: opt.transform.opt_trans_graph 0.00% : 0.000389s : 3: opt.transform.special_op_eliminate 0.00% : 0.000680s : 4: opt.transform.symbol_engine_opt 1.98% : 0.288911s : 1: opt_a 0.01% : 0.001418s : 1: opt_after_cconv 0.02% : 0.003556s : 1: opt_b 2.08% : 0.303448s : 1: optimize 0.00% : 0.000131s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000080s : 1: order_py_execute_after_rewriter 0.00% : 0.000138s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000034s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000030s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000022s : 1: parallel-infer-symbol 0.00% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.01% : 0.000798s : 1: pre_auto_parallel 0.01% : 0.000891s : 1: py_interpret_to_execute 0.00% : 0.000134s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000557s : 1: remove_dup_value 0.54% : 0.078068s : 2: renormalize.infer 0.25% : 0.035799s : 2: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000962s : 1: rewriter_after_opt_a 0.01% : 0.001976s : 1: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000118s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000783s : 1: symbol_engine_optimizer 88.90% : 12.966122s : 1: task_emit 0.01% : 0.000891s : 1: tuple_transform 5.37% : 0.783933s : 1: type_inference 0.01% : 0.001365s : 1: validate distribute network loadcheckpoint. distribute network loadcheckpoint. [WARNING] ME(32903:281472839912464,MainProcess):2025-02-07-15:50:49.182.497 [mindspore/train/serialization.py:1930] For 'load_param_into_net', remove parameter prefix name: accum., continue to load. [WARNING] ME(32903:281472839912464,MainProcess):2025-02-07-15:50:49.183.157 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 10 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(32903:281472839912464,MainProcess):2025-02-07-15:50:49.183.291 [mindspore/train/serialization.py:1828] ['fc2_weight', 'fc3_weight', 'global_step', 'learning_rate', 'momentum', 'accum.fc2_weight', 'accum.fc3_weight', 'stat.fc1_weight', 'stat.fc2_weight', 'stat.fc3_weight'] are not loaded. distribute network loadcheckpoint. [WARNING] ME(32826:281473151163408,MainProcess):2025-02-07-15:50:49.187.520 [mindspore/train/serialization.py:1930] For 'load_param_into_net', remove parameter prefix name: stat., continue to load. [WARNING] ME(32826:281473151163408,MainProcess):2025-02-07-15:50:49.188.171 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 9 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(32826:281473151163408,MainProcess):2025-02-07-15:50:49.188.306 [mindspore/train/serialization.py:1828] ['fc3_weight', 'global_step', 'learning_rate', 'momentum', 'accum.fc1_weight', 'accum.fc2_weight', 'accum.fc3_weight', 'stat.fc2_weight', 'stat.fc3_weight'] are not loaded. [WARNING] ME(32856:281473523473424,MainProcess):2025-02-07-15:50:49.188.581 [mindspore/train/serialization.py:1930] For 'load_param_into_net', remove parameter prefix name: stat., continue to load. [WARNING] ME(32856:281473523473424,MainProcess):2025-02-07-15:50:49.189.208 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 9 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. distribute network loadcheckpoint. [WARNING] ME(32856:281473523473424,MainProcess):2025-02-07-15:50:49.189.342 [mindspore/train/serialization.py:1828] ['fc1_weight', 'fc2_weight', 'learning_rate', 'momentum', 'accum.fc1_weight', 'accum.fc2_weight', 'accum.fc3_weight', 'stat.fc1_weight', 'stat.fc2_weight'] are not loaded. [WARNING] ME(32881:281473326545936,MainProcess):2025-02-07-15:50:49.191.341 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 11 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(32881:281473326545936,MainProcess):2025-02-07-15:50:49.191.713 [mindspore/train/serialization.py:1828] ['fc1_weight', 'fc2_weight', 'fc3_weight', 'global_step', 'learning_rate', 'accum.fc1_weight', 'accum.fc2_weight', 'accum.fc3_weight', 'stat.fc1_weight', 'stat.fc2_weight', 'stat.fc3_weight'] are not loaded. distribute network loadcheckpoint. distribute network loadcheckpoint. [WARNING] ME(32811:281472858999824,MainProcess):2025-02-07-15:50:49.199.312 [mindspore/train/serialization.py:1930] For 'load_param_into_net', remove parameter prefix name: accum., continue to load. [WARNING] ME(32811:281472858999824,MainProcess):2025-02-07-15:50:49.200.287 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 9 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(32811:281472858999824,MainProcess):2025-02-07-15:50:49.200.416 [mindspore/train/serialization.py:1828] ['fc2_weight', 'global_step', 'learning_rate', 'momentum', 'accum.fc1_weight', 'accum.fc2_weight', 'stat.fc1_weight', 'stat.fc2_weight', 'stat.fc3_weight'] are not loaded. [WARNING] ME(32868:281473633745936,MainProcess):2025-02-07-15:50:49.201.917 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 11 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(32868:281473633745936,MainProcess):2025-02-07-15:50:49.202.543 [mindspore/train/serialization.py:1828] ['fc1_weight', 'fc2_weight', 'fc3_weight', 'global_step', 'momentum', 'accum.fc1_weight', 'accum.fc2_weight', 'accum.fc3_weight', 'stat.fc1_weight', 'stat.fc2_weight', 'stat.fc3_weight'] are not loaded. distribute network loadcheckpoint. distribute network loadcheckpoint. [WARNING] ME(32916:281473536338960,MainProcess):2025-02-07-15:50:49.222.648 [mindspore/train/serialization.py:1930] For 'load_param_into_net', remove parameter prefix name: accum., continue to load. [WARNING] ME(32916:281473536338960,MainProcess):2025-02-07-15:50:49.223.340 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 10 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(32916:281473536338960,MainProcess):2025-02-07-15:50:49.223.482 [mindspore/train/serialization.py:1828] ['fc1_weight', 'fc3_weight', 'global_step', 'learning_rate', 'momentum', 'accum.fc1_weight', 'accum.fc3_weight', 'stat.fc1_weight', 'stat.fc2_weight', 'stat.fc3_weight'] are not loaded. [WARNING] ME(32835:281473163488272,MainProcess):2025-02-07-15:50:49.226.354 [mindspore/train/serialization.py:1930] For 'load_param_into_net', remove parameter prefix name: stat., continue to load. [WARNING] ME(32835:281473163488272,MainProcess):2025-02-07-15:50:49.227.264 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 9 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(32835:281473163488272,MainProcess):2025-02-07-15:50:49.227.398 [mindspore/train/serialization.py:1828] ['fc1_weight', 'global_step', 'learning_rate', 'momentum', 'accum.fc1_weight', 'accum.fc2_weight', 'accum.fc3_weight', 'stat.fc1_weight', 'stat.fc3_weight'] are not loaded. .... TotalTime = 0.0929561, [21] [bootstrap]: 0.0003815 [type_inference]: 0.00484666 [auto_monad]: 0.00014551 [graph_reusing]: 2.38977e-06 [inline]: 1.18976e-06 [parallel-infer-symbol]: 1.22003e-06 [pre_auto_parallel]: 2.43597e-05 [insert-virtual-dataset]: 2.14996e-06 [parallel-infer-symbol-second]: 3.29688e-07 [dataset_repeat_opt]: 8.60076e-07 [pipeline_split]: 1.03004e-06 [optimize]: 0.00811042, [52] [py_interpret_to_execute]: 1.984e-05 [rewriter_before_opt_a]: 3.382e-05 [opt_a]: 0.00630647, [2] [Cycle 1]: 0.00161846, [43] [expand_dump_flag]: 2.23983e-06 [switch_simplify]: 2.60798e-05 [loop_unroll]: 1.36602e-05 [a_1]: 0.00037405 [recompute_prepare]: 9.11998e-06 [updatestate_depend_eliminate]: 8.32975e-06 [updatestate_assign_eliminate]: 6.8699e-06 [updatestate_loads_eliminate]: 7.68993e-06 [parameter_eliminate]: 2.48011e-06 [a_2]: 0.00012858 [accelerated_algorithm]: 8.82987e-06 [shard]: 2.46987e-06 [meta_shard_fg_expand]: 3.39979e-06 [shard_inline]: 8.80007e-06 [auto_parallel]: 1.19898e-05 [parallel]: 7.67037e-06 [flash_sp]: 8.88994e-06 [merge_comm]: 7.11996e-06 [allreduce_fusion]: 4.9402e-06 [matmul_add_comm_reduction]: 9.70997e-06 [allreduce_slice_to_reducescatter]: 3.50177e-07 [virtual_shard_identity]: 9.9102e-06 [virtual_dataset]: 9.45991e-06 [get_grad_eliminate_]: 8.76002e-06 [virtual_output]: 8.84011e-06 [merge_forward]: 5.22984e-06 [cell_reuse_recompute_pass]: 2.12993e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.84602e-05 [before_grad]: 1.64602e-05 [inplace_validation]: 4.80004e-06 [meta_fg_expand]: 5.32018e-06 [inplace_validation_after_expand]: 5.72996e-06 [flash_sp_send_recv_attached]: 2.37999e-06 [receive_attached]: 3.87011e-06 [after_resolve]: 1.19903e-05 [a_after_grad]: 1.48602e-05 [special_op_eliminate]: 8.16025e-06 [renormalize]: 0.00047944 [add_forward_monad_depend]: 2.62028e-06 [auto_monad_grad]: 1.81003e-06 [auto_monad_eliminator]: 2.586e-05 [cse]: 2.76002e-05 [a_3]: 5.93499e-05 [Cycle 2]: 0.00080538, [43] [expand_dump_flag]: 1.05985e-06 [switch_simplify]: 9.29041e-06 [loop_unroll]: 7.62008e-06 [a_1]: 0.0002088 [recompute_prepare]: 7.66013e-06 [updatestate_depend_eliminate]: 5.58002e-06 [updatestate_assign_eliminate]: 5.12972e-06 [updatestate_loads_eliminate]: 4.93973e-06 [parameter_eliminate]: 1.09989e-06 [a_2]: 0.00010775 [accelerated_algorithm]: 8.82009e-06 [shard]: 1.07987e-06 [meta_shard_fg_expand]: 2.42004e-06 [shard_inline]: 8.2599e-06 [auto_parallel]: 1.02399e-05 [parallel]: 2.96021e-06 [flash_sp]: 2.90992e-06 [merge_comm]: 5.66989e-06 [allreduce_fusion]: 4.82984e-06 [matmul_add_comm_reduction]: 7.69971e-06 [allreduce_slice_to_reducescatter]: 2.59839e-07 [virtual_shard_identity]: 9.16002e-06 [virtual_dataset]: 7.83009e-06 [get_grad_eliminate_]: 7.70018e-06 [virtual_output]: 7.22986e-06 [merge_forward]: 4.98025e-06 [cell_reuse_recompute_pass]: 2.00979e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.55303e-05 [before_grad]: 1.59601e-05 [inplace_validation]: 4.23007e-06 [meta_fg_expand]: 4.81028e-06 [inplace_validation_after_expand]: 5.08968e-06 [flash_sp_send_recv_attached]: 8.29808e-07 [receive_attached]: 6.9011e-07 [after_resolve]: 1.16499e-05 [a_after_grad]: 1.31796e-05 [special_op_eliminate]: 7.39004e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 9.29926e-07 [auto_monad_grad]: 1.03982e-06 [auto_monad_eliminator]: 1.71801e-05 [cse]: 2.20505e-05 [a_3]: 4.95398e-05 [py_interpret_to_execute_after_opt_a]: 1.171e-05 [slice_cell_reuse_recomputed_activation]: 1.74996e-06 [rewriter_after_opt_a]: 0.00013476 [convert_after_rewriter]: 8.49972e-06 [order_py_execute_after_rewriter]: 5.91995e-06 [opt_b]: 0.00025572, [1] [Cycle 1]: 0.00025014, [7] [b_1]: 0.00017109 [b_2]: 1.211e-05 [updatestate_depend_eliminate]: 5.17024e-06 [updatestate_assign_eliminate]: 4.21004e-06 [updatestate_loads_eliminate]: 6.13043e-06 [renormalize]: 3.1013e-07 [cse]: 1.84299e-05 [optimize_parallel_all_gather_comm]: 8.15e-06 [overlap_param_gather]: 1.08965e-06 [cconv]: 1.73301e-05 [loop_unroll]: 0.00053078 [opt_after_cconv]: 0.00014754, [1] [Cycle 1]: 0.00014182, [7] [c_1]: 5.65001e-05 [parameter_eliminate]: 1.74018e-06 [updatestate_depend_eliminate]: 6.85966e-06 [updatestate_assign_eliminate]: 4.57978e-06 [updatestate_loads_eliminate]: 1.746e-05 [cse]: 2.085e-05 [renormalize]: 3.50177e-07 [remove_dup_value]: 9.32999e-06 [tuple_transform]: 7.04401e-05, [1] [Cycle 1]: 6.57099e-05, [2] [d_1]: 5.50798e-05 [renormalize]: 2.19792e-07 [partial_unused_args_eliminate]: 1.60001e-06 [add_cache_embedding]: 1.24099e-05 [add_recomputation]: 6.659e-05 [cse_after_recomputation]: 3.09399e-05, [1] [Cycle 1]: 2.485e-05, [1] [cse]: 1.963e-05 [environ_conv]: 6.29015e-06 [swap_dp_allreduce_reducescatter]: 9.1698e-06 [bias_add_comm_swap]: 1.80025e-06 [label_micro_interleaved_index]: 1.84961e-06 [label_fine_grained_interleaved_index]: 1.56974e-06 [merge_cast_opt]: 8.30274e-07 [slice_recompute_activation]: 1.57999e-06 [micro_interleaved_order_control]: 1.17999e-06 [assign_add_opt]: 7.31973e-06 [ForceFp32Comm]: 5.99772e-07 [remove_cast_before_assign_add]: 6.50063e-07 [full_micro_interleaved_order_control]: 1.77976e-06 [reorder_send_recv_between_fp_bp]: 1.45985e-06 [comm_op_add_attrs]: 6.40284e-07 [add_comm_op_reuse_tag]: 6.59842e-07 [interleave_split_concat_branches]: 6.20261e-07 [interleave_parallel_branches]: 5.69969e-07 [overlap_opt_shard_in_pipeline]: 2.92994e-06 [overlap_opt_shard_grad_in_pipeline]: 1.47987e-06 [control_data_broadcast_order]: 7.19912e-07 [grouped_pairwise_exchange_alltoall]: 7.70204e-07 [offloading_packed_experts]: 8.09785e-07 [overlap_recompute_and_grad_model_parallel]: 2.1602e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.89876e-07 [overlap_recompute_allgather_and_fa_grad]: 7.19912e-07 [overlap_grad_ring_attention]: 1.56974e-06 [overlap_grad_flash_sp]: 1.44797e-05 [begin_end_overlap_inline]: 5.29923e-07 [split_matmul_comm_elemetwise]: 1.2801e-06 [split_layernorm_comm]: 1.22003e-06 [handle_group_info]: 9.99775e-07 [symbol_engine_optimizer]: 9.972e-05, [1] [Cycle 1]: 9.515e-05, [6] [build]: 3.60003e-06 [elim_shapecalc]: 1.38003e-05 [elim_not_effective]: 1.94497e-05 [opt_reshape]: 1.10697e-05 [fold_const_symbol]: 1.95298e-05 [renormalize]: 2.5006e-07 [pipeline_parallel_scheduler]: 1.07009e-06 [auto_monad_reorder]: 2.64701e-05 [get_jit_bprop_graph]: 3.50177e-07 [rewriter_after_jit_bprop_graph]: 3.00352e-07 [eliminate_special_op_node]: 0.00051623 [distribtued_split]: 3.53996e-05 [validate]: 3.28301e-05 [task_emit]: 0.0785631 [execute]: 9.70997e-06 Sums bootstrap : 0.000381s : 0.43% type_inference : 0.004847s : 5.51% auto_monad : 0.000146s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000020s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.02% optimize.opt_a.a_1 : 0.000583s : 0.66% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000236s : 0.27% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.01% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000032s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.01% optimize.opt_a.after_resolve : 0.000024s : 0.03% optimize.opt_a.a_after_grad : 0.000028s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000480s : 0.54% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000043s : 0.05% optimize.opt_a.cse : 0.000050s : 0.06% optimize.opt_a.a_3 : 0.000109s : 0.12% optimize.py_interpret_to_execute_after_opt_a : 0.000012s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000135s : 0.15% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000171s : 0.19% optimize.opt_b.b_2 : 0.000012s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000017s : 0.02% optimize.loop_unroll : 0.000531s : 0.60% optimize.opt_after_cconv.c_1 : 0.000057s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000017s : 0.02% optimize.opt_after_cconv.cse : 0.000021s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.06% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.01% optimize.add_recomputation : 0.000067s : 0.08% optimize.cse_after_recomputation.cse : 0.000020s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000009s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000003s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000019s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000020s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000026s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000516s : 0.59% distribtued_split : 0.000035s : 0.04% validate : 0.000033s : 0.04% task_emit : 0.078563s : 89.27% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000129 63 4.57% : 0.000006s : 2: substitution.depend_value_elim 3.04% : 0.000004s : 5: substitution.elim_not_effective 2.52% : 0.000003s : 5: substitution.fold_const_symbol 4.84% : 0.000006s : 6: substitution.graph_param_transform 49.13% : 0.000063s : 1: substitution.inline 4.76% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.39% : 0.000004s : 6: substitution.load_eliminater 2.10% : 0.000003s : 2: substitution.reduce_all_const_elim 6.75% : 0.000009s : 10: substitution.remove_not_recompute_node 2.04% : 0.000003s : 2: substitution.replace_old_param 8.87% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.00% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.004819 2 92.43% : 0.004454s : 1: type_inference.infer 7.57% : 0.000365s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000062 1 100.00% : 0.000062s : 1: match.inline ------[predicate.] 0.000230 1420 0.85% : 0.000002s : 13: predicate.accumulaten_eliminater 1.07% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.75% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.80% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.24% : 0.000005s : 25: predicate.arithmetic_simplify 0.87% : 0.000002s : 13: predicate.cast_eliminate 0.80% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.13% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.57% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.17% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.25% : 0.000003s : 19: predicate.environ_get_depend_swap 1.93% : 0.000004s : 31: predicate.environ_get_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.30% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.59% : 0.000013s : 63: predicate.inline 1.08% : 0.000002s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.08% : 0.000002s : 12: predicate.less_batch_normalization 1.66% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.36% : 0.000005s : 38: predicate.load_eliminater 1.11% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.21% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.76% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.83% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.62% : 0.000001s : 6: predicate.mutable_eliminate 0.53% : 0.000001s : 6: predicate.opt_reshape 0.63% : 0.000001s : 6: predicate.parallel_virtual_node 1.22% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.91% : 0.000002s : 12: predicate.reduce_all_const_elim 1.18% : 0.000003s : 13: predicate.reduce_eliminate 0.57% : 0.000001s : 12: predicate.remove_not_recompute_node 1.11% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.28% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 1.01% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 0.97% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.35% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 0.94% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.00% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.30% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.99% : 0.000002s : 14: predicate.switch_defer_inline 1.69% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.11% : 0.000009s : 43: predicate.switch_simplify 0.85% : 0.000002s : 13: predicate.tile_eliminate 0.74% : 0.000002s : 13: predicate.transpose_eliminate 1.74% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.50% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.71% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.74% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.56% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.51% : 0.000003s : 25: predicate.tuple_to_list_eliminator_ 2.58% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 4.31% : 0.000010s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.92% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000249 4 4.89% : 0.000012s : 1: func_graph_cloner_run.FuncGraphClonerGraph 95.11% : 0.000237s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.102999 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000071s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000157s : 1: auto_monad 0.03% : 0.000034s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.40% : 0.000416s : 1: bootstrap 0.02% : 0.000021s : 1: cconv 0.01% : 0.000006s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000034s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.04% : 0.000043s : 1: distribtued_split 0.51% : 0.000529s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000005s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000006s : 1: label_micro_interleaved_index 0.52% : 0.000540s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.13% : 0.001167s : 80: opt.transform.opt_a 0.05% : 0.000055s : 1: opt.transform.opt_after_cconv 0.16% : 0.000164s : 27: opt.transform.opt_b 0.05% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.06% : 0.000060s : 4: opt.transform.symbol_engine_opt 6.13% : 0.006311s : 1: opt_a 0.15% : 0.000151s : 1: opt_after_cconv 0.25% : 0.000259s : 1: opt_b 7.88% : 0.008119s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000006s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000007s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000030s : 1: pre_auto_parallel 0.02% : 0.000024s : 1: py_interpret_to_execute 0.02% : 0.000016s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000005s : 1: remove_cast_before_assign_add 0.01% : 0.000013s : 1: remove_dup_value 0.25% : 0.000262s : 1: renormalize.infer 0.21% : 0.000211s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000140s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000104s : 1: symbol_engine_optimizer 76.30% : 0.078590s : 1: task_emit 0.07% : 0.000074s : 1: tuple_transform 4.72% : 0.004863s : 1: type_inference 0.07% : 0.000067s : 1: validate TotalTime = 0.095755, [21] [bootstrap]: 0.00045746 [type_inference]: 0.00513908 [auto_monad]: 0.00017921 [graph_reusing]: 2.71993e-06 [inline]: 1.53994e-06 [parallel-infer-symbol]: 2.56998e-06 [pre_auto_parallel]: 3.08901e-05 [insert-virtual-dataset]: 3.09013e-06 [parallel-infer-symbol-second]: 4.80097e-07 [dataset_repeat_opt]: 1.76998e-06 [pipeline_split]: 1.64006e-06 [optimize]: 0.00858061, [52] [py_interpret_to_execute]: 2.31098e-05 [rewriter_before_opt_a]: 3.79998e-05 [opt_a]: 0.00662628, [2] [Cycle 1]: 0.00177591, [43] [expand_dump_flag]: 3.90969e-06 [switch_simplify]: 3.11201e-05 [loop_unroll]: 1.35e-05 [a_1]: 0.00038841 [recompute_prepare]: 9.51998e-06 [updatestate_depend_eliminate]: 1.07498e-05 [updatestate_assign_eliminate]: 7.78027e-06 [updatestate_loads_eliminate]: 9.41986e-06 [parameter_eliminate]: 3.34997e-06 [a_2]: 0.00013023 [accelerated_algorithm]: 1.10203e-05 [shard]: 2.27988e-06 [meta_shard_fg_expand]: 3.96976e-06 [shard_inline]: 1.02399e-05 [auto_parallel]: 1.31601e-05 [parallel]: 8.28039e-06 [flash_sp]: 1.24201e-05 [merge_comm]: 9.70997e-06 [allreduce_fusion]: 6.8699e-06 [matmul_add_comm_reduction]: 1.23102e-05 [allreduce_slice_to_reducescatter]: 4.60073e-07 [virtual_shard_identity]: 1.05598e-05 [virtual_dataset]: 8.78982e-06 [get_grad_eliminate_]: 1.00802e-05 [virtual_output]: 9.43989e-06 [merge_forward]: 7.18003e-06 [cell_reuse_recompute_pass]: 1.97021e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.09697e-05 [before_grad]: 1.75601e-05 [inplace_validation]: 5.71972e-06 [meta_fg_expand]: 5.91017e-06 [inplace_validation_after_expand]: 7.16979e-06 [flash_sp_send_recv_attached]: 3.39001e-06 [receive_attached]: 4.98025e-06 [after_resolve]: 1.70399e-05 [a_after_grad]: 1.388e-05 [special_op_eliminate]: 8.55001e-06 [renormalize]: 0.00053762 [add_forward_monad_depend]: 3.91994e-06 [auto_monad_grad]: 2.16998e-06 [auto_monad_eliminator]: 3.38601e-05 [cse]: 3.61502e-05 [a_3]: 5.87902e-05 [Cycle 2]: 0.0008364, [43] [expand_dump_flag]: 1.15018e-06 [switch_simplify]: 9.06968e-06 [loop_unroll]: 8.00006e-06 [a_1]: 0.00020933 [recompute_prepare]: 7.43009e-06 [updatestate_depend_eliminate]: 6.06989e-06 [updatestate_assign_eliminate]: 5.03985e-06 [updatestate_loads_eliminate]: 5.21028e-06 [parameter_eliminate]: 1.51992e-06 [a_2]: 0.00010795 [accelerated_algorithm]: 8.82987e-06 [shard]: 1.20979e-06 [meta_shard_fg_expand]: 2.86009e-06 [shard_inline]: 8.30041e-06 [auto_parallel]: 1.19801e-05 [parallel]: 3.85987e-06 [flash_sp]: 3.70992e-06 [merge_comm]: 6.3302e-06 [allreduce_fusion]: 4.82006e-06 [matmul_add_comm_reduction]: 7.7202e-06 [allreduce_slice_to_reducescatter]: 2.30037e-07 [virtual_shard_identity]: 8.69017e-06 [virtual_dataset]: 7.56979e-06 [get_grad_eliminate_]: 7.64988e-06 [virtual_output]: 7.29971e-06 [merge_forward]: 4.9402e-06 [cell_reuse_recompute_pass]: 1.8701e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.59498e-05 [before_grad]: 1.36099e-05 [inplace_validation]: 4.54998e-06 [meta_fg_expand]: 5.32018e-06 [inplace_validation_after_expand]: 8.15e-06 [flash_sp_send_recv_attached]: 9.59728e-07 [receive_attached]: 8.00006e-07 [after_resolve]: 1.06101e-05 [a_after_grad]: 1.22902e-05 [special_op_eliminate]: 1.05002e-05 [renormalize]: 7.96281e-08 [add_forward_monad_depend]: 1.03004e-06 [auto_monad_grad]: 1.22003e-06 [auto_monad_eliminator]: 2.165e-05 [cse]: 2.48901e-05 [a_3]: 5.09298e-05 [py_interpret_to_execute_after_opt_a]: 1.13696e-05 [slice_cell_reuse_recomputed_activation]: 2.66032e-06 [rewriter_after_opt_a]: 0.00015055 [convert_after_rewriter]: 9.91998e-06 [order_py_execute_after_rewriter]: 6.36e-06 [opt_b]: 0.00026678, [1] [Cycle 1]: 0.00026031, [7] [b_1]: 0.00017128 [b_2]: 1.234e-05 [updatestate_depend_eliminate]: 5.8501e-06 [updatestate_assign_eliminate]: 7.79005e-06 [updatestate_loads_eliminate]: 6.76978e-06 [renormalize]: 3.00352e-07 [cse]: 2.07499e-05 [optimize_parallel_all_gather_comm]: 1.04504e-05 [overlap_param_gather]: 1.68988e-06 [cconv]: 2.74098e-05 [loop_unroll]: 0.0005772 [opt_after_cconv]: 0.00014131, [1] [Cycle 1]: 0.00013437, [7] [c_1]: 5.49401e-05 [parameter_eliminate]: 2.52016e-06 [updatestate_depend_eliminate]: 8.15e-06 [updatestate_assign_eliminate]: 4.65987e-06 [updatestate_loads_eliminate]: 6.11972e-06 [cse]: 2.35899e-05 [renormalize]: 4.50294e-07 [remove_dup_value]: 1.41701e-05 [tuple_transform]: 8.04402e-05, [1] [Cycle 1]: 7.52104e-05, [2] [d_1]: 6.16703e-05 [renormalize]: 2.19792e-07 [partial_unused_args_eliminate]: 2.10991e-06 [add_cache_embedding]: 1.42199e-05 [add_recomputation]: 7.81701e-05 [cse_after_recomputation]: 3.29502e-05, [1] [Cycle 1]: 2.601e-05, [1] [cse]: 2.01301e-05 [environ_conv]: 8.33999e-06 [swap_dp_allreduce_reducescatter]: 1.01002e-05 [bias_add_comm_swap]: 2.59979e-06 [label_micro_interleaved_index]: 2.08011e-06 [label_fine_grained_interleaved_index]: 2.60025e-06 [merge_cast_opt]: 1.60979e-06 [slice_recompute_activation]: 1.91992e-06 [micro_interleaved_order_control]: 2.21981e-06 [assign_add_opt]: 8.36002e-06 [ForceFp32Comm]: 9.29926e-07 [remove_cast_before_assign_add]: 1.36998e-06 [full_micro_interleaved_order_control]: 2.54996e-06 [reorder_send_recv_between_fp_bp]: 2.39024e-06 [comm_op_add_attrs]: 1.09011e-06 [add_comm_op_reuse_tag]: 1.10036e-06 [interleave_split_concat_branches]: 9.29926e-07 [interleave_parallel_branches]: 9.4017e-07 [overlap_opt_shard_in_pipeline]: 2.36975e-06 [overlap_opt_shard_grad_in_pipeline]: 2.31992e-06 [control_data_broadcast_order]: 1.20001e-06 [grouped_pairwise_exchange_alltoall]: 1.30013e-06 [offloading_packed_experts]: 1.13994e-06 [overlap_recompute_and_grad_model_parallel]: 2.21003e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.00123e-07 [overlap_recompute_allgather_and_fa_grad]: 1.20979e-06 [overlap_grad_ring_attention]: 2.77022e-06 [overlap_grad_flash_sp]: 1.752e-05 [begin_end_overlap_inline]: 8.39587e-07 [split_matmul_comm_elemetwise]: 2.43029e-06 [split_layernorm_comm]: 2.06009e-06 [handle_group_info]: 1.02026e-06 [symbol_engine_optimizer]: 0.00010305, [1] [Cycle 1]: 9.772e-05, [6] [build]: 4.63007e-06 [elim_shapecalc]: 1.51303e-05 [elim_not_effective]: 1.84998e-05 [opt_reshape]: 1.16304e-05 [fold_const_symbol]: 1.65598e-05 [renormalize]: 3.39933e-07 [pipeline_parallel_scheduler]: 1.61026e-06 [auto_monad_reorder]: 3.41199e-05 [get_jit_bprop_graph]: 5.30388e-07 [rewriter_after_jit_bprop_graph]: 4.89876e-07 [eliminate_special_op_node]: 0.00053778 [distribtued_split]: 4.39999e-05 [validate]: 3.80799e-05 [task_emit]: 0.0803604 [execute]: 1.29403e-05 Sums bootstrap : 0.000457s : 0.51% type_inference : 0.005139s : 5.68% auto_monad : 0.000179s : 0.20% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000031s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000023s : 0.03% optimize.rewriter_before_opt_a : 0.000038s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.04% optimize.opt_a.loop_unroll : 0.000022s : 0.02% optimize.opt_a.a_1 : 0.000598s : 0.66% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000017s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000013s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000015s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000238s : 0.26% optimize.opt_a.accelerated_algorithm : 0.000020s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000019s : 0.02% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000016s : 0.02% optimize.opt_a.allreduce_fusion : 0.000012s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000018s : 0.02% optimize.opt_a.virtual_output : 0.000017s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000037s : 0.04% optimize.opt_a.before_grad : 0.000031s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000015s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.01% optimize.opt_a.after_resolve : 0.000028s : 0.03% optimize.opt_a.a_after_grad : 0.000026s : 0.03% optimize.opt_a.special_op_eliminate : 0.000019s : 0.02% optimize.opt_a.renormalize : 0.000538s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000056s : 0.06% optimize.opt_a.cse : 0.000061s : 0.07% optimize.opt_a.a_3 : 0.000110s : 0.12% optimize.py_interpret_to_execute_after_opt_a : 0.000011s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000151s : 0.17% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000171s : 0.19% optimize.opt_b.b_2 : 0.000012s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000008s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000007s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000021s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000010s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000027s : 0.03% optimize.loop_unroll : 0.000577s : 0.64% optimize.opt_after_cconv.c_1 : 0.000055s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000024s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000062s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000078s : 0.09% optimize.cse_after_recomputation.cse : 0.000020s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000010s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000003s : 0.00% optimize.overlap_grad_flash_sp : 0.000018s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000015s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000012s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000034s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000538s : 0.59% distribtued_split : 0.000044s : 0.05% validate : 0.000038s : 0.04% task_emit : 0.080360s : 88.75% execute : 0.000013s : 0.01% Time group info: ------[substitution.] 0.000154 63 4.50% : 0.000007s : 2: substitution.depend_value_elim 2.48% : 0.000004s : 5: substitution.elim_not_effective 1.87% : 0.000003s : 5: substitution.fold_const_symbol 5.76% : 0.000009s : 6: substitution.graph_param_transform 50.75% : 0.000078s : 1: substitution.inline 4.71% : 0.000007s : 10: substitution.j_node_and_user_rematch 3.06% : 0.000005s : 6: substitution.load_eliminater 2.30% : 0.000004s : 2: substitution.reduce_all_const_elim 6.03% : 0.000009s : 10: substitution.remove_not_recompute_node 3.06% : 0.000005s : 2: substitution.replace_old_param 7.75% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.72% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.005106 2 92.88% : 0.004742s : 1: type_inference.infer 7.12% : 0.000364s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000077 1 100.00% : 0.000077s : 1: match.inline ------[predicate.] 0.000238 1420 0.76% : 0.000002s : 13: predicate.accumulaten_eliminater 1.24% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 1.20% : 0.000003s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.19% : 0.000005s : 25: predicate.arithmetic_simplify 0.90% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.45% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.90% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.63% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.03% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.06% : 0.000003s : 19: predicate.environ_get_depend_swap 2.04% : 0.000005s : 31: predicate.environ_get_eliminate 1.06% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.31% : 0.000003s : 14: predicate.float_depend_g_call 0.68% : 0.000002s : 12: predicate.float_environ_get_switch 1.02% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.72% : 0.000002s : 12: predicate.incorporate_call 0.65% : 0.000002s : 12: predicate.incorporate_call_switch 5.51% : 0.000013s : 63: predicate.inline 1.02% : 0.000002s : 12: predicate.inline_without_move 0.42% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.06% : 0.000003s : 12: predicate.less_batch_normalization 1.81% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.35% : 0.000006s : 38: predicate.load_eliminater 1.26% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.13% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.71% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.78% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.71% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.69% : 0.000002s : 13: predicate.minmaximum_grad 0.78% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.42% : 0.000001s : 6: predicate.parallel_virtual_node 1.18% : 0.000003s : 14: predicate.partial_defer_inline 1.18% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 1.07% : 0.000003s : 12: predicate.reduce_all_const_elim 0.99% : 0.000002s : 13: predicate.reduce_eliminate 0.59% : 0.000001s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.79% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.06% : 0.000003s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.95% : 0.000002s : 12: predicate.shard_identity_eliminate 1.55% : 0.000004s : 18: predicate.special_op_eliminate 0.99% : 0.000002s : 12: predicate.specialize_transform 0.97% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.04% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.24% : 0.000005s : 38: predicate.stopgrad_eliminater 0.39% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.63% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.34% : 0.000010s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.76% : 0.000002s : 13: predicate.transpose_eliminate 1.72% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.65% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.47% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.73% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.68% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.47% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.34% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 4.41% : 0.000011s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.78% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.86% : 0.000002s : 12: predicate.virtual_output_eliminate 0.49% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000222 4 7.87% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.13% : 0.000204s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.106367 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000083s : 1: add_recomputation 0.01% : 0.000014s : 1: assign_add_opt 0.18% : 0.000193s : 1: auto_monad 0.04% : 0.000042s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000008s : 1: bias_add_comm_swap 0.47% : 0.000502s : 1: bootstrap 0.03% : 0.000032s : 1: cconv 0.00% : 0.000005s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.01% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000036s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000053s : 1: distribtued_split 0.52% : 0.000553s : 1: eliminate_special_op_node 0.01% : 0.000014s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.01% : 0.000007s : 1: full_micro_interleaved_order_control 0.01% : 0.000008s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000005s : 1: interleave_parallel_branches 0.01% : 0.000007s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000006s : 1: label_micro_interleaved_index 0.55% : 0.000588s : 1: loop_unroll 0.01% : 0.000006s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.12% : 0.001196s : 80: opt.transform.opt_a 0.05% : 0.000053s : 1: opt.transform.opt_after_cconv 0.15% : 0.000163s : 27: opt.transform.opt_b 0.06% : 0.000060s : 1: opt.transform.opt_trans_graph 0.03% : 0.000036s : 3: opt.transform.special_op_eliminate 0.05% : 0.000058s : 4: opt.transform.symbol_engine_opt 6.23% : 0.006630s : 1: opt_a 0.14% : 0.000145s : 1: opt_after_cconv 0.25% : 0.000270s : 1: opt_b 8.07% : 0.008589s : 1: optimize 0.01% : 0.000014s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000022s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000006s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000007s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000006s : 1: parallel-infer-symbol-second 0.01% : 0.000007s : 1: partial_unused_args_eliminate 0.01% : 0.000008s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000037s : 1: pre_auto_parallel 0.03% : 0.000027s : 1: py_interpret_to_execute 0.01% : 0.000016s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000005s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.28% : 0.000302s : 1: renormalize.infer 0.22% : 0.000229s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000007s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000157s : 1: rewriter_after_opt_a 0.04% : 0.000043s : 1: rewriter_before_opt_a 0.01% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000006s : 1: split_layernorm_comm 0.01% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000014s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000109s : 1: symbol_engine_optimizer 75.58% : 0.080393s : 1: task_emit 0.08% : 0.000084s : 1: tuple_transform 4.85% : 0.005160s : 1: type_inference 0.10% : 0.000111s : 1: validate TotalTime = 0.090594, [21] [bootstrap]: 0.00038677 [type_inference]: 0.004758 [auto_monad]: 0.00014035 [graph_reusing]: 1.77976e-06 [inline]: 1.11014e-06 [parallel-infer-symbol]: 1.55997e-06 [pre_auto_parallel]: 2.57902e-05 [insert-virtual-dataset]: 2.29012e-06 [parallel-infer-symbol-second]: 4.09782e-07 [dataset_repeat_opt]: 8.801e-07 [pipeline_split]: 1.08033e-06 [optimize]: 0.00804178, [52] [py_interpret_to_execute]: 1.86199e-05 [rewriter_before_opt_a]: 3.44301e-05 [opt_a]: 0.00627552, [2] [Cycle 1]: 0.0016093, [43] [expand_dump_flag]: 2.82982e-06 [switch_simplify]: 2.62e-05 [loop_unroll]: 1.34003e-05 [a_1]: 0.00036136 [recompute_prepare]: 9.09017e-06 [updatestate_depend_eliminate]: 1.61901e-05 [updatestate_assign_eliminate]: 7.11018e-06 [updatestate_loads_eliminate]: 7.51996e-06 [parameter_eliminate]: 2.6403e-06 [a_2]: 0.00012634 [accelerated_algorithm]: 8.91974e-06 [shard]: 1.77976e-06 [meta_shard_fg_expand]: 2.90014e-06 [shard_inline]: 9.68017e-06 [auto_parallel]: 1.255e-05 [parallel]: 6.78981e-06 [flash_sp]: 7.97026e-06 [merge_comm]: 7.26013e-06 [allreduce_fusion]: 5.29969e-06 [matmul_add_comm_reduction]: 9.38028e-06 [allreduce_slice_to_reducescatter]: 3.29688e-07 [virtual_shard_identity]: 1.234e-05 [virtual_dataset]: 8.71019e-06 [get_grad_eliminate_]: 8.72975e-06 [virtual_output]: 9.05991e-06 [merge_forward]: 6.18026e-06 [cell_reuse_recompute_pass]: 1.57999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.71303e-05 [before_grad]: 1.75801e-05 [inplace_validation]: 4.49969e-06 [meta_fg_expand]: 5.24009e-06 [inplace_validation_after_expand]: 6.76e-06 [flash_sp_send_recv_attached]: 2.12993e-06 [receive_attached]: 3.30014e-06 [after_resolve]: 1.33798e-05 [a_after_grad]: 1.546e-05 [special_op_eliminate]: 9.84035e-06 [renormalize]: 0.0004729 [add_forward_monad_depend]: 2.5304e-06 [auto_monad_grad]: 1.39e-06 [auto_monad_eliminator]: 2.42302e-05 [cse]: 2.721e-05 [a_3]: 5.97402e-05 [Cycle 2]: 0.0008093, [43] [expand_dump_flag]: 9.30391e-07 [switch_simplify]: 9.58005e-06 [loop_unroll]: 7.84034e-06 [a_1]: 0.00020758 [recompute_prepare]: 7.38027e-06 [updatestate_depend_eliminate]: 5.90971e-06 [updatestate_assign_eliminate]: 4.92018e-06 [updatestate_loads_eliminate]: 4.99003e-06 [parameter_eliminate]: 1.06031e-06 [a_2]: 0.00010525 [accelerated_algorithm]: 8.74e-06 [shard]: 1.05985e-06 [meta_shard_fg_expand]: 2.61981e-06 [shard_inline]: 8.54023e-06 [auto_parallel]: 1.02399e-05 [parallel]: 3.23961e-06 [flash_sp]: 2.53972e-06 [merge_comm]: 5.72996e-06 [allreduce_fusion]: 4.92996e-06 [matmul_add_comm_reduction]: 7.87992e-06 [allreduce_slice_to_reducescatter]: 2.89641e-07 [virtual_shard_identity]: 9.01008e-06 [virtual_dataset]: 8.13976e-06 [get_grad_eliminate_]: 7.71042e-06 [virtual_output]: 7.52043e-06 [merge_forward]: 4.55976e-06 [cell_reuse_recompute_pass]: 1.70991e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.55503e-05 [before_grad]: 1.348e-05 [inplace_validation]: 4.25009e-06 [meta_fg_expand]: 4.84008e-06 [inplace_validation_after_expand]: 5.20982e-06 [flash_sp_send_recv_attached]: 8.10251e-07 [receive_attached]: 7.69738e-07 [after_resolve]: 1.173e-05 [a_after_grad]: 1.411e-05 [special_op_eliminate]: 9.01986e-06 [renormalize]: 7.96281e-08 [add_forward_monad_depend]: 8.50298e-07 [auto_monad_grad]: 1.0198e-06 [auto_monad_eliminator]: 1.824e-05 [cse]: 2.15098e-05 [a_3]: 4.94402e-05 [py_interpret_to_execute_after_opt_a]: 9.57027e-06 [slice_cell_reuse_recomputed_activation]: 1.64006e-06 [rewriter_after_opt_a]: 0.00012923 [convert_after_rewriter]: 7.71042e-06 [order_py_execute_after_rewriter]: 5.58002e-06 [opt_b]: 0.00024847, [1] [Cycle 1]: 0.00024308, [7] [b_1]: 0.00016629 [b_2]: 1.01901e-05 [updatestate_depend_eliminate]: 5.37001e-06 [updatestate_assign_eliminate]: 4.42984e-06 [updatestate_loads_eliminate]: 5.26011e-06 [renormalize]: 3.30154e-07 [cse]: 1.89599e-05 [optimize_parallel_all_gather_comm]: 7.89016e-06 [overlap_param_gather]: 1.09011e-06 [cconv]: 1.61501e-05 [loop_unroll]: 0.00054943 [opt_after_cconv]: 0.00013255, [1] [Cycle 1]: 0.0001267, [7] [c_1]: 5.37802e-05 [parameter_eliminate]: 1.91992e-06 [updatestate_depend_eliminate]: 7.30017e-06 [updatestate_assign_eliminate]: 4.58024e-06 [updatestate_loads_eliminate]: 5.0501e-06 [cse]: 2.13301e-05 [renormalize]: 3.19909e-07 [remove_dup_value]: 1.00899e-05 [tuple_transform]: 6.68801e-05, [1] [Cycle 1]: 6.27898e-05, [2] [d_1]: 5.39403e-05 [renormalize]: 1.99769e-07 [partial_unused_args_eliminate]: 1.36998e-06 [add_cache_embedding]: 1.33398e-05 [add_recomputation]: 6.42398e-05 [cse_after_recomputation]: 3.058e-05, [1] [Cycle 1]: 2.56803e-05, [1] [cse]: 2.03e-05 [environ_conv]: 7.22986e-06 [swap_dp_allreduce_reducescatter]: 9.14e-06 [bias_add_comm_swap]: 1.55997e-06 [label_micro_interleaved_index]: 1.86032e-06 [label_fine_grained_interleaved_index]: 1.81003e-06 [merge_cast_opt]: 8.49832e-07 [slice_recompute_activation]: 1.15996e-06 [micro_interleaved_order_control]: 1.34017e-06 [assign_add_opt]: 7.06967e-06 [ForceFp32Comm]: 5.20144e-07 [remove_cast_before_assign_add]: 5.99772e-07 [full_micro_interleaved_order_control]: 1.89012e-06 [reorder_send_recv_between_fp_bp]: 1.12969e-06 [comm_op_add_attrs]: 7.60425e-07 [add_comm_op_reuse_tag]: 5.99772e-07 [interleave_split_concat_branches]: 5.39701e-07 [interleave_parallel_branches]: 5.20144e-07 [overlap_opt_shard_in_pipeline]: 2.34041e-06 [overlap_opt_shard_grad_in_pipeline]: 1.24983e-06 [control_data_broadcast_order]: 6.79865e-07 [grouped_pairwise_exchange_alltoall]: 6.99889e-07 [offloading_packed_experts]: 6.50063e-07 [overlap_recompute_and_grad_model_parallel]: 1.55997e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.89876e-07 [overlap_recompute_allgather_and_fa_grad]: 5.79748e-07 [overlap_grad_ring_attention]: 1.4198e-06 [overlap_grad_flash_sp]: 1.38702e-05 [begin_end_overlap_inline]: 4.70318e-07 [split_matmul_comm_elemetwise]: 1.34017e-06 [split_layernorm_comm]: 1.15996e-06 [handle_group_info]: 5.69969e-07 [symbol_engine_optimizer]: 9.955e-05, [1] [Cycle 1]: 9.52203e-05, [6] [build]: 5.09014e-06 [elim_shapecalc]: 1.26897e-05 [elim_not_effective]: 2.04304e-05 [opt_reshape]: 1.15098e-05 [fold_const_symbol]: 1.731e-05 [renormalize]: 2.10013e-07 [pipeline_parallel_scheduler]: 1.09011e-06 [auto_monad_reorder]: 2.38703e-05 [get_jit_bprop_graph]: 3.7998e-07 [rewriter_after_jit_bprop_graph]: 3.29688e-07 [eliminate_special_op_node]: 0.00057712 [distribtued_split]: 3.41102e-05 [validate]: 3.33702e-05 [task_emit]: 0.0763044 [execute]: 8.14022e-06 Sums bootstrap : 0.000387s : 0.45% type_inference : 0.004758s : 5.55% auto_monad : 0.000140s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000019s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000036s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.02% optimize.opt_a.a_1 : 0.000569s : 0.66% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000022s : 0.03% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000232s : 0.27% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000018s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000021s : 0.02% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000017s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000031s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000025s : 0.03% optimize.opt_a.a_after_grad : 0.000030s : 0.03% optimize.opt_a.special_op_eliminate : 0.000019s : 0.02% optimize.opt_a.renormalize : 0.000473s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000042s : 0.05% optimize.opt_a.cse : 0.000049s : 0.06% optimize.opt_a.a_3 : 0.000109s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000129s : 0.15% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000166s : 0.19% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000549s : 0.64% optimize.opt_after_cconv.c_1 : 0.000054s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.06% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000064s : 0.07% optimize.cse_after_recomputation.cse : 0.000020s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000009s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000020s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000012s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000577s : 0.67% distribtued_split : 0.000034s : 0.04% validate : 0.000033s : 0.04% task_emit : 0.076304s : 89.06% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000127 63 4.23% : 0.000005s : 2: substitution.depend_value_elim 4.29% : 0.000005s : 5: substitution.elim_not_effective 2.31% : 0.000003s : 5: substitution.fold_const_symbol 4.85% : 0.000006s : 6: substitution.graph_param_transform 49.56% : 0.000063s : 1: substitution.inline 4.14% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.14% : 0.000004s : 6: substitution.load_eliminater 2.25% : 0.000003s : 2: substitution.reduce_all_const_elim 5.89% : 0.000007s : 10: substitution.remove_not_recompute_node 2.08% : 0.000003s : 2: substitution.replace_old_param 8.08% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 9.19% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.004732 2 93.50% : 0.004424s : 1: type_inference.infer 6.50% : 0.000307s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000062 1 100.00% : 0.000062s : 1: match.inline ------[predicate.] 0.000232 1420 0.88% : 0.000002s : 13: predicate.accumulaten_eliminater 1.04% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.30% : 0.000005s : 25: predicate.arithmetic_simplify 0.93% : 0.000002s : 13: predicate.cast_eliminate 0.99% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.19% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.77% : 0.000002s : 12: predicate.depend_value_elim 0.79% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 1.00% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.23% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.11% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_depend_swap 1.96% : 0.000005s : 31: predicate.environ_get_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.79% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.22% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.05% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.19% : 0.000000s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.86% : 0.000014s : 63: predicate.inline 1.17% : 0.000003s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.05% : 0.000002s : 12: predicate.less_batch_normalization 1.72% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.41% : 0.000006s : 38: predicate.load_eliminater 1.24% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.36% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.74% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.78% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.82% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.79% : 0.000002s : 13: predicate.minmaximum_grad 0.72% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.32% : 0.000003s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.83% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.11% : 0.000003s : 13: predicate.reduce_eliminate 0.56% : 0.000001s : 12: predicate.remove_not_recompute_node 1.19% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.77% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.56% : 0.000001s : 6: predicate.row_tensor_eliminate 1.02% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.90% : 0.000002s : 12: predicate.shard_identity_eliminate 1.49% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 1.14% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 1.08% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.62% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.13% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.76% : 0.000002s : 13: predicate.transpose_eliminate 1.70% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.73% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.51% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.79% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.52% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.56% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.37% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.56% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.66% : 0.000002s : 6: predicate.value_based_eliminate 0.78% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.85% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000201 4 5.57% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 94.43% : 0.000190s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.100545 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000069s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.15% : 0.000153s : 1: auto_monad 0.03% : 0.000032s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.42% : 0.000422s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000011s : 1: convert_after_rewriter 0.03% : 0.000034s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.04% : 0.000042s : 1: distribtued_split 0.59% : 0.000591s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000007s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000006s : 1: label_micro_interleaved_index 0.56% : 0.000559s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.15% : 0.001155s : 80: opt.transform.opt_a 0.05% : 0.000052s : 1: opt.transform.opt_after_cconv 0.16% : 0.000157s : 27: opt.transform.opt_b 0.05% : 0.000053s : 1: opt.transform.opt_trans_graph 0.03% : 0.000034s : 3: opt.transform.special_op_eliminate 0.06% : 0.000058s : 4: opt.transform.symbol_engine_opt 6.25% : 0.006279s : 1: opt_a 0.14% : 0.000137s : 1: opt_after_cconv 0.25% : 0.000252s : 1: opt_b 8.01% : 0.008050s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000006s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000008s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000023s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.01% : 0.000014s : 1: remove_dup_value 0.25% : 0.000251s : 1: renormalize.infer 0.22% : 0.000217s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.13% : 0.000135s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000013s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000103s : 1: symbol_engine_optimizer 75.91% : 0.076328s : 1: task_emit 0.07% : 0.000072s : 1: tuple_transform 4.75% : 0.004775s : 1: type_inference 0.06% : 0.000065s : 1: validate . TotalTime = 0.094017, [21] [bootstrap]: 0.00043298 [type_inference]: 0.00501199 [auto_monad]: 0.0001862 [graph_reusing]: 2.59001e-06 [inline]: 1.36998e-06 [parallel-infer-symbol]: 2.37022e-06 [pre_auto_parallel]: 3.19299e-05 [insert-virtual-dataset]: 3.59025e-06 [parallel-infer-symbol-second]: 4.30271e-07 [dataset_repeat_opt]: 1.48034e-06 [pipeline_split]: 1.62981e-06 [optimize]: 0.00841313, [52] [py_interpret_to_execute]: 2.24202e-05 [rewriter_before_opt_a]: 3.99197e-05 [opt_a]: 0.00652769, [2] [Cycle 1]: 0.00171239, [43] [expand_dump_flag]: 3.79002e-06 [switch_simplify]: 3.09399e-05 [loop_unroll]: 1.32001e-05 [a_1]: 0.00038852 [recompute_prepare]: 9.51998e-06 [updatestate_depend_eliminate]: 8.95001e-06 [updatestate_assign_eliminate]: 7.82032e-06 [updatestate_loads_eliminate]: 8.29995e-06 [parameter_eliminate]: 3.36999e-06 [a_2]: 0.00012922 [accelerated_algorithm]: 8.92021e-06 [shard]: 2.40002e-06 [meta_shard_fg_expand]: 3.82029e-06 [shard_inline]: 9.83989e-06 [auto_parallel]: 1.36602e-05 [parallel]: 9.34023e-06 [flash_sp]: 1.25999e-05 [merge_comm]: 9.06037e-06 [allreduce_fusion]: 5.56977e-06 [matmul_add_comm_reduction]: 1.23298e-05 [allreduce_slice_to_reducescatter]: 4.69852e-07 [virtual_shard_identity]: 1.04099e-05 [virtual_dataset]: 9.58005e-06 [get_grad_eliminate_]: 7.79983e-06 [virtual_output]: 9.10973e-06 [merge_forward]: 6.00005e-06 [cell_reuse_recompute_pass]: 1.81003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.889e-05 [before_grad]: 1.74702e-05 [inplace_validation]: 5.34998e-06 [meta_fg_expand]: 6.25988e-06 [inplace_validation_after_expand]: 7.4096e-06 [flash_sp_send_recv_attached]: 3.36021e-06 [receive_attached]: 5.22006e-06 [after_resolve]: 1.54302e-05 [a_after_grad]: 1.64304e-05 [special_op_eliminate]: 9.29972e-06 [renormalize]: 0.00049479 [add_forward_monad_depend]: 3.98001e-06 [auto_monad_grad]: 1.91992e-06 [auto_monad_eliminator]: 3.36701e-05 [cse]: 3.56999e-05 [a_3]: 5.87604e-05 [Cycle 2]: 0.00080732, [43] [expand_dump_flag]: 1.13016e-06 [switch_simplify]: 8.94023e-06 [loop_unroll]: 7.83987e-06 [a_1]: 0.00020745 [recompute_prepare]: 7.62008e-06 [updatestate_depend_eliminate]: 6.07967e-06 [updatestate_assign_eliminate]: 5.20004e-06 [updatestate_loads_eliminate]: 5.78025e-06 [parameter_eliminate]: 1.55019e-06 [a_2]: 0.00010594 [accelerated_algorithm]: 8.53045e-06 [shard]: 1.24006e-06 [meta_shard_fg_expand]: 2.52994e-06 [shard_inline]: 8.17003e-06 [auto_parallel]: 1.097e-05 [parallel]: 3.69968e-06 [flash_sp]: 3.57023e-06 [merge_comm]: 5.82961e-06 [allreduce_fusion]: 4.89969e-06 [matmul_add_comm_reduction]: 1.02501e-05 [allreduce_slice_to_reducescatter]: 2.90107e-07 [virtual_shard_identity]: 9.16002e-06 [virtual_dataset]: 7.73976e-06 [get_grad_eliminate_]: 7.37002e-06 [virtual_output]: 7.11996e-06 [merge_forward]: 4.92996e-06 [cell_reuse_recompute_pass]: 1.83005e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.52602e-05 [before_grad]: 1.31899e-05 [inplace_validation]: 4.33018e-06 [meta_fg_expand]: 4.74975e-06 [inplace_validation_after_expand]: 5.43008e-06 [flash_sp_send_recv_attached]: 9.89996e-07 [receive_attached]: 8.40053e-07 [after_resolve]: 1.13603e-05 [a_after_grad]: 1.19903e-05 [special_op_eliminate]: 9.35979e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 1.20001e-06 [auto_monad_grad]: 1.22003e-06 [auto_monad_eliminator]: 1.99797e-05 [cse]: 2.17902e-05 [a_3]: 4.92199e-05 [py_interpret_to_execute_after_opt_a]: 9.45991e-06 [slice_cell_reuse_recomputed_activation]: 2.66964e-06 [rewriter_after_opt_a]: 0.00013558 [convert_after_rewriter]: 9.58005e-06 [order_py_execute_after_rewriter]: 5.76023e-06 [opt_b]: 0.00024854, [1] [Cycle 1]: 0.00024271, [7] [b_1]: 0.00016701 [b_2]: 9.87994e-06 [updatestate_depend_eliminate]: 5.58002e-06 [updatestate_assign_eliminate]: 4.40981e-06 [updatestate_loads_eliminate]: 5.25964e-06 [renormalize]: 3.1013e-07 [cse]: 1.851e-05 [optimize_parallel_all_gather_comm]: 8.94023e-06 [overlap_param_gather]: 1.79e-06 [cconv]: 2.49199e-05 [loop_unroll]: 0.0005524 [opt_after_cconv]: 0.00013779, [1] [Cycle 1]: 0.00013162, [7] [c_1]: 5.503e-05 [parameter_eliminate]: 2.38977e-06 [updatestate_depend_eliminate]: 8.52998e-06 [updatestate_assign_eliminate]: 4.71994e-06 [updatestate_loads_eliminate]: 6.01029e-06 [cse]: 2.17101e-05 [renormalize]: 3.39933e-07 [remove_dup_value]: 1.354e-05 [tuple_transform]: 7.02604e-05, [1] [Cycle 1]: 6.539e-05, [2] [d_1]: 5.63902e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 2.50014e-06 [add_cache_embedding]: 1.548e-05 [add_recomputation]: 7.69799e-05 [cse_after_recomputation]: 3.494e-05, [1] [Cycle 1]: 2.63401e-05, [1] [cse]: 2.07401e-05 [environ_conv]: 7.53999e-06 [swap_dp_allreduce_reducescatter]: 8.74e-06 [bias_add_comm_swap]: 2.59001e-06 [label_micro_interleaved_index]: 2.26963e-06 [label_fine_grained_interleaved_index]: 2.52016e-06 [merge_cast_opt]: 1.44029e-06 [slice_recompute_activation]: 2.06009e-06 [micro_interleaved_order_control]: 2.10013e-06 [assign_add_opt]: 8.44011e-06 [ForceFp32Comm]: 1.22981e-06 [remove_cast_before_assign_add]: 1.09989e-06 [full_micro_interleaved_order_control]: 2.04006e-06 [reorder_send_recv_between_fp_bp]: 2.21003e-06 [comm_op_add_attrs]: 1.03982e-06 [add_comm_op_reuse_tag]: 1.15018e-06 [interleave_split_concat_branches]: 7.79983e-07 [interleave_parallel_branches]: 8.801e-07 [overlap_opt_shard_in_pipeline]: 2.86009e-06 [overlap_opt_shard_grad_in_pipeline]: 2.31992e-06 [control_data_broadcast_order]: 1.09989e-06 [grouped_pairwise_exchange_alltoall]: 1.36998e-06 [offloading_packed_experts]: 1.42958e-06 [overlap_recompute_and_grad_model_parallel]: 2.33995e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.0198e-06 [overlap_recompute_allgather_and_fa_grad]: 1.17021e-06 [overlap_grad_ring_attention]: 1.62981e-06 [overlap_grad_flash_sp]: 1.651e-05 [begin_end_overlap_inline]: 7.79983e-07 [split_matmul_comm_elemetwise]: 2.03028e-06 [split_layernorm_comm]: 1.95997e-06 [handle_group_info]: 9.89996e-07 [symbol_engine_optimizer]: 0.00010014, [1] [Cycle 1]: 9.39597e-05, [6] [build]: 4.40003e-06 [elim_shapecalc]: 1.388e-05 [elim_not_effective]: 1.96001e-05 [opt_reshape]: 9.26014e-06 [fold_const_symbol]: 1.65896e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 2.04984e-06 [auto_monad_reorder]: 3.178e-05 [get_jit_bprop_graph]: 4.69852e-07 [rewriter_after_jit_bprop_graph]: 4.29805e-07 [eliminate_special_op_node]: 0.00054334 [distribtued_split]: 4.28399e-05 [validate]: 4.31202e-05 [task_emit]: 0.0789637 [execute]: 1.27102e-05 Sums bootstrap : 0.000433s : 0.49% type_inference : 0.005012s : 5.64% auto_monad : 0.000186s : 0.21% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000032s : 0.04% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000022s : 0.03% optimize.rewriter_before_opt_a : 0.000040s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.02% optimize.opt_a.a_1 : 0.000596s : 0.67% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000013s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000235s : 0.26% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000018s : 0.02% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000013s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000023s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.02% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000031s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.01% optimize.opt_a.after_resolve : 0.000027s : 0.03% optimize.opt_a.a_after_grad : 0.000028s : 0.03% optimize.opt_a.special_op_eliminate : 0.000019s : 0.02% optimize.opt_a.renormalize : 0.000495s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000054s : 0.06% optimize.opt_a.cse : 0.000057s : 0.06% optimize.opt_a.a_3 : 0.000108s : 0.12% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000136s : 0.15% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000167s : 0.19% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000552s : 0.62% optimize.opt_after_cconv.c_1 : 0.000055s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.06% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000077s : 0.09% optimize.cse_after_recomputation.cse : 0.000021s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000009s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000003s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000017s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000020s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000543s : 0.61% distribtued_split : 0.000043s : 0.05% validate : 0.000043s : 0.05% task_emit : 0.078964s : 88.85% execute : 0.000013s : 0.01% Time group info: ------[substitution.] 0.000148 63 5.02% : 0.000007s : 2: substitution.depend_value_elim 2.62% : 0.000004s : 5: substitution.elim_not_effective 1.87% : 0.000003s : 5: substitution.fold_const_symbol 5.02% : 0.000007s : 6: substitution.graph_param_transform 52.14% : 0.000077s : 1: substitution.inline 3.70% : 0.000005s : 10: substitution.j_node_and_user_rematch 2.84% : 0.000004s : 6: substitution.load_eliminater 2.34% : 0.000003s : 2: substitution.reduce_all_const_elim 6.09% : 0.000009s : 10: substitution.remove_not_recompute_node 2.41% : 0.000004s : 2: substitution.replace_old_param 8.54% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 7.41% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.004979 2 92.91% : 0.004626s : 1: type_inference.infer 7.09% : 0.000353s : 1: type_inference.specialize ------[replace.] 0.000014 1 100.00% : 0.000014s : 1: replace.inline ------[match.] 0.000076 1 100.00% : 0.000076s : 1: match.inline ------[predicate.] 0.000233 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.20% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.15% : 0.000005s : 25: predicate.arithmetic_simplify 0.86% : 0.000002s : 13: predicate.cast_eliminate 0.74% : 0.000002s : 12: predicate.check_bprop_eliminate 0.70% : 0.000002s : 12: predicate.compare_switch_simplify 0.26% : 0.000001s : 6: predicate.const_output_eliminate 0.39% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.38% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.78% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.88% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.81% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.51% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_depend_swap 1.90% : 0.000004s : 31: predicate.environ_get_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.31% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000001s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.37% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.83% : 0.000014s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.06% : 0.000002s : 12: predicate.less_batch_normalization 1.71% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000006s : 38: predicate.load_eliminater 1.38% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.88% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.81% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.76% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.21% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.75% : 0.000002s : 13: predicate.print_const_string_wrapper 0.84% : 0.000002s : 12: predicate.reduce_all_const_elim 1.08% : 0.000003s : 13: predicate.reduce_eliminate 0.62% : 0.000001s : 12: predicate.remove_not_recompute_node 1.21% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.86% : 0.000002s : 13: predicate.reshape_eliminate 0.85% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.03% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.42% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.94% : 0.000002s : 14: predicate.switch_defer_inline 1.63% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.32% : 0.000010s : 43: predicate.switch_simplify 0.87% : 0.000002s : 13: predicate.tile_eliminate 0.74% : 0.000002s : 13: predicate.transpose_eliminate 1.76% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.68% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.51% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.85% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.45% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.49% : 0.000003s : 25: predicate.tuple_to_list_eliminator_ 2.34% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 4.22% : 0.000010s : 50: predicate.updatestate_useless_node_eliminater 0.48% : 0.000001s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.58% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000211 4 8.48% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.52% : 0.000193s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.104386 192 0.01% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000082s : 1: add_recomputation 0.01% : 0.000013s : 1: assign_add_opt 0.19% : 0.000199s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.46% : 0.000475s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.00% : 0.000005s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000014s : 1: convert_after_rewriter 0.04% : 0.000038s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000051s : 1: distribtued_split 0.54% : 0.000559s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000008s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000005s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000006s : 1: label_micro_interleaved_index 0.54% : 0.000563s : 1: loop_unroll 0.01% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000019s : 1: opt.transform.loop_unroll_optimizer 1.14% : 0.001186s : 80: opt.transform.opt_a 0.05% : 0.000054s : 1: opt.transform.opt_after_cconv 0.15% : 0.000157s : 27: opt.transform.opt_b 0.05% : 0.000055s : 1: opt.transform.opt_trans_graph 0.03% : 0.000035s : 3: opt.transform.special_op_eliminate 0.05% : 0.000055s : 4: opt.transform.symbol_engine_opt 6.26% : 0.006532s : 1: opt_a 0.14% : 0.000142s : 1: opt_after_cconv 0.24% : 0.000252s : 1: opt_b 8.07% : 0.008422s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000020s : 1: overlap_grad_flash_sp 0.00% : 0.000005s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000006s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.02% : 0.000016s : 1: parallel-infer-symbol 0.01% : 0.000007s : 1: parallel-infer-symbol-second 0.02% : 0.000023s : 1: partial_unused_args_eliminate 0.01% : 0.000008s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000038s : 1: pre_auto_parallel 0.03% : 0.000027s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.25% : 0.000265s : 1: renormalize.infer 0.21% : 0.000222s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000142s : 1: rewriter_after_opt_a 0.04% : 0.000044s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000006s : 1: slice_recompute_activation 0.01% : 0.000007s : 1: split_layernorm_comm 0.01% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000104s : 1: symbol_engine_optimizer 75.67% : 0.078992s : 1: task_emit 0.07% : 0.000073s : 1: tuple_transform 4.82% : 0.005032s : 1: type_inference 0.08% : 0.000083s : 1: validate TotalTime = 0.104493, [21] [bootstrap]: 0.00045499 [type_inference]: 0.00602573 [auto_monad]: 0.00019845 [graph_reusing]: 2.12016e-06 [inline]: 1.44029e-06 [parallel-infer-symbol]: 2.42982e-06 [pre_auto_parallel]: 3.06903e-05 [insert-virtual-dataset]: 2.50991e-06 [parallel-infer-symbol-second]: 4.60073e-07 [dataset_repeat_opt]: 1.29966e-06 [pipeline_split]: 1.55997e-06 [optimize]: 0.00892531, [52] [py_interpret_to_execute]: 2.064e-05 [rewriter_before_opt_a]: 5.58002e-05 [opt_a]: 0.00687499, [2] [Cycle 1]: 0.00176989, [43] [expand_dump_flag]: 3.85987e-06 [switch_simplify]: 3.06601e-05 [loop_unroll]: 1.38297e-05 [a_1]: 0.00040165 [recompute_prepare]: 8.78982e-06 [updatestate_depend_eliminate]: 9.26014e-06 [updatestate_assign_eliminate]: 2.742e-05 [updatestate_loads_eliminate]: 7.6401e-06 [parameter_eliminate]: 3.33972e-06 [a_2]: 0.00012477 [accelerated_algorithm]: 8.76002e-06 [shard]: 2.65986e-06 [meta_shard_fg_expand]: 3.65963e-06 [shard_inline]: 1.049e-05 [auto_parallel]: 1.27302e-05 [parallel]: 7.62986e-06 [flash_sp]: 1.24997e-05 [merge_comm]: 9.30019e-06 [allreduce_fusion]: 7.03987e-06 [matmul_add_comm_reduction]: 1.20001e-05 [allreduce_slice_to_reducescatter]: 6.50063e-07 [virtual_shard_identity]: 9.30019e-06 [virtual_dataset]: 9.15024e-06 [get_grad_eliminate_]: 8.90996e-06 [virtual_output]: 7.98982e-06 [merge_forward]: 8.42987e-06 [cell_reuse_recompute_pass]: 1.79e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.851e-05 [before_grad]: 1.49002e-05 [inplace_validation]: 5.9302e-06 [meta_fg_expand]: 5.77001e-06 [inplace_validation_after_expand]: 7.92975e-06 [flash_sp_send_recv_attached]: 2.92994e-06 [receive_attached]: 4.80004e-06 [after_resolve]: 1.26399e-05 [a_after_grad]: 1.47498e-05 [special_op_eliminate]: 9.71043e-06 [renormalize]: 0.00053506 [add_forward_monad_depend]: 3.78024e-06 [auto_monad_grad]: 2.02004e-06 [auto_monad_eliminator]: 3.66e-05 [cse]: 3.536e-05 [a_3]: 5.99702e-05 [Cycle 2]: 0.00081974, [43] [expand_dump_flag]: 1.01002e-06 [switch_simplify]: 9.34023e-06 [loop_unroll]: 7.94977e-06 [a_1]: 0.00020445 [recompute_prepare]: 7.96001e-06 [updatestate_depend_eliminate]: 5.96e-06 [updatestate_assign_eliminate]: 5.18002e-06 [updatestate_loads_eliminate]: 5.15999e-06 [parameter_eliminate]: 1.09989e-06 [a_2]: 0.00010662 [accelerated_algorithm]: 8.40984e-06 [shard]: 1.32015e-06 [meta_shard_fg_expand]: 2.6701e-06 [shard_inline]: 7.95024e-06 [auto_parallel]: 1.12099e-05 [parallel]: 3.23961e-06 [flash_sp]: 3.01003e-06 [merge_comm]: 5.89993e-06 [allreduce_fusion]: 5.41005e-06 [matmul_add_comm_reduction]: 8.02008e-06 [allreduce_slice_to_reducescatter]: 3.39933e-07 [virtual_shard_identity]: 8.59005e-06 [virtual_dataset]: 1.42702e-05 [get_grad_eliminate_]: 8.78004e-06 [virtual_output]: 7.60006e-06 [merge_forward]: 7.16001e-06 [cell_reuse_recompute_pass]: 1.93994e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.83103e-05 [before_grad]: 1.28201e-05 [inplace_validation]: 4.63007e-06 [meta_fg_expand]: 4.88991e-06 [inplace_validation_after_expand]: 6.8699e-06 [flash_sp_send_recv_attached]: 9.4017e-07 [receive_attached]: 6.80331e-07 [after_resolve]: 9.67039e-06 [a_after_grad]: 1.28201e-05 [special_op_eliminate]: 7.74022e-06 [renormalize]: 8.98726e-08 [add_forward_monad_depend]: 9.80217e-07 [auto_monad_grad]: 1.15996e-06 [auto_monad_eliminator]: 2.085e-05 [cse]: 2.57501e-05 [a_3]: 5.09899e-05 [py_interpret_to_execute_after_opt_a]: 1.06003e-05 [slice_cell_reuse_recomputed_activation]: 2.38977e-06 [rewriter_after_opt_a]: 0.0001509 [convert_after_rewriter]: 1.08201e-05 [order_py_execute_after_rewriter]: 6.88015e-06 [opt_b]: 0.00028046, [1] [Cycle 1]: 0.00027318, [7] [b_1]: 0.00018914 [b_2]: 1.228e-05 [updatestate_depend_eliminate]: 5.70994e-06 [updatestate_assign_eliminate]: 6.48014e-06 [updatestate_loads_eliminate]: 6.71018e-06 [renormalize]: 3.40398e-07 [cse]: 2.009e-05 [optimize_parallel_all_gather_comm]: 2.90498e-05 [overlap_param_gather]: 1.61026e-06 [cconv]: 2.71997e-05 [loop_unroll]: 0.00056809 [opt_after_cconv]: 0.00013639, [1] [Cycle 1]: 0.00013063, [7] [c_1]: 5.423e-05 [parameter_eliminate]: 2.33017e-06 [updatestate_depend_eliminate]: 8.34977e-06 [updatestate_assign_eliminate]: 4.63985e-06 [updatestate_loads_eliminate]: 5.66989e-06 [cse]: 2.28798e-05 [renormalize]: 5.59725e-07 [remove_dup_value]: 1.42199e-05 [tuple_transform]: 7.61901e-05, [1] [Cycle 1]: 7.10003e-05, [2] [d_1]: 5.89602e-05 [renormalize]: 2.30037e-07 [partial_unused_args_eliminate]: 2.08011e-06 [add_cache_embedding]: 1.489e-05 [add_recomputation]: 9.502e-05 [cse_after_recomputation]: 3.37497e-05, [1] [Cycle 1]: 2.83197e-05, [1] [cse]: 2.285e-05 [environ_conv]: 6.76e-06 [swap_dp_allreduce_reducescatter]: 9.07015e-06 [bias_add_comm_swap]: 2.47965e-06 [label_micro_interleaved_index]: 2.33995e-06 [label_fine_grained_interleaved_index]: 2.29012e-06 [merge_cast_opt]: 1.09989e-06 [slice_recompute_activation]: 1.60979e-06 [micro_interleaved_order_control]: 1.84961e-06 [assign_add_opt]: 8.88994e-06 [ForceFp32Comm]: 8.40053e-07 [remove_cast_before_assign_add]: 1.26008e-06 [full_micro_interleaved_order_control]: 2.04006e-06 [reorder_send_recv_between_fp_bp]: 2.35019e-06 [comm_op_add_attrs]: 1.03004e-06 [add_comm_op_reuse_tag]: 1.11992e-06 [interleave_split_concat_branches]: 8.60076e-07 [interleave_parallel_branches]: 9.19681e-07 [overlap_opt_shard_in_pipeline]: 1.90102e-05 [overlap_opt_shard_grad_in_pipeline]: 2.21003e-06 [control_data_broadcast_order]: 1.43982e-06 [grouped_pairwise_exchange_alltoall]: 1.47987e-06 [offloading_packed_experts]: 1.24006e-06 [overlap_recompute_and_grad_model_parallel]: 1.91014e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.39936e-07 [overlap_recompute_allgather_and_fa_grad]: 1.09011e-06 [overlap_grad_ring_attention]: 2.1602e-06 [overlap_grad_flash_sp]: 1.468e-05 [begin_end_overlap_inline]: 1.05985e-06 [split_matmul_comm_elemetwise]: 2.09967e-06 [split_layernorm_comm]: 1.68988e-06 [handle_group_info]: 9.39704e-07 [symbol_engine_optimizer]: 0.00010248, [1] [Cycle 1]: 9.74499e-05, [6] [build]: 4.50993e-06 [elim_shapecalc]: 1.546e-05 [elim_not_effective]: 2.17003e-05 [opt_reshape]: 1.08299e-05 [fold_const_symbol]: 1.70898e-05 [renormalize]: 2.99886e-07 [pipeline_parallel_scheduler]: 1.91992e-06 [auto_monad_reorder]: 3.38499e-05 [get_jit_bprop_graph]: 6.70087e-07 [rewriter_after_jit_bprop_graph]: 6.80331e-07 [eliminate_special_op_node]: 0.00051635 [distribtued_split]: 4.18397e-05 [validate]: 3.795e-05 [task_emit]: 0.0879396 [execute]: 1.049e-05 Sums bootstrap : 0.000455s : 0.46% type_inference : 0.006026s : 6.08% auto_monad : 0.000198s : 0.20% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000031s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000021s : 0.02% optimize.rewriter_before_opt_a : 0.000056s : 0.06% optimize.opt_a.expand_dump_flag : 0.000005s : 0.00% optimize.opt_a.switch_simplify : 0.000040s : 0.04% optimize.opt_a.loop_unroll : 0.000022s : 0.02% optimize.opt_a.a_1 : 0.000606s : 0.61% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000033s : 0.03% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000231s : 0.23% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000018s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.02% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000012s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000023s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000018s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000016s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000037s : 0.04% optimize.opt_a.before_grad : 0.000028s : 0.03% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000015s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.01% optimize.opt_a.after_resolve : 0.000022s : 0.02% optimize.opt_a.a_after_grad : 0.000028s : 0.03% optimize.opt_a.special_op_eliminate : 0.000017s : 0.02% optimize.opt_a.renormalize : 0.000535s : 0.54% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000057s : 0.06% optimize.opt_a.cse : 0.000061s : 0.06% optimize.opt_a.a_3 : 0.000111s : 0.11% optimize.py_interpret_to_execute_after_opt_a : 0.000011s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000151s : 0.15% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000189s : 0.19% optimize.opt_b.b_2 : 0.000012s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000007s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000029s : 0.03% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000027s : 0.03% optimize.loop_unroll : 0.000568s : 0.57% optimize.opt_after_cconv.c_1 : 0.000054s : 0.05% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.02% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000014s : 0.01% optimize.tuple_transform.d_1 : 0.000059s : 0.06% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000095s : 0.10% optimize.cse_after_recomputation.cse : 0.000023s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000009s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000009s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000019s : 0.02% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.01% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000015s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000022s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000034s : 0.03% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000516s : 0.52% distribtued_split : 0.000042s : 0.04% validate : 0.000038s : 0.04% task_emit : 0.087940s : 88.76% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000152 63 4.76% : 0.000007s : 2: substitution.depend_value_elim 2.53% : 0.000004s : 5: substitution.elim_not_effective 2.34% : 0.000004s : 5: substitution.fold_const_symbol 5.19% : 0.000008s : 6: substitution.graph_param_transform 51.21% : 0.000078s : 1: substitution.inline 3.94% : 0.000006s : 10: substitution.j_node_and_user_rematch 2.77% : 0.000004s : 6: substitution.load_eliminater 2.41% : 0.000004s : 2: substitution.reduce_all_const_elim 6.30% : 0.000010s : 10: substitution.remove_not_recompute_node 2.24% : 0.000003s : 2: substitution.replace_old_param 8.05% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.25% : 0.000013s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.005995 2 92.87% : 0.005568s : 1: type_inference.infer 7.13% : 0.000427s : 1: type_inference.specialize ------[replace.] 0.000014 1 100.00% : 0.000014s : 1: replace.inline ------[match.] 0.000076 1 100.00% : 0.000076s : 1: match.inline ------[predicate.] 0.000276 1420 0.73% : 0.000002s : 13: predicate.accumulaten_eliminater 0.99% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.59% : 0.000002s : 12: predicate.addn_check_dump 0.63% : 0.000002s : 13: predicate.addn_zero_filter 0.61% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 1.75% : 0.000005s : 25: predicate.arithmetic_simplify 0.70% : 0.000002s : 13: predicate.cast_eliminate 0.71% : 0.000002s : 12: predicate.check_bprop_eliminate 0.60% : 0.000002s : 12: predicate.compare_switch_simplify 0.18% : 0.000001s : 6: predicate.const_output_eliminate 0.37% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.18% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.72% : 0.000002s : 12: predicate.depend_value_elim 0.69% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.82% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.71% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.25% : 0.000001s : 6: predicate.elim_not_effective 0.47% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 0.98% : 0.000003s : 19: predicate.environ_add_const_eliminate 0.88% : 0.000002s : 19: predicate.environ_get_add_eliminate 0.93% : 0.000003s : 19: predicate.environ_get_depend_swap 1.59% : 0.000004s : 31: predicate.environ_get_eliminate 1.01% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.70% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.13% : 0.000003s : 14: predicate.float_depend_g_call 0.57% : 0.000002s : 12: predicate.float_environ_get_switch 0.93% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.20% : 0.000001s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.63% : 0.000002s : 12: predicate.incorporate_call 0.56% : 0.000002s : 12: predicate.incorporate_call_switch 4.77% : 0.000013s : 63: predicate.inline 0.86% : 0.000002s : 12: predicate.inline_without_move 0.36% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.85% : 0.000002s : 12: predicate.less_batch_normalization 1.37% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.13% : 0.000006s : 38: predicate.load_eliminater 1.01% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.12% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.60% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.62% : 0.000002s : 12: predicate.merge_addn 0.61% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.67% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.61% : 0.000002s : 13: predicate.minmaximum_grad 0.61% : 0.000002s : 6: predicate.mutable_eliminate 0.42% : 0.000001s : 6: predicate.opt_reshape 8.62% : 0.000024s : 6: predicate.parallel_virtual_node 1.01% : 0.000003s : 14: predicate.partial_defer_inline 1.00% : 0.000003s : 19: predicate.partial_eliminate 0.74% : 0.000002s : 13: predicate.print_const_string_wrapper 0.67% : 0.000002s : 12: predicate.reduce_all_const_elim 0.86% : 0.000002s : 13: predicate.reduce_eliminate 0.50% : 0.000001s : 12: predicate.remove_not_recompute_node 0.97% : 0.000003s : 25: predicate.replace_applicator 0.38% : 0.000001s : 12: predicate.replace_old_param 0.19% : 0.000001s : 6: predicate.reset_defer_inline 0.73% : 0.000002s : 13: predicate.reshape_eliminate 0.67% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.44% : 0.000001s : 6: predicate.row_tensor_eliminate 0.84% : 0.000002s : 12: predicate.same_eliminate 0.44% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.76% : 0.000002s : 12: predicate.shard_identity_eliminate 1.25% : 0.000003s : 18: predicate.special_op_eliminate 0.80% : 0.000002s : 12: predicate.specialize_transform 0.95% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.80% : 0.000002s : 12: predicate.stack_unstack_eliminate 1.95% : 0.000005s : 38: predicate.stopgrad_eliminater 0.36% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.79% : 0.000002s : 14: predicate.switch_defer_inline 1.35% : 0.000004s : 26: predicate.switch_layer_defer_inline 3.61% : 0.000010s : 43: predicate.switch_simplify 0.67% : 0.000002s : 13: predicate.tile_eliminate 0.65% : 0.000002s : 13: predicate.transpose_eliminate 1.42% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.36% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.21% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.25% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.25% : 0.000003s : 25: predicate.tuple_list_get_set_item_eliminator 2.17% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 9.55% : 0.000026s : 25: predicate.tuple_to_list_eliminator_ 2.07% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 2.81% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.45% : 0.000001s : 6: predicate.value_based_eliminate 0.72% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.71% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000232 4 7.43% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.57% : 0.000214s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.115447 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.09% : 0.000101s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.18% : 0.000211s : 1: auto_monad 0.04% : 0.000041s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000007s : 1: bias_add_comm_swap 0.43% : 0.000491s : 1: bootstrap 0.03% : 0.000031s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.01% : 0.000006s : 1: control_data_broadcast_order 0.01% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000037s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.04% : 0.000050s : 1: distribtued_split 0.46% : 0.000530s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000007s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000628s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.01% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.03% : 0.001192s : 80: opt.transform.opt_a 0.05% : 0.000053s : 1: opt.transform.opt_after_cconv 0.16% : 0.000181s : 27: opt.transform.opt_b 0.05% : 0.000056s : 1: opt.transform.opt_trans_graph 0.03% : 0.000036s : 3: opt.transform.special_op_eliminate 0.05% : 0.000058s : 4: opt.transform.symbol_engine_opt 5.96% : 0.006879s : 1: opt_a 0.12% : 0.000140s : 1: opt_after_cconv 0.25% : 0.000284s : 1: opt_b 7.74% : 0.008934s : 1: optimize 0.03% : 0.000033s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.02% : 0.000023s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000007s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000009s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000037s : 1: pre_auto_parallel 0.02% : 0.000025s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.26% : 0.000299s : 1: renormalize.infer 0.20% : 0.000229s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000156s : 1: rewriter_after_opt_a 0.05% : 0.000060s : 1: rewriter_before_opt_a 0.01% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000006s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000107s : 1: symbol_engine_optimizer 76.20% : 0.087967s : 1: task_emit 0.07% : 0.000080s : 1: tuple_transform 5.24% : 0.006046s : 1: type_inference 0.07% : 0.000076s : 1: validate ... TotalTime = 0.10817, [21] [bootstrap]: 0.00046624 [type_inference]: 0.00567258 [auto_monad]: 0.00019145 [graph_reusing]: 2.1304e-06 [inline]: 1.73971e-06 [parallel-infer-symbol]: 1.91992e-06 [pre_auto_parallel]: 3.14801e-05 [insert-virtual-dataset]: 2.54018e-06 [parallel-infer-symbol-second]: 3.7998e-07 [dataset_repeat_opt]: 7.79983e-07 [pipeline_split]: 1.66008e-06 [optimize]: 0.009207, [52] [py_interpret_to_execute]: 2.44998e-05 [rewriter_before_opt_a]: 3.86499e-05 [opt_a]: 0.00717872, [2] [Cycle 1]: 0.00175065, [43] [expand_dump_flag]: 2.99001e-06 [switch_simplify]: 2.93199e-05 [loop_unroll]: 1.32802e-05 [a_1]: 0.0003922 [recompute_prepare]: 9.24012e-06 [updatestate_depend_eliminate]: 1.089e-05 [updatestate_assign_eliminate]: 7.66013e-06 [updatestate_loads_eliminate]: 8.31019e-06 [parameter_eliminate]: 3.89013e-06 [a_2]: 0.00013539 [accelerated_algorithm]: 1.06501e-05 [shard]: 2.03028e-06 [meta_shard_fg_expand]: 3.48967e-06 [shard_inline]: 1.04699e-05 [auto_parallel]: 1.27e-05 [parallel]: 8.29995e-06 [flash_sp]: 1.06497e-05 [merge_comm]: 8.32975e-06 [allreduce_fusion]: 6.64033e-06 [matmul_add_comm_reduction]: 1.08602e-05 [allreduce_slice_to_reducescatter]: 4.70318e-07 [virtual_shard_identity]: 1.07801e-05 [virtual_dataset]: 1.11898e-05 [get_grad_eliminate_]: 8.3698e-06 [virtual_output]: 9.47993e-06 [merge_forward]: 7.11018e-06 [cell_reuse_recompute_pass]: 1.95997e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.11201e-05 [before_grad]: 1.54902e-05 [inplace_validation]: 6.67013e-06 [meta_fg_expand]: 5.54975e-06 [inplace_validation_after_expand]: 6.52019e-06 [flash_sp_send_recv_attached]: 3.28012e-06 [receive_attached]: 4.72972e-06 [after_resolve]: 1.588e-05 [a_after_grad]: 1.57901e-05 [special_op_eliminate]: 8.67993e-06 [renormalize]: 0.00052789 [add_forward_monad_depend]: 3.83984e-06 [auto_monad_grad]: 1.89012e-06 [auto_monad_eliminator]: 3.039e-05 [cse]: 3.064e-05 [a_3]: 6.05099e-05 [Cycle 2]: 0.00082786, [43] [expand_dump_flag]: 1.15018e-06 [switch_simplify]: 9.47993e-06 [loop_unroll]: 7.79983e-06 [a_1]: 0.00020919 [recompute_prepare]: 7.24988e-06 [updatestate_depend_eliminate]: 5.9302e-06 [updatestate_assign_eliminate]: 5.11995e-06 [updatestate_loads_eliminate]: 5.39003e-06 [parameter_eliminate]: 1.47009e-06 [a_2]: 0.00010675 [accelerated_algorithm]: 8.44011e-06 [shard]: 1.21025e-06 [meta_shard_fg_expand]: 2.6999e-06 [shard_inline]: 7.83009e-06 [auto_parallel]: 1.12699e-05 [parallel]: 3.49991e-06 [flash_sp]: 3.36999e-06 [merge_comm]: 5.86035e-06 [allreduce_fusion]: 4.52017e-06 [matmul_add_comm_reduction]: 1.00303e-05 [allreduce_slice_to_reducescatter]: 3.09665e-07 [virtual_shard_identity]: 9.47993e-06 [virtual_dataset]: 7.89994e-06 [get_grad_eliminate_]: 7.43009e-06 [virtual_output]: 7.45011e-06 [merge_forward]: 4.62029e-06 [cell_reuse_recompute_pass]: 2.10991e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.54297e-05 [before_grad]: 1.26204e-05 [inplace_validation]: 4.20026e-06 [meta_fg_expand]: 4.90993e-06 [inplace_validation_after_expand]: 5.91017e-06 [flash_sp_send_recv_attached]: 9.4017e-07 [receive_attached]: 8.49832e-07 [after_resolve]: 1.196e-05 [a_after_grad]: 1.39801e-05 [special_op_eliminate]: 1.02799e-05 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 9.00123e-07 [auto_monad_grad]: 1.08965e-06 [auto_monad_eliminator]: 2.106e-05 [cse]: 2.485e-05 [a_3]: 4.97997e-05 [py_interpret_to_execute_after_opt_a]: 9.81009e-06 [slice_cell_reuse_recomputed_activation]: 2.61981e-06 [rewriter_after_opt_a]: 0.00017673 [convert_after_rewriter]: 9.99961e-06 [order_py_execute_after_rewriter]: 5.8501e-06 [opt_b]: 0.00026991, [1] [Cycle 1]: 0.00026361, [7] [b_1]: 0.00017285 [b_2]: 1.45701e-05 [updatestate_depend_eliminate]: 5.39981e-06 [updatestate_assign_eliminate]: 4.97e-06 [updatestate_loads_eliminate]: 7.37002e-06 [renormalize]: 2.70084e-07 [cse]: 2.47699e-05 [optimize_parallel_all_gather_comm]: 1.10897e-05 [overlap_param_gather]: 1.83983e-06 [cconv]: 2.29701e-05 [loop_unroll]: 0.00060351 [opt_after_cconv]: 0.00014048, [1] [Cycle 1]: 0.00013411, [7] [c_1]: 5.642e-05 [parameter_eliminate]: 2.6999e-06 [updatestate_depend_eliminate]: 8.44011e-06 [updatestate_assign_eliminate]: 4.71994e-06 [updatestate_loads_eliminate]: 5.68014e-06 [cse]: 2.266e-05 [renormalize]: 4.00003e-07 [remove_dup_value]: 1.42902e-05 [tuple_transform]: 7.91e-05, [1] [Cycle 1]: 7.40397e-05, [2] [d_1]: 6.37998e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 2.28966e-06 [add_cache_embedding]: 1.69799e-05 [add_recomputation]: 7.925e-05 [cse_after_recomputation]: 3.34e-05, [1] [Cycle 1]: 2.68701e-05, [1] [cse]: 2.10898e-05 [environ_conv]: 9.28016e-06 [swap_dp_allreduce_reducescatter]: 1.08099e-05 [bias_add_comm_swap]: 2.02004e-06 [label_micro_interleaved_index]: 1.91014e-06 [label_fine_grained_interleaved_index]: 1.94972e-06 [merge_cast_opt]: 1.43005e-06 [slice_recompute_activation]: 1.71969e-06 [micro_interleaved_order_control]: 1.89012e-06 [assign_add_opt]: 7.12043e-06 [ForceFp32Comm]: 9.20147e-07 [remove_cast_before_assign_add]: 6.79865e-07 [full_micro_interleaved_order_control]: 1.83005e-06 [reorder_send_recv_between_fp_bp]: 2.36975e-06 [comm_op_add_attrs]: 7.70204e-07 [add_comm_op_reuse_tag]: 1.06962e-06 [interleave_split_concat_branches]: 8.40053e-07 [interleave_parallel_branches]: 6.9011e-07 [overlap_opt_shard_in_pipeline]: 2.86009e-06 [overlap_opt_shard_grad_in_pipeline]: 2.35997e-06 [control_data_broadcast_order]: 1.13994e-06 [grouped_pairwise_exchange_alltoall]: 1.01002e-06 [offloading_packed_experts]: 1.26008e-06 [overlap_recompute_and_grad_model_parallel]: 1.54972e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.49946e-07 [overlap_recompute_allgather_and_fa_grad]: 1.26008e-06 [overlap_grad_ring_attention]: 1.49012e-06 [overlap_grad_flash_sp]: 1.79401e-05 [begin_end_overlap_inline]: 8.00006e-07 [split_matmul_comm_elemetwise]: 2.33995e-06 [split_layernorm_comm]: 1.83005e-06 [handle_group_info]: 7.59959e-07 [symbol_engine_optimizer]: 0.00010641, [1] [Cycle 1]: 0.00010004, [6] [build]: 5.22984e-06 [elim_shapecalc]: 1.39899e-05 [elim_not_effective]: 2.04197e-05 [opt_reshape]: 1.11097e-05 [fold_const_symbol]: 1.68402e-05 [renormalize]: 3.69735e-07 [pipeline_parallel_scheduler]: 1.60001e-06 [auto_monad_reorder]: 3.096e-05 [get_jit_bprop_graph]: 4.4005e-07 [rewriter_after_jit_bprop_graph]: 4.49829e-07 [eliminate_special_op_node]: 0.00056927 [distribtued_split]: 4.14602e-05 [validate]: 3.95398e-05 [task_emit]: 0.0916067 [execute]: 1.25798e-05 Sums bootstrap : 0.000466s : 0.46% type_inference : 0.005673s : 5.54% auto_monad : 0.000191s : 0.19% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000031s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000024s : 0.02% optimize.rewriter_before_opt_a : 0.000039s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000039s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.02% optimize.opt_a.a_1 : 0.000601s : 0.59% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000017s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000013s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.01% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000242s : 0.24% optimize.opt_a.accelerated_algorithm : 0.000019s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000018s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.02% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.01% optimize.opt_a.merge_comm : 0.000014s : 0.01% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000021s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.02% optimize.opt_a.virtual_dataset : 0.000019s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000017s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000037s : 0.04% optimize.opt_a.before_grad : 0.000028s : 0.03% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.01% optimize.opt_a.after_resolve : 0.000028s : 0.03% optimize.opt_a.a_after_grad : 0.000030s : 0.03% optimize.opt_a.special_op_eliminate : 0.000019s : 0.02% optimize.opt_a.renormalize : 0.000528s : 0.52% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.05% optimize.opt_a.cse : 0.000055s : 0.05% optimize.opt_a.a_3 : 0.000110s : 0.11% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000177s : 0.17% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000173s : 0.17% optimize.opt_b.b_2 : 0.000015s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000007s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000025s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000011s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000023s : 0.02% optimize.loop_unroll : 0.000604s : 0.59% optimize.opt_after_cconv.c_1 : 0.000056s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.01% optimize.tuple_transform.d_1 : 0.000064s : 0.06% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000017s : 0.02% optimize.add_recomputation : 0.000079s : 0.08% optimize.cse_after_recomputation.cse : 0.000021s : 0.02% optimize.environ_conv : 0.000009s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000011s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000003s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000018s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000020s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000031s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000569s : 0.56% distribtued_split : 0.000041s : 0.04% validate : 0.000040s : 0.04% task_emit : 0.091607s : 89.45% execute : 0.000013s : 0.01% Time group info: ------[substitution.] 0.000149 63 3.93% : 0.000006s : 2: substitution.depend_value_elim 2.16% : 0.000003s : 5: substitution.elim_not_effective 1.80% : 0.000003s : 5: substitution.fold_const_symbol 7.13% : 0.000011s : 6: substitution.graph_param_transform 50.55% : 0.000075s : 1: substitution.inline 3.70% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.08% : 0.000005s : 6: substitution.load_eliminater 2.45% : 0.000004s : 2: substitution.reduce_all_const_elim 6.16% : 0.000009s : 10: substitution.remove_not_recompute_node 3.17% : 0.000005s : 2: substitution.replace_old_param 7.97% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.91% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.005641 2 91.96% : 0.005188s : 1: type_inference.infer 8.04% : 0.000454s : 1: type_inference.specialize ------[replace.] 0.000014 1 100.00% : 0.000014s : 1: replace.inline ------[match.] 0.000074 1 100.00% : 0.000074s : 1: match.inline ------[predicate.] 0.000240 1420 0.82% : 0.000002s : 13: predicate.accumulaten_eliminater 1.17% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.75% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.20% : 0.000005s : 25: predicate.arithmetic_simplify 0.86% : 0.000002s : 13: predicate.cast_eliminate 0.85% : 0.000002s : 12: predicate.check_bprop_eliminate 0.68% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.36% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.75% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.98% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.85% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.54% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.11% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.07% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.87% : 0.000005s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.80% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.21% : 0.000003s : 14: predicate.float_depend_g_call 0.69% : 0.000002s : 12: predicate.float_environ_get_switch 1.02% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.32% : 0.000001s : 6: predicate.graph_param_transform 0.73% : 0.000002s : 12: predicate.incorporate_call 0.64% : 0.000002s : 12: predicate.incorporate_call_switch 5.64% : 0.000014s : 63: predicate.inline 1.09% : 0.000003s : 12: predicate.inline_without_move 0.37% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.07% : 0.000003s : 12: predicate.less_batch_normalization 1.76% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.37% : 0.000006s : 38: predicate.load_eliminater 1.42% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.68% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.72% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.50% : 0.000001s : 6: predicate.opt_reshape 0.61% : 0.000001s : 6: predicate.parallel_virtual_node 1.29% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.85% : 0.000002s : 13: predicate.print_const_string_wrapper 0.84% : 0.000002s : 12: predicate.reduce_all_const_elim 1.10% : 0.000003s : 13: predicate.reduce_eliminate 0.53% : 0.000001s : 12: predicate.remove_not_recompute_node 1.10% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.93% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.57% : 0.000001s : 6: predicate.row_tensor_eliminate 1.06% : 0.000003s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.03% : 0.000002s : 12: predicate.shard_identity_eliminate 1.54% : 0.000004s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 0.95% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.93% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.23% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.30% : 0.000010s : 43: predicate.switch_simplify 1.00% : 0.000002s : 13: predicate.tile_eliminate 0.76% : 0.000002s : 13: predicate.transpose_eliminate 1.72% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.71% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.55% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.77% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.53% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.25% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 4.02% : 0.000010s : 50: predicate.updatestate_useless_node_eliminater 0.55% : 0.000001s : 6: predicate.value_based_eliminate 0.90% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.83% : 0.000002s : 12: predicate.virtual_output_eliminate 0.50% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000309 4 5.01% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 94.99% : 0.000293s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.119396 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000021s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000084s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.17% : 0.000206s : 1: auto_monad 0.03% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.01% : 0.000007s : 1: bias_add_comm_swap 0.42% : 0.000501s : 1: bootstrap 0.02% : 0.000029s : 1: cconv 0.00% : 0.000005s : 1: comm_op_add_attrs 0.01% : 0.000006s : 1: control_data_broadcast_order 0.01% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000037s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.04% : 0.000050s : 1: distribtued_split 0.49% : 0.000584s : 1: eliminate_special_op_node 0.01% : 0.000014s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000009s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.01% : 0.000007s : 1: label_fine_grained_interleaved_index 0.01% : 0.000006s : 1: label_micro_interleaved_index 0.51% : 0.000614s : 1: loop_unroll 0.01% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000019s : 1: opt.transform.loop_unroll_optimizer 1.01% : 0.001204s : 80: opt.transform.opt_a 0.05% : 0.000055s : 1: opt.transform.opt_after_cconv 0.14% : 0.000167s : 27: opt.transform.opt_b 0.05% : 0.000062s : 1: opt.transform.opt_trans_graph 0.03% : 0.000037s : 3: opt.transform.special_op_eliminate 0.05% : 0.000057s : 4: opt.transform.symbol_engine_opt 6.02% : 0.007183s : 1: opt_a 0.12% : 0.000145s : 1: opt_after_cconv 0.23% : 0.000275s : 1: opt_b 7.72% : 0.009216s : 1: optimize 0.01% : 0.000015s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000023s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000006s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000006s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000010s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000039s : 1: pre_auto_parallel 0.02% : 0.000030s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000005s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.24% : 0.000283s : 1: renormalize.infer 0.20% : 0.000238s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000183s : 1: rewriter_after_opt_a 0.04% : 0.000043s : 1: rewriter_before_opt_a 0.01% : 0.000008s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000014s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000112s : 1: symbol_engine_optimizer 76.76% : 0.091643s : 1: task_emit 0.07% : 0.000084s : 1: tuple_transform 4.77% : 0.005693s : 1: type_inference 0.07% : 0.000080s : 1: validate TotalTime = 0.109242, [21] [bootstrap]: 0.00052557 [type_inference]: 0.00629099 [auto_monad]: 0.00018989 [graph_reusing]: 2.77022e-06 [inline]: 1.37044e-06 [parallel-infer-symbol]: 2.19978e-06 [pre_auto_parallel]: 3.178e-05 [insert-virtual-dataset]: 3.20002e-06 [parallel-infer-symbol-second]: 5.50412e-07 [dataset_repeat_opt]: 1.53994e-06 [pipeline_split]: 1.63959e-06 [optimize]: 0.00951862, [52] [py_interpret_to_execute]: 2.304e-05 [rewriter_before_opt_a]: 6.54701e-05 [opt_a]: 0.00733505, [2] [Cycle 1]: 0.00189952, [43] [expand_dump_flag]: 3.73041e-06 [switch_simplify]: 3.26303e-05 [loop_unroll]: 1.62204e-05 [a_1]: 0.00043993 [recompute_prepare]: 1.10599e-05 [updatestate_depend_eliminate]: 9.58005e-06 [updatestate_assign_eliminate]: 2.50102e-05 [updatestate_loads_eliminate]: 1.015e-05 [parameter_eliminate]: 3.55998e-06 [a_2]: 0.00014384 [accelerated_algorithm]: 1.076e-05 [shard]: 2.70968e-06 [meta_shard_fg_expand]: 4.40003e-06 [shard_inline]: 1.19503e-05 [auto_parallel]: 1.41403e-05 [parallel]: 1.04601e-05 [flash_sp]: 1.28401e-05 [merge_comm]: 1.09999e-05 [allreduce_fusion]: 6.42985e-06 [matmul_add_comm_reduction]: 1.37198e-05 [allreduce_slice_to_reducescatter]: 4.59608e-07 [virtual_shard_identity]: 1.23703e-05 [virtual_dataset]: 1.072e-05 [get_grad_eliminate_]: 1.11102e-05 [virtual_output]: 1.04103e-05 [merge_forward]: 8.59005e-06 [cell_reuse_recompute_pass]: 2.13971e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.567e-05 [before_grad]: 1.80099e-05 [inplace_validation]: 6.23986e-06 [meta_fg_expand]: 6.82985e-06 [inplace_validation_after_expand]: 8.60961e-06 [flash_sp_send_recv_attached]: 3.30992e-06 [receive_attached]: 5.27967e-06 [after_resolve]: 1.66399e-05 [a_after_grad]: 1.746e-05 [special_op_eliminate]: 1.26199e-05 [renormalize]: 0.00052128 [add_forward_monad_depend]: 3.64985e-06 [auto_monad_grad]: 2.08989e-06 [auto_monad_eliminator]: 3.52501e-05 [cse]: 3.597e-05 [a_3]: 6.95302e-05 [Cycle 2]: 0.00096487, [43] [expand_dump_flag]: 1.15996e-06 [switch_simplify]: 1.14497e-05 [loop_unroll]: 9.11998e-06 [a_1]: 0.0002517 [recompute_prepare]: 9.33977e-06 [updatestate_depend_eliminate]: 6.4699e-06 [updatestate_assign_eliminate]: 5.5898e-06 [updatestate_loads_eliminate]: 5.66011e-06 [parameter_eliminate]: 1.42027e-06 [a_2]: 0.00013065 [accelerated_algorithm]: 9.92976e-06 [shard]: 1.2801e-06 [meta_shard_fg_expand]: 2.75997e-06 [shard_inline]: 9.83989e-06 [auto_parallel]: 1.14697e-05 [parallel]: 4.00981e-06 [flash_sp]: 3.60981e-06 [merge_comm]: 6.4401e-06 [allreduce_fusion]: 5.4203e-06 [matmul_add_comm_reduction]: 8.23988e-06 [allreduce_slice_to_reducescatter]: 2.90107e-07 [virtual_shard_identity]: 1.04699e-05 [virtual_dataset]: 1.188e-05 [get_grad_eliminate_]: 1.369e-05 [virtual_output]: 9.49018e-06 [merge_forward]: 7.03987e-06 [cell_reuse_recompute_pass]: 2.09967e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.27699e-05 [before_grad]: 1.63298e-05 [inplace_validation]: 6.12997e-06 [meta_fg_expand]: 5.79003e-06 [inplace_validation_after_expand]: 6.23986e-06 [flash_sp_send_recv_attached]: 1.13994e-06 [receive_attached]: 8.79634e-07 [after_resolve]: 1.371e-05 [a_after_grad]: 1.67596e-05 [special_op_eliminate]: 1.15102e-05 [renormalize]: 8.98726e-08 [add_forward_monad_depend]: 9.89996e-07 [auto_monad_grad]: 1.39e-06 [auto_monad_eliminator]: 2.19699e-05 [cse]: 2.61799e-05 [a_3]: 6.042e-05 [py_interpret_to_execute_after_opt_a]: 9.96003e-06 [slice_cell_reuse_recomputed_activation]: 2.40002e-06 [rewriter_after_opt_a]: 0.00016531 [convert_after_rewriter]: 1.04303e-05 [order_py_execute_after_rewriter]: 7.66991e-06 [opt_b]: 0.00029582, [1] [Cycle 1]: 0.00028922, [7] [b_1]: 0.00019831 [b_2]: 1.369e-05 [updatestate_depend_eliminate]: 6.29993e-06 [updatestate_assign_eliminate]: 5.17024e-06 [updatestate_loads_eliminate]: 5.81006e-06 [renormalize]: 4.20026e-07 [cse]: 2.38302e-05 [optimize_parallel_all_gather_comm]: 2.88398e-05 [overlap_param_gather]: 1.78022e-06 [cconv]: 2.74298e-05 [loop_unroll]: 0.0006058 [opt_after_cconv]: 0.00015443, [1] [Cycle 1]: 0.00014762, [7] [c_1]: 6.43902e-05 [parameter_eliminate]: 2.73995e-06 [updatestate_depend_eliminate]: 8.82009e-06 [updatestate_assign_eliminate]: 5.15999e-06 [updatestate_loads_eliminate]: 6.33998e-06 [cse]: 2.38698e-05 [renormalize]: 4.79631e-07 [remove_dup_value]: 1.42399e-05 [tuple_transform]: 8.99304e-05, [1] [Cycle 1]: 8.45799e-05, [2] [d_1]: 7.173e-05 [renormalize]: 2.79862e-07 [partial_unused_args_eliminate]: 2.25008e-06 [add_cache_embedding]: 1.68397e-05 [add_recomputation]: 9.555e-05 [cse_after_recomputation]: 3.33199e-05, [1] [Cycle 1]: 2.65101e-05, [1] [cse]: 2.04002e-05 [environ_conv]: 8.86992e-06 [swap_dp_allreduce_reducescatter]: 9.30997e-06 [bias_add_comm_swap]: 2.25008e-06 [label_micro_interleaved_index]: 2.21003e-06 [label_fine_grained_interleaved_index]: 2.44007e-06 [merge_cast_opt]: 1.39978e-06 [slice_recompute_activation]: 2.1602e-06 [micro_interleaved_order_control]: 1.98977e-06 [assign_add_opt]: 7.74022e-06 [ForceFp32Comm]: 1.09989e-06 [remove_cast_before_assign_add]: 1.07987e-06 [full_micro_interleaved_order_control]: 2.56021e-06 [reorder_send_recv_between_fp_bp]: 2.55974e-06 [comm_op_add_attrs]: 9.39704e-07 [add_comm_op_reuse_tag]: 1.07009e-06 [interleave_split_concat_branches]: 8.89879e-07 [interleave_parallel_branches]: 9.79751e-07 [overlap_opt_shard_in_pipeline]: 1.607e-05 [overlap_opt_shard_grad_in_pipeline]: 2.72039e-06 [control_data_broadcast_order]: 1.48965e-06 [grouped_pairwise_exchange_alltoall]: 1.26986e-06 [offloading_packed_experts]: 1.41002e-06 [overlap_recompute_and_grad_model_parallel]: 2.15974e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.09902e-07 [overlap_recompute_allgather_and_fa_grad]: 1.15018e-06 [overlap_grad_ring_attention]: 2.38977e-06 [overlap_grad_flash_sp]: 3.218e-05 [begin_end_overlap_inline]: 7.59959e-07 [split_matmul_comm_elemetwise]: 2.07964e-06 [split_layernorm_comm]: 2.00002e-06 [handle_group_info]: 1.09011e-06 [symbol_engine_optimizer]: 0.00011801, [1] [Cycle 1]: 0.00011295, [6] [build]: 5.4501e-06 [elim_shapecalc]: 1.66702e-05 [elim_not_effective]: 2.42703e-05 [opt_reshape]: 1.154e-05 [fold_const_symbol]: 2.15499e-05 [renormalize]: 3.30154e-07 [pipeline_parallel_scheduler]: 1.60001e-06 [auto_monad_reorder]: 3.43001e-05 [get_jit_bprop_graph]: 5.19678e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00055631 [distribtued_split]: 4.54201e-05 [validate]: 3.89698e-05 [task_emit]: 0.0916546 [execute]: 1.31498e-05 Sums bootstrap : 0.000526s : 0.51% type_inference : 0.006291s : 6.08% auto_monad : 0.000190s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000032s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000023s : 0.02% optimize.rewriter_before_opt_a : 0.000065s : 0.06% optimize.opt_a.expand_dump_flag : 0.000005s : 0.00% optimize.opt_a.switch_simplify : 0.000044s : 0.04% optimize.opt_a.loop_unroll : 0.000025s : 0.02% optimize.opt_a.a_1 : 0.000692s : 0.67% optimize.opt_a.recompute_prepare : 0.000020s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000031s : 0.03% optimize.opt_a.updatestate_loads_eliminate : 0.000016s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000274s : 0.27% optimize.opt_a.accelerated_algorithm : 0.000021s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000022s : 0.02% optimize.opt_a.auto_parallel : 0.000026s : 0.02% optimize.opt_a.parallel : 0.000014s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000017s : 0.02% optimize.opt_a.allreduce_fusion : 0.000012s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000022s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000023s : 0.02% optimize.opt_a.virtual_dataset : 0.000023s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000025s : 0.02% optimize.opt_a.virtual_output : 0.000020s : 0.02% optimize.opt_a.merge_forward : 0.000016s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000048s : 0.05% optimize.opt_a.before_grad : 0.000034s : 0.03% optimize.opt_a.inplace_validation : 0.000012s : 0.01% optimize.opt_a.meta_fg_expand : 0.000013s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000015s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.01% optimize.opt_a.after_resolve : 0.000030s : 0.03% optimize.opt_a.a_after_grad : 0.000034s : 0.03% optimize.opt_a.special_op_eliminate : 0.000024s : 0.02% optimize.opt_a.renormalize : 0.000521s : 0.50% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000057s : 0.06% optimize.opt_a.cse : 0.000062s : 0.06% optimize.opt_a.a_3 : 0.000130s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000165s : 0.16% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000008s : 0.01% optimize.opt_b.b_1 : 0.000198s : 0.19% optimize.opt_b.b_2 : 0.000014s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000024s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000029s : 0.03% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000027s : 0.03% optimize.loop_unroll : 0.000606s : 0.59% optimize.opt_after_cconv.c_1 : 0.000064s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000024s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.01% optimize.tuple_transform.d_1 : 0.000072s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000017s : 0.02% optimize.add_recomputation : 0.000096s : 0.09% optimize.cse_after_recomputation.cse : 0.000020s : 0.02% optimize.environ_conv : 0.000009s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000009s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000016s : 0.02% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000032s : 0.03% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000017s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000024s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000012s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000022s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000034s : 0.03% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000556s : 0.54% distribtued_split : 0.000045s : 0.04% validate : 0.000039s : 0.04% task_emit : 0.091655s : 88.53% execute : 0.000013s : 0.01% Time group info: ------[substitution.] 0.000167 63 4.53% : 0.000008s : 2: substitution.depend_value_elim 2.30% : 0.000004s : 5: substitution.elim_not_effective 2.45% : 0.000004s : 5: substitution.fold_const_symbol 5.79% : 0.000010s : 6: substitution.graph_param_transform 47.21% : 0.000079s : 1: substitution.inline 4.51% : 0.000008s : 10: substitution.j_node_and_user_rematch 3.08% : 0.000005s : 6: substitution.load_eliminater 2.41% : 0.000004s : 2: substitution.reduce_all_const_elim 8.96% : 0.000015s : 10: substitution.remove_not_recompute_node 2.68% : 0.000004s : 2: substitution.replace_old_param 7.99% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 8.08% : 0.000013s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.006257 2 94.09% : 0.005887s : 1: type_inference.infer 5.91% : 0.000370s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000077 1 100.00% : 0.000077s : 1: match.inline ------[predicate.] 0.000270 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.21% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.07% : 0.000006s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.50% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.20% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.57% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_depend_swap 1.91% : 0.000005s : 31: predicate.environ_get_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000004s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000001s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.82% : 0.000002s : 12: predicate.incorporate_call 0.73% : 0.000002s : 12: predicate.incorporate_call_switch 6.35% : 0.000017s : 63: predicate.inline 1.29% : 0.000003s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.98% : 0.000003s : 12: predicate.less_batch_normalization 1.69% : 0.000005s : 25: predicate.list_to_tuple_eliminator_ 2.40% : 0.000006s : 38: predicate.load_eliminater 1.25% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.19% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.70% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.81% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.72% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.13% : 0.000003s : 14: predicate.partial_defer_inline 1.38% : 0.000004s : 19: predicate.partial_eliminate 0.81% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 0.95% : 0.000003s : 13: predicate.reduce_eliminate 0.60% : 0.000002s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.50% : 0.000001s : 12: predicate.replace_old_param 0.28% : 0.000001s : 6: predicate.reset_defer_inline 0.83% : 0.000002s : 13: predicate.reshape_eliminate 0.76% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000003s : 12: predicate.same_eliminate 0.52% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.53% : 0.000004s : 18: predicate.special_op_eliminate 1.04% : 0.000003s : 12: predicate.specialize_transform 1.04% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.90% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.21% : 0.000006s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.89% : 0.000002s : 14: predicate.switch_defer_inline 1.62% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.07% : 0.000011s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.74% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000005s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000005s : 25: predicate.tuple_list_get_item_const_eliminator 1.50% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.85% : 0.000008s : 37: predicate.tuple_list_get_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.55% : 0.000007s : 37: predicate.tuple_list_set_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.35% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 4.10% : 0.000011s : 50: predicate.updatestate_useless_node_eliminater 0.46% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000211 4 8.14% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.86% : 0.000194s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.121025 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000021s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000102s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.17% : 0.000204s : 1: auto_monad 0.03% : 0.000042s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.01% : 0.000007s : 1: bias_add_comm_swap 0.47% : 0.000568s : 1: bootstrap 0.03% : 0.000032s : 1: cconv 0.00% : 0.000005s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.02% : 0.000023s : 1: convert_after_rewriter 0.03% : 0.000037s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000055s : 1: distribtued_split 0.47% : 0.000571s : 1: eliminate_special_op_node 0.01% : 0.000014s : 1: environ_conv 0.02% : 0.000024s : 1: execute 0.01% : 0.000007s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000006s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000006s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.01% : 0.000007s : 1: label_fine_grained_interleaved_index 0.00% : 0.000006s : 1: label_micro_interleaved_index 0.51% : 0.000617s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000006s : 1: offloading_packed_experts 0.01% : 0.000018s : 1: opt.transform.loop_unroll_optimizer 1.15% : 0.001397s : 80: opt.transform.opt_a 0.05% : 0.000062s : 1: opt.transform.opt_after_cconv 0.15% : 0.000187s : 27: opt.transform.opt_b 0.06% : 0.000069s : 1: opt.transform.opt_trans_graph 0.03% : 0.000041s : 3: opt.transform.special_op_eliminate 0.05% : 0.000066s : 4: opt.transform.symbol_engine_opt 6.06% : 0.007340s : 1: opt_a 0.13% : 0.000161s : 1: opt_after_cconv 0.25% : 0.000299s : 1: opt_b 7.87% : 0.009528s : 1: optimize 0.03% : 0.000034s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000012s : 1: order_py_execute_after_rewriter 0.03% : 0.000037s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.02% : 0.000021s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000007s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000008s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.03% : 0.000039s : 1: pre_auto_parallel 0.02% : 0.000027s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.24% : 0.000286s : 1: renormalize.infer 0.19% : 0.000229s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000172s : 1: rewriter_after_opt_a 0.06% : 0.000071s : 1: rewriter_before_opt_a 0.01% : 0.000008s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000006s : 1: slice_recompute_activation 0.01% : 0.000006s : 1: split_layernorm_comm 0.01% : 0.000007s : 1: split_matmul_comm_elemetwise 0.01% : 0.000013s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000123s : 1: symbol_engine_optimizer 75.77% : 0.091696s : 1: task_emit 0.08% : 0.000094s : 1: tuple_transform 5.24% : 0.006339s : 1: type_inference 0.06% : 0.000078s : 1: validate TotalTime = 0.128211, [21] [bootstrap]: 0.0007959 [type_inference]: 0.0085528 [auto_monad]: 0.00048296 [graph_reusing]: 3.07988e-06 [inline]: 1.43005e-06 [parallel-infer-symbol]: 2.74042e-06 [pre_auto_parallel]: 5.05997e-05 [insert-virtual-dataset]: 2.66032e-06 [parallel-infer-symbol-second]: 6.79865e-07 [dataset_repeat_opt]: 1.75973e-06 [pipeline_split]: 1.47987e-06 [optimize]: 0.0142019, [52] [py_interpret_to_execute]: 5.14803e-05 [rewriter_before_opt_a]: 4.905e-05 [opt_a]: 0.0114239, [2] [Cycle 1]: 0.00254045, [43] [expand_dump_flag]: 3.43006e-06 [switch_simplify]: 3.05297e-05 [loop_unroll]: 1.38902e-05 [a_1]: 0.00056287 [recompute_prepare]: 9.85991e-06 [updatestate_depend_eliminate]: 8.81962e-06 [updatestate_assign_eliminate]: 8.08015e-06 [updatestate_loads_eliminate]: 3.17199e-05 [parameter_eliminate]: 3.75975e-06 [a_2]: 0.00020679 [accelerated_algorithm]: 9.14e-06 [shard]: 2.5304e-06 [meta_shard_fg_expand]: 4.27011e-06 [shard_inline]: 9.39984e-06 [auto_parallel]: 1.413e-05 [parallel]: 7.93021e-06 [flash_sp]: 1.30599e-05 [merge_comm]: 1.95401e-05 [allreduce_fusion]: 5.79981e-06 [matmul_add_comm_reduction]: 2.09296e-05 [allreduce_slice_to_reducescatter]: 7.10133e-07 [virtual_shard_identity]: 1.06897e-05 [virtual_dataset]: 1.70297e-05 [get_grad_eliminate_]: 2.824e-05 [virtual_output]: 1.67e-05 [merge_forward]: 1.67601e-05 [cell_reuse_recompute_pass]: 2.37999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.944e-05 [before_grad]: 2.50302e-05 [inplace_validation]: 5.38025e-06 [meta_fg_expand]: 6.29015e-06 [inplace_validation_after_expand]: 6.24964e-06 [flash_sp_send_recv_attached]: 3.91016e-06 [receive_attached]: 4.93973e-06 [after_resolve]: 1.251e-05 [a_after_grad]: 1.371e-05 [special_op_eliminate]: 8.65012e-06 [renormalize]: 0.00093635 [add_forward_monad_depend]: 3.90038e-06 [auto_monad_grad]: 2.02982e-06 [auto_monad_eliminator]: 3.42e-05 [cse]: 3.45497e-05 [a_3]: 5.899e-05 [Cycle 2]: 0.00097719, [43] [expand_dump_flag]: 1.1404e-06 [switch_simplify]: 9.81987e-06 [loop_unroll]: 7.91997e-06 [a_1]: 0.0002101 [recompute_prepare]: 7.91019e-06 [updatestate_depend_eliminate]: 6.27991e-06 [updatestate_assign_eliminate]: 5.09992e-06 [updatestate_loads_eliminate]: 5.67036e-06 [parameter_eliminate]: 1.26008e-06 [a_2]: 0.00010619 [accelerated_algorithm]: 8.67015e-06 [shard]: 1.2801e-06 [meta_shard_fg_expand]: 2.49036e-06 [shard_inline]: 7.84965e-06 [auto_parallel]: 1.14501e-05 [parallel]: 3.53018e-06 [flash_sp]: 3.39001e-06 [merge_comm]: 5.81983e-06 [allreduce_fusion]: 4.71016e-06 [matmul_add_comm_reduction]: 7.95024e-06 [allreduce_slice_to_reducescatter]: 2.60305e-07 [virtual_shard_identity]: 9.34023e-06 [virtual_dataset]: 7.41985e-06 [get_grad_eliminate_]: 7.07991e-06 [virtual_output]: 7.3798e-06 [merge_forward]: 2.464e-05 [cell_reuse_recompute_pass]: 2.2701e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.85702e-05 [before_grad]: 2.49799e-05 [inplace_validation]: 5.9302e-06 [meta_fg_expand]: 5.24987e-06 [inplace_validation_after_expand]: 5.75976e-06 [flash_sp_send_recv_attached]: 1.09011e-06 [receive_attached]: 8.60076e-07 [after_resolve]: 1.236e-05 [a_after_grad]: 1.33296e-05 [special_op_eliminate]: 1.666e-05 [renormalize]: 4.98258e-08 [add_forward_monad_depend]: 1.16974e-06 [auto_monad_grad]: 1.37975e-06 [auto_monad_eliminator]: 2.27001e-05 [cse]: 4.25298e-05 [a_3]: 7.32001e-05 [py_interpret_to_execute_after_opt_a]: 9.76957e-06 [slice_cell_reuse_recomputed_activation]: 2.70968e-06 [rewriter_after_opt_a]: 0.00018026 [convert_after_rewriter]: 1.06702e-05 [order_py_execute_after_rewriter]: 6.10016e-06 [opt_b]: 0.00032962, [1] [Cycle 1]: 0.00032327, [7] [b_1]: 0.00020681 [b_2]: 2.028e-05 [updatestate_depend_eliminate]: 6.12019e-06 [updatestate_assign_eliminate]: 1.71298e-05 [updatestate_loads_eliminate]: 5.72996e-06 [renormalize]: 2.39816e-07 [cse]: 3.15597e-05 [optimize_parallel_all_gather_comm]: 1.88602e-05 [overlap_param_gather]: 1.74018e-06 [cconv]: 2.426e-05 [loop_unroll]: 0.00084694 [opt_after_cconv]: 0.00015267, [1] [Cycle 1]: 0.00014606, [7] [c_1]: 6.92899e-05 [parameter_eliminate]: 2.67988e-06 [updatestate_depend_eliminate]: 8.14022e-06 [updatestate_assign_eliminate]: 4.67012e-06 [updatestate_loads_eliminate]: 5.54975e-06 [cse]: 2.23303e-05 [renormalize]: 3.30154e-07 [remove_dup_value]: 1.36499e-05 [tuple_transform]: 0.00014618, [1] [Cycle 1]: 0.00013243, [2] [d_1]: 0.00010049 [renormalize]: 1.49943e-07 [partial_unused_args_eliminate]: 2.35997e-06 [add_cache_embedding]: 2.33101e-05 [add_recomputation]: 9.42298e-05 [cse_after_recomputation]: 4.10103e-05, [1] [Cycle 1]: 2.42903e-05, [1] [cse]: 1.864e-05 [environ_conv]: 7.70995e-06 [swap_dp_allreduce_reducescatter]: 1.69901e-05 [bias_add_comm_swap]: 2.31015e-06 [label_micro_interleaved_index]: 2.05031e-06 [label_fine_grained_interleaved_index]: 2.46009e-06 [merge_cast_opt]: 1.31968e-06 [slice_recompute_activation]: 1.74996e-06 [micro_interleaved_order_control]: 1.79e-06 [assign_add_opt]: 8.23988e-06 [ForceFp32Comm]: 9.09902e-07 [remove_cast_before_assign_add]: 1.15018e-06 [full_micro_interleaved_order_control]: 2.37999e-06 [reorder_send_recv_between_fp_bp]: 1.85985e-06 [comm_op_add_attrs]: 9.20147e-07 [add_comm_op_reuse_tag]: 1.01002e-06 [interleave_split_concat_branches]: 1.13994e-06 [interleave_parallel_branches]: 8.59611e-07 [overlap_opt_shard_in_pipeline]: 2.3297e-06 [overlap_opt_shard_grad_in_pipeline]: 2.23005e-06 [control_data_broadcast_order]: 1.17999e-06 [grouped_pairwise_exchange_alltoall]: 1.20001e-06 [offloading_packed_experts]: 1.21025e-06 [overlap_recompute_and_grad_model_parallel]: 1.87987e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.03004e-06 [overlap_recompute_allgather_and_fa_grad]: 1.25961e-06 [overlap_grad_ring_attention]: 1.66986e-06 [overlap_grad_flash_sp]: 1.84798e-05 [begin_end_overlap_inline]: 7.89762e-07 [split_matmul_comm_elemetwise]: 2.06009e-06 [split_layernorm_comm]: 1.82027e-06 [handle_group_info]: 8.801e-07 [symbol_engine_optimizer]: 0.00015323, [1] [Cycle 1]: 0.00014805, [6] [build]: 4.99003e-06 [elim_shapecalc]: 3.03001e-05 [elim_not_effective]: 3.41302e-05 [opt_reshape]: 1.06501e-05 [fold_const_symbol]: 3.717e-05 [renormalize]: 2.90107e-07 [pipeline_parallel_scheduler]: 1.46031e-06 [auto_monad_reorder]: 4.77699e-05 [get_jit_bprop_graph]: 4.29805e-07 [rewriter_after_jit_bprop_graph]: 3.89758e-07 [eliminate_special_op_node]: 0.00055852 [distribtued_split]: 4.23603e-05 [validate]: 6.84699e-05 [task_emit]: 0.103032 [execute]: 1.274e-05 Sums bootstrap : 0.000796s : 0.67% type_inference : 0.008553s : 7.20% auto_monad : 0.000483s : 0.41% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000051s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000051s : 0.04% optimize.rewriter_before_opt_a : 0.000049s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.00% optimize.opt_a.switch_simplify : 0.000040s : 0.03% optimize.opt_a.loop_unroll : 0.000022s : 0.02% optimize.opt_a.a_1 : 0.000773s : 0.65% optimize.opt_a.recompute_prepare : 0.000018s : 0.01% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.01% optimize.opt_a.updatestate_assign_eliminate : 0.000013s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000037s : 0.03% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000313s : 0.26% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.01% optimize.opt_a.auto_parallel : 0.000026s : 0.02% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.01% optimize.opt_a.merge_comm : 0.000025s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000029s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.02% optimize.opt_a.virtual_dataset : 0.000024s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000035s : 0.03% optimize.opt_a.virtual_output : 0.000024s : 0.02% optimize.opt_a.merge_forward : 0.000041s : 0.03% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000048s : 0.04% optimize.opt_a.before_grad : 0.000050s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000012s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.000025s : 0.02% optimize.opt_a.a_after_grad : 0.000027s : 0.02% optimize.opt_a.special_op_eliminate : 0.000025s : 0.02% optimize.opt_a.renormalize : 0.000936s : 0.79% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000057s : 0.05% optimize.opt_a.cse : 0.000077s : 0.06% optimize.opt_a.a_3 : 0.000132s : 0.11% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000180s : 0.15% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000207s : 0.17% optimize.opt_b.b_2 : 0.000020s : 0.02% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000017s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000032s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000019s : 0.02% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000024s : 0.02% optimize.loop_unroll : 0.000847s : 0.71% optimize.opt_after_cconv.c_1 : 0.000069s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.00% optimize.opt_after_cconv.cse : 0.000022s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.01% optimize.tuple_transform.d_1 : 0.000100s : 0.08% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000023s : 0.02% optimize.add_recomputation : 0.000094s : 0.08% optimize.cse_after_recomputation.cse : 0.000019s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000017s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000018s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000030s : 0.03% optimize.symbol_engine_optimizer.elim_not_effective : 0.000034s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000037s : 0.03% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000048s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000559s : 0.47% distribtued_split : 0.000042s : 0.04% validate : 0.000068s : 0.06% task_emit : 0.103032s : 86.79% execute : 0.000013s : 0.01% Time group info: ------[substitution.] 0.000225 63 6.07% : 0.000014s : 2: substitution.depend_value_elim 1.86% : 0.000004s : 5: substitution.elim_not_effective 1.18% : 0.000003s : 5: substitution.fold_const_symbol 14.66% : 0.000033s : 6: substitution.graph_param_transform 43.68% : 0.000098s : 1: substitution.inline 7.42% : 0.000017s : 10: substitution.j_node_and_user_rematch 2.12% : 0.000005s : 6: substitution.load_eliminater 1.61% : 0.000004s : 2: substitution.reduce_all_const_elim 8.42% : 0.000019s : 10: substitution.remove_not_recompute_node 1.69% : 0.000004s : 2: substitution.replace_old_param 5.71% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 5.58% : 0.000013s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.008517 2 91.88% : 0.007826s : 1: type_inference.infer 8.12% : 0.000691s : 1: type_inference.specialize ------[replace.] 0.000016 1 100.00% : 0.000016s : 1: replace.inline ------[match.] 0.000097 1 100.00% : 0.000097s : 1: match.inline ------[predicate.] 0.000242 1420 0.76% : 0.000002s : 13: predicate.accumulaten_eliminater 1.12% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.77% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.29% : 0.000006s : 25: predicate.arithmetic_simplify 0.83% : 0.000002s : 13: predicate.cast_eliminate 0.74% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000001s : 6: predicate.const_output_eliminate 0.36% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.36% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.88% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.32% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.13% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_depend_swap 1.89% : 0.000005s : 31: predicate.environ_get_eliminate 1.03% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.22% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.04% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.25% : 0.000001s : 6: predicate.fold_const_symbol 0.91% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.74% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.57% : 0.000013s : 63: predicate.inline 1.08% : 0.000003s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.10% : 0.000003s : 12: predicate.less_batch_normalization 1.78% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.33% : 0.000006s : 38: predicate.load_eliminater 1.43% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.29% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.70% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.69% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.72% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.74% : 0.000002s : 6: predicate.mutable_eliminate 0.51% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.20% : 0.000003s : 19: predicate.partial_eliminate 0.76% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.00% : 0.000002s : 13: predicate.reduce_eliminate 0.57% : 0.000001s : 12: predicate.remove_not_recompute_node 1.18% : 0.000003s : 25: predicate.replace_applicator 0.51% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.76% : 0.000002s : 13: predicate.reshape_eliminate 0.83% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.57% : 0.000001s : 6: predicate.row_tensor_eliminate 1.10% : 0.000003s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.00% : 0.000002s : 12: predicate.shard_identity_eliminate 1.43% : 0.000003s : 18: predicate.special_op_eliminate 0.90% : 0.000002s : 12: predicate.specialize_transform 1.04% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 1.03% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.23% : 0.000005s : 38: predicate.stopgrad_eliminater 0.48% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.52% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.23% : 0.000010s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.78% : 0.000002s : 13: predicate.transpose_eliminate 1.69% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.84% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.64% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.67% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.39% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 4.37% : 0.000011s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.75% : 0.000002s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000616 4 2.72% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 97.28% : 0.000599s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.145111 192 0.00% : 0.000006s : 1: ForceFp32Comm 0.02% : 0.000027s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000099s : 1: add_recomputation 0.02% : 0.000033s : 1: assign_add_opt 0.34% : 0.000500s : 1: auto_monad 0.04% : 0.000064s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.58% : 0.000849s : 1: bootstrap 0.02% : 0.000030s : 1: cconv 0.01% : 0.000011s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000045s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.03% : 0.000051s : 1: distribtued_split 0.39% : 0.000573s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000010s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000012s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.01% : 0.000012s : 1: interleave_parallel_branches 0.00% : 0.000006s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000015s : 1: label_micro_interleaved_index 0.59% : 0.000858s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000013s : 1: micro_interleaved_order_control 0.01% : 0.000014s : 1: offloading_packed_experts 0.01% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.04% : 0.001502s : 80: opt.transform.opt_a 0.05% : 0.000067s : 1: opt.transform.opt_after_cconv 0.14% : 0.000206s : 27: opt.transform.opt_b 0.06% : 0.000091s : 1: opt.transform.opt_trans_graph 0.02% : 0.000033s : 3: opt.transform.special_op_eliminate 0.06% : 0.000084s : 4: opt.transform.symbol_engine_opt 7.88% : 0.011428s : 1: opt_a 0.12% : 0.000169s : 1: opt_after_cconv 0.24% : 0.000344s : 1: opt_b 9.79% : 0.014212s : 1: optimize 0.02% : 0.000023s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000023s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000014s : 1: overlap_grad_ring_attention 0.01% : 0.000013s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000013s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000014s : 1: partial_unused_args_eliminate 0.01% : 0.000017s : 1: pipeline_parallel_scheduler 0.00% : 0.000006s : 1: pipeline_split 0.04% : 0.000058s : 1: pre_auto_parallel 0.04% : 0.000057s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000028s : 1: remove_dup_value 0.32% : 0.000457s : 1: renormalize.infer 0.32% : 0.000460s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000014s : 1: rewriter_after_jit_bprop_graph 0.13% : 0.000187s : 1: rewriter_after_opt_a 0.04% : 0.000053s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000021s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000157s : 1: symbol_engine_optimizer 71.03% : 0.103068s : 1: task_emit 0.11% : 0.000160s : 1: tuple_transform 5.91% : 0.008573s : 1: type_inference 0.08% : 0.000110s : 1: validate TotalTime = 0.0791411, [21] [bootstrap]: 0.00031321 [type_inference]: 0.00237662 [auto_monad]: 0.00010607 [graph_reusing]: 1.53994e-06 [inline]: 1.10036e-06 [parallel-infer-symbol]: 1.43982e-06 [pre_auto_parallel]: 2.17399e-05 [insert-virtual-dataset]: 2.13971e-06 [parallel-infer-symbol-second]: 4.30271e-07 [dataset_repeat_opt]: 9.69972e-07 [pipeline_split]: 1.17021e-06 [optimize]: 0.00708514, [52] [py_interpret_to_execute]: 1.55699e-05 [rewriter_before_opt_a]: 2.96198e-05 [opt_a]: 0.00545041, [2] [Cycle 1]: 0.00150895, [43] [expand_dump_flag]: 2.54996e-06 [switch_simplify]: 2.62205e-05 [loop_unroll]: 1.37398e-05 [a_1]: 0.00033577 [recompute_prepare]: 9.42033e-06 [updatestate_depend_eliminate]: 7.41007e-06 [updatestate_assign_eliminate]: 5.35976e-06 [updatestate_loads_eliminate]: 5.83008e-06 [parameter_eliminate]: 2.42982e-06 [a_2]: 0.00011877 [accelerated_algorithm]: 8.95979e-06 [shard]: 1.68011e-06 [meta_shard_fg_expand]: 3.13018e-06 [shard_inline]: 9.02032e-06 [auto_parallel]: 1.14897e-05 [parallel]: 6.07967e-06 [flash_sp]: 8.04011e-06 [merge_comm]: 7.38027e-06 [allreduce_fusion]: 5.33974e-06 [matmul_add_comm_reduction]: 8.90996e-06 [allreduce_slice_to_reducescatter]: 3.50177e-07 [virtual_shard_identity]: 9.92976e-06 [virtual_dataset]: 8.31019e-06 [get_grad_eliminate_]: 8.74e-06 [virtual_output]: 7.85012e-06 [merge_forward]: 5.03007e-06 [cell_reuse_recompute_pass]: 1.57999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.708e-05 [before_grad]: 1.34497e-05 [inplace_validation]: 4.81028e-06 [meta_fg_expand]: 4.99003e-06 [inplace_validation_after_expand]: 5.58002e-06 [flash_sp_send_recv_attached]: 3.39979e-06 [receive_attached]: 2.38977e-06 [after_resolve]: 1.08499e-05 [a_after_grad]: 1.26297e-05 [special_op_eliminate]: 8.40006e-06 [renormalize]: 0.00045057 [add_forward_monad_depend]: 2.60957e-06 [auto_monad_grad]: 1.2801e-06 [auto_monad_eliminator]: 2.506e-05 [cse]: 2.64002e-05 [a_3]: 6.06598e-05 [Cycle 2]: 0.0007926, [43] [expand_dump_flag]: 9.69972e-07 [switch_simplify]: 9.6201e-06 [loop_unroll]: 8.17003e-06 [a_1]: 0.00020389 [recompute_prepare]: 7.70018e-06 [updatestate_depend_eliminate]: 5.89015e-06 [updatestate_assign_eliminate]: 5.09992e-06 [updatestate_loads_eliminate]: 5.18002e-06 [parameter_eliminate]: 1.17021e-06 [a_2]: 0.00010859 [accelerated_algorithm]: 8.91974e-06 [shard]: 1.10967e-06 [meta_shard_fg_expand]: 2.46009e-06 [shard_inline]: 8.46013e-06 [auto_parallel]: 1.04401e-05 [parallel]: 3.11993e-06 [flash_sp]: 2.6403e-06 [merge_comm]: 5.93998e-06 [allreduce_fusion]: 5.07012e-06 [matmul_add_comm_reduction]: 7.13021e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 9.77982e-06 [virtual_dataset]: 8.04989e-06 [get_grad_eliminate_]: 7.43987e-06 [virtual_output]: 7.61962e-06 [merge_forward]: 4.2296e-06 [cell_reuse_recompute_pass]: 1.69966e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.57799e-05 [before_grad]: 1.30599e-05 [inplace_validation]: 4.59002e-06 [meta_fg_expand]: 4.92018e-06 [inplace_validation_after_expand]: 5.22006e-06 [flash_sp_send_recv_attached]: 8.29808e-07 [receive_attached]: 7.30157e-07 [after_resolve]: 1.03298e-05 [a_after_grad]: 1.295e-05 [special_op_eliminate]: 8.02008e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.69855e-07 [auto_monad_grad]: 1.11014e-06 [auto_monad_eliminator]: 1.628e-05 [cse]: 1.96104e-05 [a_3]: 5.19897e-05 [py_interpret_to_execute_after_opt_a]: 8.97003e-06 [slice_cell_reuse_recomputed_activation]: 1.81003e-06 [rewriter_after_opt_a]: 0.00012735 [convert_after_rewriter]: 1.02101e-05 [order_py_execute_after_rewriter]: 5.37001e-06 [opt_b]: 0.00025277, [1] [Cycle 1]: 0.00024741, [7] [b_1]: 0.00016927 [b_2]: 1.032e-05 [updatestate_depend_eliminate]: 5.41983e-06 [updatestate_assign_eliminate]: 4.80982e-06 [updatestate_loads_eliminate]: 5.98002e-06 [renormalize]: 3.30154e-07 [cse]: 1.94404e-05 [optimize_parallel_all_gather_comm]: 7.57026e-06 [overlap_param_gather]: 9.49949e-07 [cconv]: 1.51997e-05 [loop_unroll]: 0.00048954 [opt_after_cconv]: 0.00013238, [1] [Cycle 1]: 0.00012639, [7] [c_1]: 5.30398e-05 [parameter_eliminate]: 1.81003e-06 [updatestate_depend_eliminate]: 7.62008e-06 [updatestate_assign_eliminate]: 4.78001e-06 [updatestate_loads_eliminate]: 5.5898e-06 [cse]: 2.06102e-05 [renormalize]: 3.40398e-07 [remove_dup_value]: 9.60985e-06 [tuple_transform]: 6.94101e-05, [1] [Cycle 1]: 6.51097e-05, [2] [d_1]: 5.49904e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.62981e-06 [add_cache_embedding]: 1.14697e-05 [add_recomputation]: 5.39403e-05 [cse_after_recomputation]: 2.84701e-05, [1] [Cycle 1]: 2.39699e-05, [1] [cse]: 1.81799e-05 [environ_conv]: 6.92997e-06 [swap_dp_allreduce_reducescatter]: 7.11018e-06 [bias_add_comm_swap]: 1.72993e-06 [label_micro_interleaved_index]: 1.55997e-06 [label_fine_grained_interleaved_index]: 1.46963e-06 [merge_cast_opt]: 7.5018e-07 [slice_recompute_activation]: 1.06031e-06 [micro_interleaved_order_control]: 1.53016e-06 [assign_add_opt]: 7.28993e-06 [ForceFp32Comm]: 6.19795e-07 [remove_cast_before_assign_add]: 6.19795e-07 [full_micro_interleaved_order_control]: 1.53016e-06 [reorder_send_recv_between_fp_bp]: 1.15996e-06 [comm_op_add_attrs]: 6.20261e-07 [add_comm_op_reuse_tag]: 5.89993e-07 [interleave_split_concat_branches]: 5.0012e-07 [interleave_parallel_branches]: 5.10365e-07 [overlap_opt_shard_in_pipeline]: 1.06031e-06 [overlap_opt_shard_grad_in_pipeline]: 1.51014e-06 [control_data_broadcast_order]: 6.9011e-07 [grouped_pairwise_exchange_alltoall]: 6.59842e-07 [offloading_packed_experts]: 6.3004e-07 [overlap_recompute_and_grad_model_parallel]: 1.09011e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.0012e-07 [overlap_recompute_allgather_and_fa_grad]: 6.70087e-07 [overlap_grad_ring_attention]: 1.26008e-06 [overlap_grad_flash_sp]: 1.18003e-05 [begin_end_overlap_inline]: 4.89876e-07 [split_matmul_comm_elemetwise]: 1.66986e-06 [split_layernorm_comm]: 1.16043e-06 [handle_group_info]: 5.19678e-07 [symbol_engine_optimizer]: 8.61101e-05, [1] [Cycle 1]: 8.18898e-05, [6] [build]: 3.37977e-06 [elim_shapecalc]: 1.23102e-05 [elim_not_effective]: 1.67298e-05 [opt_reshape]: 9.6499e-06 [fold_const_symbol]: 1.38702e-05 [renormalize]: 2.5006e-07 [pipeline_parallel_scheduler]: 9.59728e-07 [auto_monad_reorder]: 2.31303e-05 [get_jit_bprop_graph]: 3.69735e-07 [rewriter_after_jit_bprop_graph]: 2.99886e-07 [eliminate_special_op_node]: 0.0005002 [distribtued_split]: 3.36301e-05 [validate]: 3.01497e-05 [task_emit]: 0.0683997 [execute]: 8.76002e-06 Sums bootstrap : 0.000313s : 0.42% type_inference : 0.002377s : 3.17% auto_monad : 0.000106s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000022s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000036s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000540s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000227s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000026s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000451s : 0.60% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000041s : 0.06% optimize.opt_a.cse : 0.000046s : 0.06% optimize.opt_a.a_3 : 0.000113s : 0.15% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000127s : 0.17% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000169s : 0.23% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000490s : 0.65% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.02% optimize.add_recomputation : 0.000054s : 0.07% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000023s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000500s : 0.67% distribtued_split : 0.000034s : 0.04% validate : 0.000030s : 0.04% task_emit : 0.068400s : 91.18% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000115 63 4.45% : 0.000005s : 2: substitution.depend_value_elim 2.10% : 0.000002s : 5: substitution.elim_not_effective 1.81% : 0.000002s : 5: substitution.fold_const_symbol 5.44% : 0.000006s : 6: substitution.graph_param_transform 50.66% : 0.000058s : 1: substitution.inline 4.22% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.53% : 0.000004s : 6: substitution.load_eliminater 2.12% : 0.000002s : 2: substitution.reduce_all_const_elim 6.75% : 0.000008s : 10: substitution.remove_not_recompute_node 2.26% : 0.000003s : 2: substitution.replace_old_param 8.91% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 7.77% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002352 2 90.07% : 0.002119s : 1: type_inference.infer 9.93% : 0.000234s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000057 1 100.00% : 0.000057s : 1: match.inline ------[predicate.] 0.000229 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.24% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.74% : 0.000002s : 13: predicate.addn_zero_filter 0.70% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.44% : 0.000006s : 25: predicate.arithmetic_simplify 0.80% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.25% : 0.000001s : 6: predicate.const_output_eliminate 0.51% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.22% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.92% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.55% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000004s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.29% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.17% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.72% : 0.000002s : 12: predicate.incorporate_call_switch 5.57% : 0.000013s : 63: predicate.inline 0.99% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.10% : 0.000003s : 12: predicate.less_batch_normalization 1.65% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.47% : 0.000006s : 38: predicate.load_eliminater 1.24% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.14% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.66% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.75% : 0.000002s : 6: predicate.mutable_eliminate 0.50% : 0.000001s : 6: predicate.opt_reshape 0.51% : 0.000001s : 6: predicate.parallel_virtual_node 1.20% : 0.000003s : 14: predicate.partial_defer_inline 1.30% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.92% : 0.000002s : 12: predicate.reduce_all_const_elim 1.11% : 0.000003s : 13: predicate.reduce_eliminate 0.60% : 0.000001s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.80% : 0.000002s : 13: predicate.reshape_eliminate 0.87% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.57% : 0.000001s : 6: predicate.row_tensor_eliminate 0.99% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.44% : 0.000003s : 18: predicate.special_op_eliminate 0.90% : 0.000002s : 12: predicate.specialize_transform 1.12% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.94% : 0.000002s : 14: predicate.switch_defer_inline 1.70% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.22% : 0.000010s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.82% : 0.000002s : 13: predicate.transpose_eliminate 1.85% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.69% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.85% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.76% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.53% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.38% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.61% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.75% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.49% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000140 4 7.10% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.90% : 0.000130s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088079 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000058s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.14% : 0.000119s : 1: auto_monad 0.03% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000336s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.04% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.58% : 0.000513s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.01% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000498s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.26% : 0.001112s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.18% : 0.000160s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.06% : 0.000049s : 4: opt.transform.symbol_engine_opt 6.19% : 0.005454s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.29% : 0.000256s : 1: opt_b 8.05% : 0.007094s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000028s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000013s : 1: remove_dup_value 0.25% : 0.000223s : 1: renormalize.infer 0.25% : 0.000222s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000133s : 1: rewriter_after_opt_a 0.04% : 0.000034s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000089s : 1: symbol_engine_optimizer 77.69% : 0.068426s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.72% : 0.002393s : 1: type_inference 0.07% : 0.000062s : 1: validate TotalTime = 0.0803355, [21] [bootstrap]: 0.0003133 [type_inference]: 0.00231664 [auto_monad]: 0.00011142 [graph_reusing]: 1.87987e-06 [inline]: 1.17021e-06 [parallel-infer-symbol]: 1.91014e-06 [pre_auto_parallel]: 2.39699e-05 [insert-virtual-dataset]: 1.93994e-06 [parallel-infer-symbol-second]: 3.70201e-07 [dataset_repeat_opt]: 8.50298e-07 [pipeline_split]: 1.06031e-06 [optimize]: 0.00708619, [52] [py_interpret_to_execute]: 1.61403e-05 [rewriter_before_opt_a]: 3.15802e-05 [opt_a]: 0.00545051, [2] [Cycle 1]: 0.00154029, [43] [expand_dump_flag]: 2.82004e-06 [switch_simplify]: 2.95201e-05 [loop_unroll]: 1.35801e-05 [a_1]: 0.00033676 [recompute_prepare]: 8.86014e-06 [updatestate_depend_eliminate]: 7.68993e-06 [updatestate_assign_eliminate]: 5.52973e-06 [updatestate_loads_eliminate]: 6.63009e-06 [parameter_eliminate]: 2.47033e-06 [a_2]: 0.0001192 [accelerated_algorithm]: 8.64035e-06 [shard]: 1.51014e-06 [meta_shard_fg_expand]: 2.93972e-06 [shard_inline]: 9.02032e-06 [auto_parallel]: 1.07703e-05 [parallel]: 5.72996e-06 [flash_sp]: 8.82009e-06 [merge_comm]: 7.05011e-06 [allreduce_fusion]: 5.39981e-06 [matmul_add_comm_reduction]: 9.37004e-06 [allreduce_slice_to_reducescatter]: 4.89876e-07 [virtual_shard_identity]: 9.37004e-06 [virtual_dataset]: 8.46991e-06 [get_grad_eliminate_]: 8.19005e-06 [virtual_output]: 7.78027e-06 [merge_forward]: 5.5097e-06 [cell_reuse_recompute_pass]: 1.70013e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.69701e-05 [before_grad]: 1.35899e-05 [inplace_validation]: 4.73997e-06 [meta_fg_expand]: 5.60004e-06 [inplace_validation_after_expand]: 5.84964e-06 [flash_sp_send_recv_attached]: 3.71039e-06 [receive_attached]: 2.00979e-06 [after_resolve]: 1.114e-05 [a_after_grad]: 1.28499e-05 [special_op_eliminate]: 8.06991e-06 [renormalize]: 0.0004784 [add_forward_monad_depend]: 2.59979e-06 [auto_monad_grad]: 1.57999e-06 [auto_monad_eliminator]: 2.76999e-05 [cse]: 2.62898e-05 [a_3]: 6.10603e-05 [Cycle 2]: 0.00079113, [43] [expand_dump_flag]: 1.17999e-06 [switch_simplify]: 9.18005e-06 [loop_unroll]: 7.94977e-06 [a_1]: 0.00020658 [recompute_prepare]: 7.95024e-06 [updatestate_depend_eliminate]: 6.00982e-06 [updatestate_assign_eliminate]: 4.9402e-06 [updatestate_loads_eliminate]: 4.85033e-06 [parameter_eliminate]: 1.01002e-06 [a_2]: 0.0001085 [accelerated_algorithm]: 8.91974e-06 [shard]: 1.20001e-06 [meta_shard_fg_expand]: 2.56998e-06 [shard_inline]: 8.17003e-06 [auto_parallel]: 1.02301e-05 [parallel]: 3.0701e-06 [flash_sp]: 2.6701e-06 [merge_comm]: 5.72018e-06 [allreduce_fusion]: 5.22006e-06 [matmul_add_comm_reduction]: 7.22008e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 9.37982e-06 [virtual_dataset]: 8.33021e-06 [get_grad_eliminate_]: 7.72998e-06 [virtual_output]: 7.78027e-06 [merge_forward]: 4.5402e-06 [cell_reuse_recompute_pass]: 2.04006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.51503e-05 [before_grad]: 1.27e-05 [inplace_validation]: 4.17978e-06 [meta_fg_expand]: 4.61983e-06 [inplace_validation_after_expand]: 5.38025e-06 [flash_sp_send_recv_attached]: 8.69855e-07 [receive_attached]: 7.39936e-07 [after_resolve]: 9.63965e-06 [a_after_grad]: 1.24397e-05 [special_op_eliminate]: 7.56001e-06 [renormalize]: 7.96281e-08 [add_forward_monad_depend]: 8.00006e-07 [auto_monad_grad]: 9.19681e-07 [auto_monad_eliminator]: 1.66604e-05 [cse]: 1.90502e-05 [a_3]: 5.21704e-05 [py_interpret_to_execute_after_opt_a]: 9.26014e-06 [slice_cell_reuse_recomputed_activation]: 2.26032e-06 [rewriter_after_opt_a]: 0.00012635 [convert_after_rewriter]: 9.91998e-06 [order_py_execute_after_rewriter]: 5.81983e-06 [opt_b]: 0.00025185, [1] [Cycle 1]: 0.00024652, [7] [b_1]: 0.00017012 [b_2]: 1.02897e-05 [updatestate_depend_eliminate]: 5.4501e-06 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 5.15999e-06 [renormalize]: 2.69618e-07 [cse]: 1.91401e-05 [optimize_parallel_all_gather_comm]: 7.60984e-06 [overlap_param_gather]: 8.60076e-07 [cconv]: 1.651e-05 [loop_unroll]: 0.00048904 [opt_after_cconv]: 0.0001316, [1] [Cycle 1]: 0.00012562, [7] [c_1]: 5.34803e-05 [parameter_eliminate]: 2.01026e-06 [updatestate_depend_eliminate]: 7.58003e-06 [updatestate_assign_eliminate]: 4.72041e-06 [updatestate_loads_eliminate]: 4.93973e-06 [cse]: 1.95997e-05 [renormalize]: 2.99886e-07 [remove_dup_value]: 1.10101e-05 [tuple_transform]: 6.73202e-05, [1] [Cycle 1]: 6.29602e-05, [2] [d_1]: 5.389e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.87987e-06 [add_cache_embedding]: 1.20401e-05 [add_recomputation]: 5.41401e-05 [cse_after_recomputation]: 2.59797e-05, [1] [Cycle 1]: 2.14302e-05, [1] [cse]: 1.647e-05 [environ_conv]: 6.51972e-06 [swap_dp_allreduce_reducescatter]: 7.22008e-06 [bias_add_comm_swap]: 1.79e-06 [label_micro_interleaved_index]: 1.97021e-06 [label_fine_grained_interleaved_index]: 1.20979e-06 [merge_cast_opt]: 7.79983e-07 [slice_recompute_activation]: 1.13994e-06 [micro_interleaved_order_control]: 1.79e-06 [assign_add_opt]: 6.48014e-06 [ForceFp32Comm]: 7.00355e-07 [remove_cast_before_assign_add]: 6.29574e-07 [full_micro_interleaved_order_control]: 1.53994e-06 [reorder_send_recv_between_fp_bp]: 1.39e-06 [comm_op_add_attrs]: 5.89993e-07 [add_comm_op_reuse_tag]: 6.3004e-07 [interleave_split_concat_branches]: 5.49946e-07 [interleave_parallel_branches]: 5.20144e-07 [overlap_opt_shard_in_pipeline]: 6.59842e-07 [overlap_opt_shard_grad_in_pipeline]: 1.34995e-06 [control_data_broadcast_order]: 7.39936e-07 [grouped_pairwise_exchange_alltoall]: 7.69738e-07 [offloading_packed_experts]: 1.02026e-06 [overlap_recompute_and_grad_model_parallel]: 1.34995e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.39701e-07 [overlap_recompute_allgather_and_fa_grad]: 7.39936e-07 [overlap_grad_ring_attention]: 1.31968e-06 [overlap_grad_flash_sp]: 1.24797e-05 [begin_end_overlap_inline]: 5.49946e-07 [split_matmul_comm_elemetwise]: 1.8198e-06 [split_layernorm_comm]: 1.30991e-06 [handle_group_info]: 5.80214e-07 [symbol_engine_optimizer]: 8.75997e-05, [1] [Cycle 1]: 8.28695e-05, [6] [build]: 3.59025e-06 [elim_shapecalc]: 1.22399e-05 [elim_not_effective]: 1.63601e-05 [opt_reshape]: 9.89018e-06 [fold_const_symbol]: 1.42003e-05 [renormalize]: 2.59839e-07 [pipeline_parallel_scheduler]: 1.39e-06 [auto_monad_reorder]: 2.39001e-05 [get_jit_bprop_graph]: 3.50177e-07 [rewriter_after_jit_bprop_graph]: 3.29688e-07 [eliminate_special_op_node]: 0.00050142 [distribtued_split]: 3.46201e-05 [validate]: 3.07602e-05 [task_emit]: 0.0696413 [execute]: 8.62032e-06 Sums bootstrap : 0.000313s : 0.41% type_inference : 0.002317s : 3.04% auto_monad : 0.000111s : 0.15% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000032s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000543s : 0.71% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.02% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000228s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000478s : 0.63% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000044s : 0.06% optimize.opt_a.cse : 0.000045s : 0.06% optimize.opt_a.a_3 : 0.000113s : 0.15% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000126s : 0.17% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000170s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000017s : 0.02% optimize.loop_unroll : 0.000489s : 0.64% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000054s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000006s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000501s : 0.66% distribtued_split : 0.000035s : 0.05% validate : 0.000031s : 0.04% task_emit : 0.069641s : 91.33% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000120 63 4.34% : 0.000005s : 2: substitution.depend_value_elim 1.99% : 0.000002s : 5: substitution.elim_not_effective 2.16% : 0.000003s : 5: substitution.fold_const_symbol 5.08% : 0.000006s : 6: substitution.graph_param_transform 50.81% : 0.000061s : 1: substitution.inline 4.14% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.50% : 0.000004s : 6: substitution.load_eliminater 2.31% : 0.000003s : 2: substitution.reduce_all_const_elim 6.65% : 0.000008s : 10: substitution.remove_not_recompute_node 2.25% : 0.000003s : 2: substitution.replace_old_param 8.95% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.82% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002291 2 89.14% : 0.002043s : 1: type_inference.infer 10.86% : 0.000249s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000060 1 100.00% : 0.000060s : 1: match.inline ------[predicate.] 0.000229 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.23% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.15% : 0.000005s : 25: predicate.arithmetic_simplify 0.81% : 0.000002s : 13: predicate.cast_eliminate 0.87% : 0.000002s : 12: predicate.check_bprop_eliminate 0.76% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.25% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.92% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.94% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.52% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.27% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.18% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.99% : 0.000005s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.30% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.54% : 0.000013s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.08% : 0.000002s : 12: predicate.less_batch_normalization 1.64% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.49% : 0.000006s : 38: predicate.load_eliminater 1.25% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.16% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.86% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.84% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.61% : 0.000001s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.55% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.86% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.14% : 0.000003s : 13: predicate.reduce_eliminate 0.51% : 0.000001s : 12: predicate.remove_not_recompute_node 1.12% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.07% : 0.000002s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000002s : 12: predicate.shard_identity_eliminate 1.32% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.02% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.39% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 1.00% : 0.000002s : 14: predicate.switch_defer_inline 1.68% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.24% : 0.000010s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.82% : 0.000002s : 13: predicate.transpose_eliminate 1.84% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.67% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.85% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.48% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.43% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.53% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.83% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000140 4 8.01% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.99% : 0.000129s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089305 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000059s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.14% : 0.000123s : 1: auto_monad 0.03% : 0.000030s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000338s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000042s : 1: distribtued_split 0.58% : 0.000515s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000498s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001116s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.18% : 0.000160s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.04% : 0.000034s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 6.11% : 0.005454s : 1: opt_a 0.15% : 0.000135s : 1: opt_after_cconv 0.29% : 0.000255s : 1: opt_b 7.94% : 0.007095s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.26% : 0.000234s : 1: renormalize.infer 0.27% : 0.000238s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000132s : 1: rewriter_after_opt_a 0.04% : 0.000036s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000091s : 1: symbol_engine_optimizer 78.01% : 0.069667s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.61% : 0.002333s : 1: type_inference 0.07% : 0.000064s : 1: validate TotalTime = 0.0809104, [21] [bootstrap]: 0.00032341 [type_inference]: 0.00243963 [auto_monad]: 0.00013314 [graph_reusing]: 2.33017e-06 [inline]: 1.64984e-06 [parallel-infer-symbol]: 1.63959e-06 [pre_auto_parallel]: 2.639e-05 [insert-virtual-dataset]: 2.44984e-06 [parallel-infer-symbol-second]: 3.59956e-07 [dataset_repeat_opt]: 1.22981e-06 [pipeline_split]: 1.29966e-06 [optimize]: 0.00731852, [52] [py_interpret_to_execute]: 1.75298e-05 [rewriter_before_opt_a]: 3.62899e-05 [opt_a]: 0.00563164, [2] [Cycle 1]: 0.00162319, [43] [expand_dump_flag]: 3.21027e-06 [switch_simplify]: 2.99998e-05 [loop_unroll]: 1.43503e-05 [a_1]: 0.0003471 [recompute_prepare]: 1.02501e-05 [updatestate_depend_eliminate]: 8.99984e-06 [updatestate_assign_eliminate]: 6.02985e-06 [updatestate_loads_eliminate]: 7.02031e-06 [parameter_eliminate]: 3.05986e-06 [a_2]: 0.00012379 [accelerated_algorithm]: 9.60007e-06 [shard]: 2.36975e-06 [meta_shard_fg_expand]: 3.92972e-06 [shard_inline]: 8.88994e-06 [auto_parallel]: 1.23098e-05 [parallel]: 6.90017e-06 [flash_sp]: 9.75979e-06 [merge_comm]: 7.7798e-06 [allreduce_fusion]: 6.23008e-06 [matmul_add_comm_reduction]: 1.06497e-05 [allreduce_slice_to_reducescatter]: 9.49949e-07 [virtual_shard_identity]: 1.07302e-05 [virtual_dataset]: 8.42009e-06 [get_grad_eliminate_]: 9.12976e-06 [virtual_output]: 7.89016e-06 [merge_forward]: 5.57024e-06 [cell_reuse_recompute_pass]: 2.00002e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.77701e-05 [before_grad]: 1.375e-05 [inplace_validation]: 5.11995e-06 [meta_fg_expand]: 5.53997e-06 [inplace_validation_after_expand]: 6.2203e-06 [flash_sp_send_recv_attached]: 4.53973e-06 [receive_attached]: 2.50014e-06 [after_resolve]: 1.188e-05 [a_after_grad]: 1.411e-05 [special_op_eliminate]: 8.44011e-06 [renormalize]: 0.00050471 [add_forward_monad_depend]: 3.44962e-06 [auto_monad_grad]: 1.81003e-06 [auto_monad_eliminator]: 3.18801e-05 [cse]: 3.12198e-05 [a_3]: 6.402e-05 [Cycle 2]: 0.00079904, [43] [expand_dump_flag]: 1.03004e-06 [switch_simplify]: 9.45013e-06 [loop_unroll]: 7.80029e-06 [a_1]: 0.00020549 [recompute_prepare]: 8.17003e-06 [updatestate_depend_eliminate]: 6.18026e-06 [updatestate_assign_eliminate]: 5.30994e-06 [updatestate_loads_eliminate]: 5.63031e-06 [parameter_eliminate]: 1.19023e-06 [a_2]: 0.0001103 [accelerated_algorithm]: 8.95979e-06 [shard]: 1.36998e-06 [meta_shard_fg_expand]: 2.59979e-06 [shard_inline]: 8.14022e-06 [auto_parallel]: 1.093e-05 [parallel]: 3.75975e-06 [flash_sp]: 3.31039e-06 [merge_comm]: 5.9898e-06 [allreduce_fusion]: 5.29969e-06 [matmul_add_comm_reduction]: 8.74e-06 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 8.89041e-06 [virtual_dataset]: 8.04011e-06 [get_grad_eliminate_]: 7.87992e-06 [virtual_output]: 7.3202e-06 [merge_forward]: 4.54998e-06 [cell_reuse_recompute_pass]: 2.00002e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.63498e-05 [before_grad]: 1.28103e-05 [inplace_validation]: 4.59002e-06 [meta_fg_expand]: 5.03985e-06 [inplace_validation_after_expand]: 5.01005e-06 [flash_sp_send_recv_attached]: 9.69972e-07 [receive_attached]: 7.30157e-07 [after_resolve]: 1.01803e-05 [a_after_grad]: 1.23898e-05 [special_op_eliminate]: 7.79005e-06 [renormalize]: 8.98726e-08 [add_forward_monad_depend]: 9.09902e-07 [auto_monad_grad]: 1.22981e-06 [auto_monad_eliminator]: 1.92099e-05 [cse]: 2.01799e-05 [a_3]: 5.23599e-05 [py_interpret_to_execute_after_opt_a]: 1.00899e-05 [slice_cell_reuse_recomputed_activation]: 1.91992e-06 [rewriter_after_opt_a]: 0.00013693 [convert_after_rewriter]: 9.94001e-06 [order_py_execute_after_rewriter]: 6.42007e-06 [opt_b]: 0.00025135, [1] [Cycle 1]: 0.00024562, [7] [b_1]: 0.00017007 [b_2]: 1.02501e-05 [updatestate_depend_eliminate]: 5.56977e-06 [updatestate_assign_eliminate]: 4.4601e-06 [updatestate_loads_eliminate]: 5.22984e-06 [renormalize]: 4.50294e-07 [cse]: 1.88397e-05 [optimize_parallel_all_gather_comm]: 8.86014e-06 [overlap_param_gather]: 1.29035e-06 [cconv]: 2.21301e-05 [loop_unroll]: 0.00048051 [opt_after_cconv]: 0.00013659, [1] [Cycle 1]: 0.0001308, [7] [c_1]: 5.43999e-05 [parameter_eliminate]: 2.35997e-06 [updatestate_depend_eliminate]: 8.69995e-06 [updatestate_assign_eliminate]: 5.0799e-06 [updatestate_loads_eliminate]: 5.44032e-06 [cse]: 2.22502e-05 [renormalize]: 3.90224e-07 [remove_dup_value]: 1.21398e-05 [tuple_transform]: 7.13198e-05, [1] [Cycle 1]: 6.66501e-05, [2] [d_1]: 5.69099e-05 [renormalize]: 2.00234e-07 [partial_unused_args_eliminate]: 1.79e-06 [add_cache_embedding]: 1.27004e-05 [add_recomputation]: 6.183e-05 [cse_after_recomputation]: 2.75997e-05, [1] [Cycle 1]: 2.26102e-05, [1] [cse]: 1.77496e-05 [environ_conv]: 6.85034e-06 [swap_dp_allreduce_reducescatter]: 8.06013e-06 [bias_add_comm_swap]: 2.14996e-06 [label_micro_interleaved_index]: 1.81003e-06 [label_fine_grained_interleaved_index]: 1.95019e-06 [merge_cast_opt]: 1.20001e-06 [slice_recompute_activation]: 1.53016e-06 [micro_interleaved_order_control]: 1.57021e-06 [assign_add_opt]: 7.61962e-06 [ForceFp32Comm]: 8.09785e-07 [remove_cast_before_assign_add]: 7.59959e-07 [full_micro_interleaved_order_control]: 1.74996e-06 [reorder_send_recv_between_fp_bp]: 1.58977e-06 [comm_op_add_attrs]: 1.22981e-06 [add_comm_op_reuse_tag]: 7.59959e-07 [interleave_split_concat_branches]: 5.89993e-07 [interleave_parallel_branches]: 6.80331e-07 [overlap_opt_shard_in_pipeline]: 9.29926e-07 [overlap_opt_shard_grad_in_pipeline]: 1.87987e-06 [control_data_broadcast_order]: 8.29808e-07 [grouped_pairwise_exchange_alltoall]: 1.05985e-06 [offloading_packed_experts]: 1.02026e-06 [overlap_recompute_and_grad_model_parallel]: 1.60001e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.49946e-07 [overlap_recompute_allgather_and_fa_grad]: 8.70321e-07 [overlap_grad_ring_attention]: 1.95019e-06 [overlap_grad_flash_sp]: 1.40402e-05 [begin_end_overlap_inline]: 7.70204e-07 [split_matmul_comm_elemetwise]: 1.74996e-06 [split_layernorm_comm]: 1.59023e-06 [handle_group_info]: 9.20147e-07 [symbol_engine_optimizer]: 8.83299e-05, [1] [Cycle 1]: 8.39103e-05, [6] [build]: 4.42984e-06 [elim_shapecalc]: 1.29901e-05 [elim_not_effective]: 1.71801e-05 [opt_reshape]: 9.32021e-06 [fold_const_symbol]: 1.45501e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 1.43982e-06 [auto_monad_reorder]: 2.94899e-05 [get_jit_bprop_graph]: 4.69852e-07 [rewriter_after_jit_bprop_graph]: 5.60191e-07 [eliminate_special_op_node]: 0.00049262 [distribtued_split]: 3.97e-05 [validate]: 3.376e-05 [task_emit]: 0.0698087 [execute]: 9.2499e-06 Sums bootstrap : 0.000323s : 0.42% type_inference : 0.002440s : 3.18% auto_monad : 0.000133s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000018s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000553s : 0.72% optimize.opt_a.recompute_prepare : 0.000018s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000234s : 0.31% optimize.opt_a.accelerated_algorithm : 0.000019s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000012s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000017s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000026s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000505s : 0.66% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000051s : 0.07% optimize.opt_a.a_3 : 0.000116s : 0.15% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000137s : 0.18% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000170s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000481s : 0.63% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000493s : 0.64% distribtued_split : 0.000040s : 0.05% validate : 0.000034s : 0.04% task_emit : 0.069809s : 90.99% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000132 63 4.65% : 0.000006s : 2: substitution.depend_value_elim 1.88% : 0.000002s : 5: substitution.elim_not_effective 1.76% : 0.000002s : 5: substitution.fold_const_symbol 5.21% : 0.000007s : 6: substitution.graph_param_transform 51.71% : 0.000068s : 1: substitution.inline 4.10% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.29% : 0.000004s : 6: substitution.load_eliminater 2.32% : 0.000003s : 2: substitution.reduce_all_const_elim 5.84% : 0.000008s : 10: substitution.remove_not_recompute_node 2.51% : 0.000003s : 2: substitution.replace_old_param 8.78% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.94% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002412 2 88.19% : 0.002127s : 1: type_inference.infer 11.81% : 0.000285s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000067 1 100.00% : 0.000067s : 1: match.inline ------[predicate.] 0.000228 1420 0.84% : 0.000002s : 13: predicate.accumulaten_eliminater 1.26% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.17% : 0.000005s : 25: predicate.arithmetic_simplify 0.77% : 0.000002s : 13: predicate.cast_eliminate 0.88% : 0.000002s : 12: predicate.check_bprop_eliminate 0.80% : 0.000002s : 12: predicate.compare_switch_simplify 0.25% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.38% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.80% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.90% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.62% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 2.01% : 0.000005s : 31: predicate.environ_get_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.29% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.13% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.92% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.63% : 0.000013s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.13% : 0.000003s : 12: predicate.less_batch_normalization 1.79% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.42% : 0.000006s : 38: predicate.load_eliminater 1.35% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.76% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.81% : 0.000002s : 12: predicate.merge_addn 0.80% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.84% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.79% : 0.000002s : 13: predicate.minmaximum_grad 0.73% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.50% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.85% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000002s : 13: predicate.reduce_eliminate 0.50% : 0.000001s : 12: predicate.remove_not_recompute_node 1.21% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.92% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.08% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.06% : 0.000002s : 12: predicate.shard_identity_eliminate 1.29% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.05% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.23% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.65% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.35% : 0.000010s : 43: predicate.switch_simplify 0.71% : 0.000002s : 13: predicate.tile_eliminate 0.86% : 0.000002s : 13: predicate.transpose_eliminate 1.78% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.76% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.53% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.59% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.49% : 0.000003s : 25: predicate.tuple_list_get_set_item_eliminator 2.58% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.39% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.57% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.81% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000157 4 9.95% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.05% : 0.000141s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090168 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.16% : 0.000147s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.39% : 0.000349s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000047s : 1: distribtued_split 0.56% : 0.000507s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000490s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.27% : 0.001142s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.18% : 0.000160s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000035s : 3: opt.transform.special_op_eliminate 0.06% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.25% : 0.005635s : 1: opt_a 0.16% : 0.000141s : 1: opt_after_cconv 0.28% : 0.000255s : 1: opt_b 8.13% : 0.007327s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.02% : 0.000022s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.33% : 0.000294s : 1: renormalize.infer 0.23% : 0.000204s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000142s : 1: rewriter_after_opt_a 0.05% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000091s : 1: symbol_engine_optimizer 77.45% : 0.069835s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.73% : 0.002458s : 1: type_inference 0.08% : 0.000068s : 1: validate TotalTime = 0.0821576, [21] [bootstrap]: 0.00032123 [type_inference]: 0.00255995 [auto_monad]: 0.00013725 [graph_reusing]: 2.20025e-06 [inline]: 1.81003e-06 [parallel-infer-symbol]: 2.39024e-06 [pre_auto_parallel]: 2.58698e-05 [insert-virtual-dataset]: 3.0701e-06 [parallel-infer-symbol-second]: 4.69852e-07 [dataset_repeat_opt]: 1.66008e-06 [pipeline_split]: 1.79e-06 [optimize]: 0.00736817, [52] [py_interpret_to_execute]: 1.72104e-05 [rewriter_before_opt_a]: 3.475e-05 [opt_a]: 0.00562142, [2] [Cycle 1]: 0.00159603, [43] [expand_dump_flag]: 3.72995e-06 [switch_simplify]: 2.98899e-05 [loop_unroll]: 1.312e-05 [a_1]: 0.00034579 [recompute_prepare]: 8.82987e-06 [updatestate_depend_eliminate]: 9.0301e-06 [updatestate_assign_eliminate]: 6.29015e-06 [updatestate_loads_eliminate]: 7.80029e-06 [parameter_eliminate]: 3.68012e-06 [a_2]: 0.00011893 [accelerated_algorithm]: 8.5202e-06 [shard]: 2.14018e-06 [meta_shard_fg_expand]: 4.38001e-06 [shard_inline]: 8.04989e-06 [auto_parallel]: 1.24704e-05 [parallel]: 7.75e-06 [flash_sp]: 1.055e-05 [merge_comm]: 8.08993e-06 [allreduce_fusion]: 5.89993e-06 [matmul_add_comm_reduction]: 1.15898e-05 [allreduce_slice_to_reducescatter]: 4.70318e-07 [virtual_shard_identity]: 9.05991e-06 [virtual_dataset]: 7.52974e-06 [get_grad_eliminate_]: 7.7798e-06 [virtual_output]: 7.51996e-06 [merge_forward]: 6.38003e-06 [cell_reuse_recompute_pass]: 1.89012e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.83e-05 [before_grad]: 1.45203e-05 [inplace_validation]: 6.09038e-06 [meta_fg_expand]: 5.74021e-06 [inplace_validation_after_expand]: 6.88015e-06 [flash_sp_send_recv_attached]: 5.62007e-06 [receive_attached]: 2.72971e-06 [after_resolve]: 1.16103e-05 [a_after_grad]: 1.26301e-05 [special_op_eliminate]: 8.04011e-06 [renormalize]: 0.00045123 [add_forward_monad_depend]: 3.55998e-06 [auto_monad_grad]: 1.94972e-06 [auto_monad_eliminator]: 3.28198e-05 [cse]: 3.12598e-05 [a_3]: 5.962e-05 [Cycle 2]: 0.00079191, [43] [expand_dump_flag]: 1.15018e-06 [switch_simplify]: 9.04035e-06 [loop_unroll]: 7.6103e-06 [a_1]: 0.00020625 [recompute_prepare]: 7.83009e-06 [updatestate_depend_eliminate]: 6.19981e-06 [updatestate_assign_eliminate]: 4.97978e-06 [updatestate_loads_eliminate]: 4.99003e-06 [parameter_eliminate]: 1.36998e-06 [a_2]: 0.00010945 [accelerated_algorithm]: 9.07993e-06 [shard]: 1.38022e-06 [meta_shard_fg_expand]: 2.6403e-06 [shard_inline]: 8.24034e-06 [auto_parallel]: 1.13398e-05 [parallel]: 3.32994e-06 [flash_sp]: 3.47011e-06 [merge_comm]: 5.86966e-06 [allreduce_fusion]: 4.52995e-06 [matmul_add_comm_reduction]: 7.7798e-06 [allreduce_slice_to_reducescatter]: 3.1013e-07 [virtual_shard_identity]: 9.11998e-06 [virtual_dataset]: 7.66013e-06 [get_grad_eliminate_]: 7.32997e-06 [virtual_output]: 7.2401e-06 [merge_forward]: 4.90993e-06 [cell_reuse_recompute_pass]: 1.83005e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.55699e-05 [before_grad]: 1.28699e-05 [inplace_validation]: 4.65987e-06 [meta_fg_expand]: 4.97e-06 [inplace_validation_after_expand]: 5.27035e-06 [flash_sp_send_recv_attached]: 9.89996e-07 [receive_attached]: 7.30157e-07 [after_resolve]: 1.00601e-05 [a_after_grad]: 1.24797e-05 [special_op_eliminate]: 7.59028e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 8.89879e-07 [auto_monad_grad]: 1.13994e-06 [auto_monad_eliminator]: 1.86302e-05 [cse]: 1.93799e-05 [a_3]: 4.87496e-05 [py_interpret_to_execute_after_opt_a]: 9.14e-06 [slice_cell_reuse_recomputed_activation]: 2.25008e-06 [rewriter_after_opt_a]: 0.00013693 [convert_after_rewriter]: 1.116e-05 [order_py_execute_after_rewriter]: 6.06012e-06 [opt_b]: 0.00024435, [1] [Cycle 1]: 0.00023876, [7] [b_1]: 0.00016381 [b_2]: 9.85013e-06 [updatestate_depend_eliminate]: 5.67967e-06 [updatestate_assign_eliminate]: 4.60958e-06 [updatestate_loads_eliminate]: 5.19026e-06 [renormalize]: 1.99769e-07 [cse]: 1.83699e-05 [optimize_parallel_all_gather_comm]: 8.81031e-06 [overlap_param_gather]: 1.55019e-06 [cconv]: 2.23098e-05 [loop_unroll]: 0.00053818 [opt_after_cconv]: 0.00013819, [1] [Cycle 1]: 0.00013164, [7] [c_1]: 5.40903e-05 [parameter_eliminate]: 2.28034e-06 [updatestate_depend_eliminate]: 8.30973e-06 [updatestate_assign_eliminate]: 5.03007e-06 [updatestate_loads_eliminate]: 5.28991e-06 [cse]: 2.18004e-05 [renormalize]: 4.00003e-07 [remove_dup_value]: 1.16997e-05 [tuple_transform]: 7.02101e-05, [1] [Cycle 1]: 6.56e-05, [2] [d_1]: 5.59199e-05 [renormalize]: 2.00234e-07 [partial_unused_args_eliminate]: 1.91014e-06 [add_cache_embedding]: 1.44499e-05 [add_recomputation]: 6.24e-05 [cse_after_recomputation]: 2.59299e-05, [1] [Cycle 1]: 2.12998e-05, [1] [cse]: 1.61799e-05 [environ_conv]: 7.59959e-06 [swap_dp_allreduce_reducescatter]: 6.97002e-06 [bias_add_comm_swap]: 2.51038e-06 [label_micro_interleaved_index]: 1.70991e-06 [label_fine_grained_interleaved_index]: 2.20025e-06 [merge_cast_opt]: 1.39978e-06 [slice_recompute_activation]: 2.13971e-06 [micro_interleaved_order_control]: 1.89012e-06 [assign_add_opt]: 6.88015e-06 [ForceFp32Comm]: 8.49832e-07 [remove_cast_before_assign_add]: 1.0496e-06 [full_micro_interleaved_order_control]: 1.8999e-06 [reorder_send_recv_between_fp_bp]: 2.43029e-06 [comm_op_add_attrs]: 8.00006e-07 [add_comm_op_reuse_tag]: 1.09011e-06 [interleave_split_concat_branches]: 1.05985e-06 [interleave_parallel_branches]: 6.20261e-07 [overlap_opt_shard_in_pipeline]: 1.11014e-06 [overlap_opt_shard_grad_in_pipeline]: 2.23005e-06 [control_data_broadcast_order]: 1.22003e-06 [grouped_pairwise_exchange_alltoall]: 1.60001e-06 [offloading_packed_experts]: 1.30013e-06 [overlap_recompute_and_grad_model_parallel]: 2.48011e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.20979e-06 [overlap_recompute_allgather_and_fa_grad]: 9.80217e-07 [overlap_grad_ring_attention]: 1.24006e-06 [overlap_grad_flash_sp]: 1.506e-05 [begin_end_overlap_inline]: 7.59959e-07 [split_matmul_comm_elemetwise]: 2.42982e-06 [split_layernorm_comm]: 1.57999e-06 [handle_group_info]: 1.26008e-06 [symbol_engine_optimizer]: 8.58903e-05, [1] [Cycle 1]: 8.11699e-05, [6] [build]: 3.77977e-06 [elim_shapecalc]: 1.16602e-05 [elim_not_effective]: 1.666e-05 [opt_reshape]: 8.72975e-06 [fold_const_symbol]: 1.39298e-05 [renormalize]: 1.99769e-07 [pipeline_parallel_scheduler]: 1.64006e-06 [auto_monad_reorder]: 2.89502e-05 [get_jit_bprop_graph]: 8.2003e-07 [rewriter_after_jit_bprop_graph]: 4.80097e-07 [eliminate_special_op_node]: 0.00052147 [distribtued_split]: 4.13801e-05 [validate]: 3.54899e-05 [task_emit]: 0.0708469 [execute]: 1.15898e-05 Sums bootstrap : 0.000321s : 0.41% type_inference : 0.002560s : 3.29% auto_monad : 0.000137s : 0.18% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000552s : 0.71% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000228s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000451s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000051s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000137s : 0.18% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000164s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000538s : 0.69% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000521s : 0.67% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.070847s : 90.95% execute : 0.000012s : 0.01% Time group info: ------[substitution.] 0.000133 63 5.17% : 0.000007s : 2: substitution.depend_value_elim 2.07% : 0.000003s : 5: substitution.elim_not_effective 1.78% : 0.000002s : 5: substitution.fold_const_symbol 5.26% : 0.000007s : 6: substitution.graph_param_transform 51.07% : 0.000068s : 1: substitution.inline 4.12% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.13% : 0.000004s : 6: substitution.load_eliminater 2.80% : 0.000004s : 2: substitution.reduce_all_const_elim 6.40% : 0.000009s : 10: substitution.remove_not_recompute_node 2.20% : 0.000003s : 2: substitution.replace_old_param 8.50% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.48% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002530 2 88.58% : 0.002241s : 1: type_inference.infer 11.42% : 0.000289s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000067 1 100.00% : 0.000067s : 1: match.inline ------[predicate.] 0.000231 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.19% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.26% : 0.000005s : 25: predicate.arithmetic_simplify 0.78% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.40% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.46% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.79% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.96% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.54% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.86% : 0.000004s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.34% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.25% : 0.000001s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.49% : 0.000013s : 63: predicate.inline 1.11% : 0.000003s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.15% : 0.000003s : 12: predicate.less_batch_normalization 1.72% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.41% : 0.000006s : 38: predicate.load_eliminater 1.39% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.82% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.71% : 0.000002s : 13: predicate.minmaximum_grad 0.80% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.50% : 0.000001s : 6: predicate.parallel_virtual_node 1.24% : 0.000003s : 14: predicate.partial_defer_inline 1.20% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.84% : 0.000002s : 12: predicate.reduce_all_const_elim 1.23% : 0.000003s : 13: predicate.reduce_eliminate 0.52% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.80% : 0.000002s : 13: predicate.reshape_eliminate 0.78% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.56% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.40% : 0.000003s : 18: predicate.special_op_eliminate 0.91% : 0.000002s : 12: predicate.specialize_transform 1.04% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.26% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.42% : 0.000010s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.84% : 0.000002s : 13: predicate.transpose_eliminate 1.81% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.74% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.63% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.81% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.63% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.43% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.53% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000164 4 9.63% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.37% : 0.000148s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091375 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000067s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.16% : 0.000150s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000346s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.59% : 0.000536s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.60% : 0.000549s : 1: loop_unroll 0.01% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001115s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.16% : 0.005626s : 1: opt_a 0.16% : 0.000142s : 1: opt_after_cconv 0.27% : 0.000247s : 1: opt_b 8.07% : 0.007376s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000022s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.27% : 0.000245s : 1: renormalize.infer 0.22% : 0.000200s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000143s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000089s : 1: symbol_engine_optimizer 77.56% : 0.070874s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.82% : 0.002579s : 1: type_inference 0.08% : 0.000070s : 1: validate TotalTime = 0.0832977, [21] [bootstrap]: 0.00035382 [type_inference]: 0.00263593 [auto_monad]: 0.00021801 [graph_reusing]: 2.86987e-06 [inline]: 1.39e-06 [parallel-infer-symbol]: 2.40002e-06 [pre_auto_parallel]: 2.80403e-05 [insert-virtual-dataset]: 2.82004e-06 [parallel-infer-symbol-second]: 3.89758e-07 [dataset_repeat_opt]: 1.36998e-06 [pipeline_split]: 1.6503e-06 [optimize]: 0.00744902, [52] [py_interpret_to_execute]: 1.89203e-05 [rewriter_before_opt_a]: 3.82997e-05 [opt_a]: 0.00567996, [2] [Cycle 1]: 0.00167206, [43] [expand_dump_flag]: 4.10993e-06 [switch_simplify]: 3.12701e-05 [loop_unroll]: 1.35899e-05 [a_1]: 0.00035532 [recompute_prepare]: 9.11998e-06 [updatestate_depend_eliminate]: 9.0301e-06 [updatestate_assign_eliminate]: 6.31995e-06 [updatestate_loads_eliminate]: 8.64035e-06 [parameter_eliminate]: 3.74019e-06 [a_2]: 0.00017106 [accelerated_algorithm]: 9.2499e-06 [shard]: 2.86987e-06 [meta_shard_fg_expand]: 4.0601e-06 [shard_inline]: 9.41986e-06 [auto_parallel]: 1.30897e-05 [parallel]: 8.48016e-06 [flash_sp]: 1.33603e-05 [merge_comm]: 8.71997e-06 [allreduce_fusion]: 6.3302e-06 [matmul_add_comm_reduction]: 1.18399e-05 [allreduce_slice_to_reducescatter]: 4.90341e-07 [virtual_shard_identity]: 9.77982e-06 [virtual_dataset]: 7.80961e-06 [get_grad_eliminate_]: 8.1202e-06 [virtual_output]: 7.94977e-06 [merge_forward]: 6.27991e-06 [cell_reuse_recompute_pass]: 2.44007e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.807e-05 [before_grad]: 1.36397e-05 [inplace_validation]: 6.07967e-06 [meta_fg_expand]: 6.08014e-06 [inplace_validation_after_expand]: 6.76978e-06 [flash_sp_send_recv_attached]: 5.47012e-06 [receive_attached]: 2.98023e-06 [after_resolve]: 1.22101e-05 [a_after_grad]: 1.24397e-05 [special_op_eliminate]: 8.66968e-06 [renormalize]: 0.0004695 [add_forward_monad_depend]: 3.82029e-06 [auto_monad_grad]: 1.95997e-06 [auto_monad_eliminator]: 3.36301e-05 [cse]: 3.62499e-05 [a_3]: 5.94398e-05 [Cycle 2]: 0.00080628, [43] [expand_dump_flag]: 1.15018e-06 [switch_simplify]: 9.61032e-06 [loop_unroll]: 7.79005e-06 [a_1]: 0.00020286 [recompute_prepare]: 7.8897e-06 [updatestate_depend_eliminate]: 6.23986e-06 [updatestate_assign_eliminate]: 5.11995e-06 [updatestate_loads_eliminate]: 4.92996e-06 [parameter_eliminate]: 1.43982e-06 [a_2]: 0.00010555 [accelerated_algorithm]: 8.40006e-06 [shard]: 1.24006e-06 [meta_shard_fg_expand]: 2.73017e-06 [shard_inline]: 8.16025e-06 [auto_parallel]: 1.11097e-05 [parallel]: 3.62005e-06 [flash_sp]: 3.30992e-06 [merge_comm]: 5.90971e-06 [allreduce_fusion]: 4.92996e-06 [matmul_add_comm_reduction]: 7.7798e-06 [allreduce_slice_to_reducescatter]: 2.59839e-07 [virtual_shard_identity]: 9.1102e-06 [virtual_dataset]: 7.7798e-06 [get_grad_eliminate_]: 7.18003e-06 [virtual_output]: 7.68015e-06 [merge_forward]: 4.57e-06 [cell_reuse_recompute_pass]: 1.89012e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.54697e-05 [before_grad]: 1.28299e-05 [inplace_validation]: 4.24962e-06 [meta_fg_expand]: 5.01005e-06 [inplace_validation_after_expand]: 5.77979e-06 [flash_sp_send_recv_attached]: 9.49949e-07 [receive_attached]: 1.05007e-06 [after_resolve]: 1.03801e-05 [a_after_grad]: 1.21701e-05 [special_op_eliminate]: 7.77002e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 8.69855e-07 [auto_monad_grad]: 1.2801e-06 [auto_monad_eliminator]: 1.92598e-05 [cse]: 1.95899e-05 [a_3]: 4.96404e-05 [py_interpret_to_execute_after_opt_a]: 8.86014e-06 [slice_cell_reuse_recomputed_activation]: 2.78e-06 [rewriter_after_opt_a]: 0.00015106 [convert_after_rewriter]: 1.12699e-05 [order_py_execute_after_rewriter]: 6.31995e-06 [opt_b]: 0.0002489, [1] [Cycle 1]: 0.00024248, [7] [b_1]: 0.00016387 [b_2]: 1.00401e-05 [updatestate_depend_eliminate]: 5.17024e-06 [updatestate_assign_eliminate]: 4.36977e-06 [updatestate_loads_eliminate]: 5.28991e-06 [renormalize]: 3.39933e-07 [cse]: 1.93701e-05 [optimize_parallel_all_gather_comm]: 8.51974e-06 [overlap_param_gather]: 1.97021e-06 [cconv]: 2.60901e-05 [loop_unroll]: 0.00050065 [opt_after_cconv]: 0.00013899, [1] [Cycle 1]: 0.00013166, [7] [c_1]: 5.39399e-05 [parameter_eliminate]: 2.47033e-06 [updatestate_depend_eliminate]: 8.10018e-06 [updatestate_assign_eliminate]: 5.0799e-06 [updatestate_loads_eliminate]: 5.30016e-06 [cse]: 2.12896e-05 [renormalize]: 4.20026e-07 [remove_dup_value]: 1.56499e-05 [tuple_transform]: 7.20001e-05, [1] [Cycle 1]: 6.693e-05, [2] [d_1]: 5.665e-05 [renormalize]: 2.30037e-07 [partial_unused_args_eliminate]: 2.1602e-06 [add_cache_embedding]: 1.45701e-05 [add_recomputation]: 6.59702e-05 [cse_after_recomputation]: 2.77096e-05, [1] [Cycle 1]: 2.23503e-05, [1] [cse]: 1.69603e-05 [environ_conv]: 7.89016e-06 [swap_dp_allreduce_reducescatter]: 8.31019e-06 [bias_add_comm_swap]: 2.17976e-06 [label_micro_interleaved_index]: 2.01026e-06 [label_fine_grained_interleaved_index]: 2.37022e-06 [merge_cast_opt]: 1.4198e-06 [slice_recompute_activation]: 2.33995e-06 [micro_interleaved_order_control]: 1.95997e-06 [assign_add_opt]: 7.49016e-06 [ForceFp32Comm]: 9.79751e-07 [remove_cast_before_assign_add]: 1.11992e-06 [full_micro_interleaved_order_control]: 2.31992e-06 [reorder_send_recv_between_fp_bp]: 2.54018e-06 [comm_op_add_attrs]: 1.17999e-06 [add_comm_op_reuse_tag]: 1.06031e-06 [interleave_split_concat_branches]: 8.801e-07 [interleave_parallel_branches]: 1.20001e-06 [overlap_opt_shard_in_pipeline]: 1.86032e-06 [overlap_opt_shard_grad_in_pipeline]: 2.33995e-06 [control_data_broadcast_order]: 1.41002e-06 [grouped_pairwise_exchange_alltoall]: 1.26986e-06 [offloading_packed_experts]: 1.13016e-06 [overlap_recompute_and_grad_model_parallel]: 2.23983e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.15018e-06 [overlap_recompute_allgather_and_fa_grad]: 1.19023e-06 [overlap_grad_ring_attention]: 2.25008e-06 [overlap_grad_flash_sp]: 1.67298e-05 [begin_end_overlap_inline]: 7.90227e-07 [split_matmul_comm_elemetwise]: 2.06009e-06 [split_layernorm_comm]: 2.14018e-06 [handle_group_info]: 9.99775e-07 [symbol_engine_optimizer]: 9.22601e-05, [1] [Cycle 1]: 8.7e-05, [6] [build]: 4.1998e-06 [elim_shapecalc]: 1.295e-05 [elim_not_effective]: 1.645e-05 [opt_reshape]: 9.43011e-06 [fold_const_symbol]: 1.48001e-05 [renormalize]: 2.59839e-07 [pipeline_parallel_scheduler]: 2.06009e-06 [auto_monad_reorder]: 3.161e-05 [get_jit_bprop_graph]: 5.0012e-07 [rewriter_after_jit_bprop_graph]: 5.90459e-07 [eliminate_special_op_node]: 0.00056142 [distribtued_split]: 4.37303e-05 [validate]: 3.76101e-05 [task_emit]: 0.0716399 [execute]: 1.308e-05 Sums bootstrap : 0.000354s : 0.45% type_inference : 0.002636s : 3.34% auto_monad : 0.000218s : 0.28% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000028s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000019s : 0.02% optimize.rewriter_before_opt_a : 0.000038s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000041s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000558s : 0.71% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000277s : 0.35% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000004s : 0.01% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000018s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000017s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.01% optimize.opt_a.after_resolve : 0.000023s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000470s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000053s : 0.07% optimize.opt_a.cse : 0.000056s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000151s : 0.19% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000164s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000026s : 0.03% optimize.loop_unroll : 0.000501s : 0.63% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000016s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000066s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000017s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000561s : 0.71% distribtued_split : 0.000044s : 0.06% validate : 0.000038s : 0.05% task_emit : 0.071640s : 90.65% execute : 0.000013s : 0.02% TotalTime = 0.0835247, [21] [bootstrap]: 0.00039622 [type_inference]: 0.00289578 [auto_monad]: 0.00013478 [graph_reusing]: 2.76975e-06 [inline]: 1.38022e-06 [parallel-infer-symbol]: 2.09967e-06 [pre_auto_parallel]: 2.56896e-05 [insert-virtual-dataset]: 2.86009e-06 [parallel-infer-symbol-second]: 3.89758e-07 [dataset_repeat_opt]: 1.41002e-06 [pipeline_split]: 1.48034e-06 [optimize]: 0.00753098, [52] [py_interpret_to_execute]: 3.56296e-05 [rewriter_before_opt_a]: 3.76399e-05 [opt_a]: 0.00577678, [2] [Cycle 1]: 0.00174362, [43] [expand_dump_flag]: 4.06988e-06 [switch_simplify]: 4.52199e-05 [loop_unroll]: 1.42599e-05 [a_1]: 0.00042209 [recompute_prepare]: 9.77982e-06 [updatestate_depend_eliminate]: 8.97003e-06 [updatestate_assign_eliminate]: 6.11041e-06 [updatestate_loads_eliminate]: 7.90041e-06 [parameter_eliminate]: 3.41004e-06 [a_2]: 0.00012143 [accelerated_algorithm]: 8.79029e-06 [shard]: 2.21981e-06 [meta_shard_fg_expand]: 4.24031e-06 [shard_inline]: 8.80007e-06 [auto_parallel]: 1.21198e-05 [parallel]: 7.87014e-06 [flash_sp]: 1.12304e-05 [merge_comm]: 7.63033e-06 [allreduce_fusion]: 5.22984e-06 [matmul_add_comm_reduction]: 1.10702e-05 [allreduce_slice_to_reducescatter]: 4.49829e-07 [virtual_shard_identity]: 9.21031e-06 [virtual_dataset]: 8.92021e-06 [get_grad_eliminate_]: 8.29017e-06 [virtual_output]: 8.08993e-06 [merge_forward]: 6.16023e-06 [cell_reuse_recompute_pass]: 1.95997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.75e-05 [before_grad]: 1.39102e-05 [inplace_validation]: 5.72996e-06 [meta_fg_expand]: 5.58002e-06 [inplace_validation_after_expand]: 6.42007e-06 [flash_sp_send_recv_attached]: 5.59026e-06 [receive_attached]: 2.71993e-06 [after_resolve]: 1.16001e-05 [a_after_grad]: 1.274e-05 [special_op_eliminate]: 8.2301e-06 [renormalize]: 0.00052666 [add_forward_monad_depend]: 3.58978e-06 [auto_monad_grad]: 2.10013e-06 [auto_monad_eliminator]: 3.53302e-05 [cse]: 3.47202e-05 [a_3]: 6.099e-05 [Cycle 2]: 0.00080423, [43] [expand_dump_flag]: 1.13016e-06 [switch_simplify]: 9.81987e-06 [loop_unroll]: 8.42009e-06 [a_1]: 0.00020752 [recompute_prepare]: 7.81007e-06 [updatestate_depend_eliminate]: 6.32973e-06 [updatestate_assign_eliminate]: 5.16977e-06 [updatestate_loads_eliminate]: 5.56e-06 [parameter_eliminate]: 1.32015e-06 [a_2]: 0.00010967 [accelerated_algorithm]: 9.03988e-06 [shard]: 1.22003e-06 [meta_shard_fg_expand]: 2.76975e-06 [shard_inline]: 8.48994e-06 [auto_parallel]: 1.11e-05 [parallel]: 3.41004e-06 [flash_sp]: 3.0403e-06 [merge_comm]: 5.91995e-06 [allreduce_fusion]: 5.02961e-06 [matmul_add_comm_reduction]: 8.21985e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 9.39006e-06 [virtual_dataset]: 8.31997e-06 [get_grad_eliminate_]: 7.96001e-06 [virtual_output]: 7.81985e-06 [merge_forward]: 4.66965e-06 [cell_reuse_recompute_pass]: 1.91014e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.592e-05 [before_grad]: 1.28304e-05 [inplace_validation]: 4.44986e-06 [meta_fg_expand]: 4.76977e-06 [inplace_validation_after_expand]: 5.47012e-06 [flash_sp_send_recv_attached]: 1.01002e-06 [receive_attached]: 7.00355e-07 [after_resolve]: 1.03703e-05 [a_after_grad]: 1.253e-05 [special_op_eliminate]: 7.59028e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.801e-07 [auto_monad_grad]: 1.30991e-06 [auto_monad_eliminator]: 1.891e-05 [cse]: 2.00402e-05 [a_3]: 5.08199e-05 [py_interpret_to_execute_after_opt_a]: 8.77958e-06 [slice_cell_reuse_recomputed_activation]: 2.22027e-06 [rewriter_after_opt_a]: 0.00014485 [convert_after_rewriter]: 1.16499e-05 [order_py_execute_after_rewriter]: 6.53975e-06 [opt_b]: 0.00025211, [1] [Cycle 1]: 0.00024639, [7] [b_1]: 0.00016937 [b_2]: 1.097e-05 [updatestate_depend_eliminate]: 5.70994e-06 [updatestate_assign_eliminate]: 4.61983e-06 [updatestate_loads_eliminate]: 5.35976e-06 [renormalize]: 3.7998e-07 [cse]: 1.89696e-05 [optimize_parallel_all_gather_comm]: 8.82009e-06 [overlap_param_gather]: 1.43982e-06 [cconv]: 2.37497e-05 [loop_unroll]: 0.00050559 [opt_after_cconv]: 0.00013583, [1] [Cycle 1]: 0.00012945, [7] [c_1]: 5.44302e-05 [parameter_eliminate]: 2.53972e-06 [updatestate_depend_eliminate]: 8.38982e-06 [updatestate_assign_eliminate]: 4.80982e-06 [updatestate_loads_eliminate]: 5.44032e-06 [cse]: 2.209e-05 [renormalize]: 4.10248e-07 [remove_dup_value]: 1.42599e-05 [tuple_transform]: 7.055e-05, [1] [Cycle 1]: 6.60801e-05, [2] [d_1]: 5.663e-05 [renormalize]: 2.40281e-07 [partial_unused_args_eliminate]: 1.91014e-06 [add_cache_embedding]: 1.41901e-05 [add_recomputation]: 6.47302e-05 [cse_after_recomputation]: 2.685e-05, [1] [Cycle 1]: 2.207e-05, [1] [cse]: 1.69701e-05 [environ_conv]: 8.21007e-06 [swap_dp_allreduce_reducescatter]: 7.28993e-06 [bias_add_comm_swap]: 2.3297e-06 [label_micro_interleaved_index]: 2.50991e-06 [label_fine_grained_interleaved_index]: 2.01026e-06 [merge_cast_opt]: 1.23028e-06 [slice_recompute_activation]: 2.49967e-06 [micro_interleaved_order_control]: 1.57999e-06 [assign_add_opt]: 7.07991e-06 [ForceFp32Comm]: 7.5018e-07 [remove_cast_before_assign_add]: 1.10967e-06 [full_micro_interleaved_order_control]: 2.46009e-06 [reorder_send_recv_between_fp_bp]: 1.97021e-06 [comm_op_add_attrs]: 1.18976e-06 [add_comm_op_reuse_tag]: 1.07009e-06 [interleave_split_concat_branches]: 8.29808e-07 [interleave_parallel_branches]: 9.79751e-07 [overlap_opt_shard_in_pipeline]: 1.17999e-06 [overlap_opt_shard_grad_in_pipeline]: 2.17976e-06 [control_data_broadcast_order]: 1.34995e-06 [grouped_pairwise_exchange_alltoall]: 1.28988e-06 [offloading_packed_experts]: 1.19023e-06 [overlap_recompute_and_grad_model_parallel]: 1.89012e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.60194e-07 [overlap_recompute_allgather_and_fa_grad]: 1.08033e-06 [overlap_grad_ring_attention]: 1.67033e-06 [overlap_grad_flash_sp]: 1.55601e-05 [begin_end_overlap_inline]: 7.29691e-07 [split_matmul_comm_elemetwise]: 2.2701e-06 [split_layernorm_comm]: 1.64006e-06 [handle_group_info]: 9.29926e-07 [symbol_engine_optimizer]: 8.72002e-05, [1] [Cycle 1]: 8.268e-05, [6] [build]: 4.22029e-06 [elim_shapecalc]: 1.22301e-05 [elim_not_effective]: 1.63396e-05 [opt_reshape]: 9.32021e-06 [fold_const_symbol]: 1.45799e-05 [renormalize]: 4.49829e-07 [pipeline_parallel_scheduler]: 1.91992e-06 [auto_monad_reorder]: 3.00701e-05 [get_jit_bprop_graph]: 4.09782e-07 [rewriter_after_jit_bprop_graph]: 6.59842e-07 [eliminate_special_op_node]: 0.00059605 [distribtued_split]: 4.313e-05 [validate]: 3.72101e-05 [task_emit]: 0.0715641 [execute]: 1.08001e-05 Sums bootstrap : 0.000396s : 0.50% type_inference : 0.002896s : 3.65% auto_monad : 0.000135s : 0.17% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000036s : 0.04% optimize.rewriter_before_opt_a : 0.000038s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000055s : 0.07% optimize.opt_a.loop_unroll : 0.000023s : 0.03% optimize.opt_a.a_1 : 0.000630s : 0.79% optimize.opt_a.recompute_prepare : 0.000018s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000231s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000527s : 0.66% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000054s : 0.07% optimize.opt_a.cse : 0.000055s : 0.07% optimize.opt_a.a_3 : 0.000112s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000145s : 0.18% optimize.convert_after_rewriter : 0.000012s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000169s : 0.21% optimize.opt_b.b_2 : 0.000011s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000506s : 0.64% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000065s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000003s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000596s : 0.75% distribtued_split : 0.000043s : 0.05% validate : 0.000037s : 0.05% task_emit : 0.071564s : 90.24% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000142 63 4.89% : 0.000007s : 2: substitution.depend_value_elim 1.93% : 0.000003s : 5: substitution.elim_not_effective 1.88% : 0.000003s : 5: substitution.fold_const_symbol 5.38% : 0.000008s : 6: substitution.graph_param_transform 51.82% : 0.000074s : 1: substitution.inline 3.82% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.13% : 0.000004s : 6: substitution.load_eliminater 2.47% : 0.000004s : 2: substitution.reduce_all_const_elim 5.90% : 0.000008s : 10: substitution.remove_not_recompute_node 2.84% : 0.000004s : 2: substitution.replace_old_param 8.22% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.73% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002604 2 88.23% : 0.002298s : 1: type_inference.infer 11.77% : 0.000306s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000073 1 100.00% : 0.000073s : 1: match.inline ------[predicate.] 0.000232 1420 0.84% : 0.000002s : 13: predicate.accumulaten_eliminater 1.18% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.75% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.25% : 0.000005s : 25: predicate.arithmetic_simplify 0.87% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.70% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.42% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.47% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.97% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.62% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.19% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.19% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_depend_swap 1.90% : 0.000004s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.89% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.37% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.75% : 0.000013s : 63: predicate.inline 1.00% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.13% : 0.000003s : 12: predicate.less_batch_normalization 1.64% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.37% : 0.000006s : 38: predicate.load_eliminater 1.40% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.23% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.75% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.72% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.73% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.89% : 0.000002s : 6: predicate.mutable_eliminate 0.49% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.17% : 0.000003s : 14: predicate.partial_defer_inline 1.18% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.07% : 0.000002s : 13: predicate.reduce_eliminate 0.66% : 0.000002s : 12: predicate.remove_not_recompute_node 1.17% : 0.000003s : 25: predicate.replace_applicator 0.50% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.74% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.49% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.88% : 0.000002s : 12: predicate.shard_identity_eliminate 1.46% : 0.000003s : 18: predicate.special_op_eliminate 0.99% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.91% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.24% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.65% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.27% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.83% : 0.000002s : 13: predicate.transpose_eliminate 1.75% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.84% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.47% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.76% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.55% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.38% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.57% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.56% : 0.000001s : 6: predicate.value_based_eliminate 0.77% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.89% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000167 4 11.28% : 0.000019s : 1: func_graph_cloner_run.FuncGraphClonerGraph 88.72% : 0.000148s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092629 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000071s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.25% : 0.000234s : 1: auto_monad 0.04% : 0.000039s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.41% : 0.000381s : 1: bootstrap 0.03% : 0.000030s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000052s : 1: distribtued_split 0.62% : 0.000577s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000511s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001130s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000034s : 3: opt.transform.special_op_eliminate 0.05% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.14% : 0.005684s : 1: opt_a 0.15% : 0.000143s : 1: opt_after_cconv 0.27% : 0.000252s : 1: opt_b 8.05% : 0.007458s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000020s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000008s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000035s : 1: pre_auto_parallel 0.03% : 0.000025s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000020s : 1: remove_dup_value 0.28% : 0.000259s : 1: renormalize.infer 0.22% : 0.000205s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000157s : 1: rewriter_after_opt_a 0.05% : 0.000044s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000096s : 1: symbol_engine_optimizer 77.38% : 0.071675s : 1: task_emit 0.08% : 0.000076s : 1: tuple_transform 2.87% : 0.002655s : 1: type_inference 0.08% : 0.000075s : 1: validate Time group info: ------[substitution.] 0.000137 63 5.20% : 0.000007s : 2: substitution.depend_value_elim 1.98% : 0.000003s : 5: substitution.elim_not_effective 1.93% : 0.000003s : 5: substitution.fold_const_symbol 5.43% : 0.000007s : 6: substitution.graph_param_transform 50.52% : 0.000069s : 1: substitution.inline 4.11% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.24% : 0.000004s : 6: substitution.load_eliminater 2.47% : 0.000003s : 2: substitution.reduce_all_const_elim 5.91% : 0.000008s : 10: substitution.remove_not_recompute_node 2.67% : 0.000004s : 2: substitution.replace_old_param 8.81% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.72% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002864 2 86.56% : 0.002479s : 1: type_inference.infer 13.44% : 0.000385s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000233 1420 0.82% : 0.000002s : 13: predicate.accumulaten_eliminater 1.09% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.25% : 0.000005s : 25: predicate.arithmetic_simplify 0.92% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.59% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.89% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.21% : 0.000000s : 6: predicate.elim_not_effective 0.55% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.13% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_depend_swap 1.88% : 0.000004s : 31: predicate.environ_get_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.34% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.04% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.89% : 0.000014s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.42% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.11% : 0.000003s : 12: predicate.less_batch_normalization 1.58% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.35% : 0.000005s : 38: predicate.load_eliminater 1.24% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.21% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.85% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.80% : 0.000002s : 13: predicate.minmaximum_grad 0.73% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.54% : 0.000001s : 6: predicate.parallel_virtual_node 1.27% : 0.000003s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.74% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.23% : 0.000003s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.20% : 0.000003s : 25: predicate.replace_applicator 0.42% : 0.000001s : 12: predicate.replace_old_param 0.28% : 0.000001s : 6: predicate.reset_defer_inline 0.86% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 0.99% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.97% : 0.000002s : 12: predicate.shard_identity_eliminate 1.34% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 1.09% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.37% : 0.000006s : 38: predicate.stopgrad_eliminater 0.40% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.94% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.39% : 0.000010s : 43: predicate.switch_simplify 0.82% : 0.000002s : 13: predicate.tile_eliminate 0.82% : 0.000002s : 13: predicate.transpose_eliminate 1.84% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.61% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.49% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.72% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.31% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.37% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.96% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.63% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000173 4 9.52% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.48% : 0.000157s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.093099 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000069s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.16% : 0.000148s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.45% : 0.000420s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000051s : 1: distribtued_split 0.66% : 0.000611s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000515s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.31% : 0.001223s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.17% : 0.000160s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000034s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 6.21% : 0.005781s : 1: opt_a 0.15% : 0.000140s : 1: opt_after_cconv 0.27% : 0.000255s : 1: opt_b 8.10% : 0.007539s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.04% : 0.000041s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.33% : 0.000309s : 1: renormalize.infer 0.23% : 0.000212s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000150s : 1: rewriter_after_opt_a 0.05% : 0.000042s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000090s : 1: symbol_engine_optimizer 76.90% : 0.071595s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 3.13% : 0.002914s : 1: type_inference 0.08% : 0.000072s : 1: validate TotalTime = 0.0867691, [21] [bootstrap]: 0.00053556 [type_inference]: 0.00339646 [auto_monad]: 0.00014455 [graph_reusing]: 2.9197e-06 [inline]: 1.43005e-06 [parallel-infer-symbol]: 2.54996e-06 [pre_auto_parallel]: 2.938e-05 [insert-virtual-dataset]: 3.32994e-06 [parallel-infer-symbol-second]: 4.89876e-07 [dataset_repeat_opt]: 1.50036e-06 [pipeline_split]: 1.53994e-06 [optimize]: 0.00964852, [52] [py_interpret_to_execute]: 1.73301e-05 [rewriter_before_opt_a]: 4.031e-05 [opt_a]: 0.00777525, [2] [Cycle 1]: 0.00181108, [43] [expand_dump_flag]: 4.31994e-06 [switch_simplify]: 3.36696e-05 [loop_unroll]: 1.63298e-05 [a_1]: 0.00041392 [recompute_prepare]: 1.133e-05 [updatestate_depend_eliminate]: 9.55025e-06 [updatestate_assign_eliminate]: 6.44987e-06 [updatestate_loads_eliminate]: 7.96001e-06 [parameter_eliminate]: 3.36999e-06 [a_2]: 0.00014245 [accelerated_algorithm]: 1.05998e-05 [shard]: 2.31992e-06 [meta_shard_fg_expand]: 4.04008e-06 [shard_inline]: 1.03698e-05 [auto_parallel]: 1.29999e-05 [parallel]: 9.20007e-06 [flash_sp]: 1.251e-05 [merge_comm]: 9.30997e-06 [allreduce_fusion]: 6.76e-06 [matmul_add_comm_reduction]: 1.20699e-05 [allreduce_slice_to_reducescatter]: 4.79631e-07 [virtual_shard_identity]: 1.16602e-05 [virtual_dataset]: 9.83989e-06 [get_grad_eliminate_]: 9.60985e-06 [virtual_output]: 8.86992e-06 [merge_forward]: 6.51041e-06 [cell_reuse_recompute_pass]: 1.97999e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.22498e-05 [before_grad]: 1.72397e-05 [inplace_validation]: 5.83986e-06 [meta_fg_expand]: 6.38003e-06 [inplace_validation_after_expand]: 7.83009e-06 [flash_sp_send_recv_attached]: 4.92996e-06 [receive_attached]: 2.63983e-06 [after_resolve]: 1.46502e-05 [a_after_grad]: 1.59899e-05 [special_op_eliminate]: 1.03898e-05 [renormalize]: 0.00049027 [add_forward_monad_depend]: 3.51993e-06 [auto_monad_grad]: 1.96975e-06 [auto_monad_eliminator]: 3.49698e-05 [cse]: 6.32401e-05 [a_3]: 6.996e-05 [Cycle 2]: 0.00094265, [43] [expand_dump_flag]: 1.2801e-06 [switch_simplify]: 1.12099e-05 [loop_unroll]: 9.68017e-06 [a_1]: 0.00025654 [recompute_prepare]: 9.23965e-06 [updatestate_depend_eliminate]: 6.8699e-06 [updatestate_assign_eliminate]: 5.37978e-06 [updatestate_loads_eliminate]: 5.73974e-06 [parameter_eliminate]: 1.49012e-06 [a_2]: 0.00012745 [accelerated_algorithm]: 1.01603e-05 [shard]: 1.43982e-06 [meta_shard_fg_expand]: 2.98023e-06 [shard_inline]: 9.65968e-06 [auto_parallel]: 1.17403e-05 [parallel]: 3.80026e-06 [flash_sp]: 3.36999e-06 [merge_comm]: 7.24988e-06 [allreduce_fusion]: 5.51017e-06 [matmul_add_comm_reduction]: 8.7996e-06 [allreduce_slice_to_reducescatter]: 2.99886e-07 [virtual_shard_identity]: 1.06501e-05 [virtual_dataset]: 1.22101e-05 [get_grad_eliminate_]: 9.26014e-06 [virtual_output]: 8.84011e-06 [merge_forward]: 5.68014e-06 [cell_reuse_recompute_pass]: 2.33017e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.97999e-05 [before_grad]: 1.59098e-05 [inplace_validation]: 5.10039e-06 [meta_fg_expand]: 5.84964e-06 [inplace_validation_after_expand]: 6.17001e-06 [flash_sp_send_recv_attached]: 1.03982e-06 [receive_attached]: 9.00123e-07 [after_resolve]: 1.23302e-05 [a_after_grad]: 1.491e-05 [special_op_eliminate]: 9.49996e-06 [renormalize]: 7.96281e-08 [add_forward_monad_depend]: 1.03004e-06 [auto_monad_grad]: 1.24006e-06 [auto_monad_eliminator]: 2.07596e-05 [cse]: 2.15499e-05 [a_3]: 5.91702e-05 [py_interpret_to_execute_after_opt_a]: 9.70019e-06 [slice_cell_reuse_recomputed_activation]: 2.46987e-06 [rewriter_after_opt_a]: 0.00014567 [convert_after_rewriter]: 1.17398e-05 [order_py_execute_after_rewriter]: 6.75023e-06 [opt_b]: 0.00028806, [1] [Cycle 1]: 0.00028181, [7] [b_1]: 0.00019695 [b_2]: 1.19503e-05 [updatestate_depend_eliminate]: 5.86035e-06 [updatestate_assign_eliminate]: 4.80004e-06 [updatestate_loads_eliminate]: 5.81983e-06 [renormalize]: 4.10248e-07 [cse]: 2.09101e-05 [optimize_parallel_all_gather_comm]: 9.14e-06 [overlap_param_gather]: 1.81003e-06 [cconv]: 2.48002e-05 [loop_unroll]: 0.00052458 [opt_after_cconv]: 0.0001534, [1] [Cycle 1]: 0.00014703, [7] [c_1]: 6.46398e-05 [parameter_eliminate]: 2.71015e-06 [updatestate_depend_eliminate]: 9.07993e-06 [updatestate_assign_eliminate]: 4.86011e-06 [updatestate_loads_eliminate]: 6.17001e-06 [cse]: 2.29902e-05 [renormalize]: 3.89758e-07 [remove_dup_value]: 1.39801e-05 [tuple_transform]: 8.54102e-05, [1] [Cycle 1]: 8.03801e-05, [2] [d_1]: 6.96001e-05 [renormalize]: 2.79862e-07 [partial_unused_args_eliminate]: 2.06009e-06 [add_cache_embedding]: 1.47698e-05 [add_recomputation]: 6.85598e-05 [cse_after_recomputation]: 2.86102e-05, [1] [Cycle 1]: 2.33599e-05, [1] [cse]: 1.79298e-05 [environ_conv]: 7.53999e-06 [swap_dp_allreduce_reducescatter]: 8.31997e-06 [bias_add_comm_swap]: 2.23983e-06 [label_micro_interleaved_index]: 1.96975e-06 [label_fine_grained_interleaved_index]: 2.51969e-06 [merge_cast_opt]: 1.30013e-06 [slice_recompute_activation]: 2.10013e-06 [micro_interleaved_order_control]: 1.8198e-06 [assign_add_opt]: 7.68015e-06 [ForceFp32Comm]: 8.70321e-07 [remove_cast_before_assign_add]: 1.03004e-06 [full_micro_interleaved_order_control]: 2.33995e-06 [reorder_send_recv_between_fp_bp]: 2.59979e-06 [comm_op_add_attrs]: 9.79751e-07 [add_comm_op_reuse_tag]: 1.08965e-06 [interleave_split_concat_branches]: 8.40053e-07 [interleave_parallel_branches]: 9.89996e-07 [overlap_opt_shard_in_pipeline]: 1.15996e-06 [overlap_opt_shard_grad_in_pipeline]: 2.11969e-06 [control_data_broadcast_order]: 1.17999e-06 [grouped_pairwise_exchange_alltoall]: 1.29035e-06 [offloading_packed_experts]: 1.09989e-06 [overlap_recompute_and_grad_model_parallel]: 2.12016e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.39587e-07 [overlap_recompute_allgather_and_fa_grad]: 1.16974e-06 [overlap_grad_ring_attention]: 2.56998e-06 [overlap_grad_flash_sp]: 1.64099e-05 [begin_end_overlap_inline]: 1.0198e-06 [split_matmul_comm_elemetwise]: 2.11969e-06 [split_layernorm_comm]: 2.17976e-06 [handle_group_info]: 9.69972e-07 [symbol_engine_optimizer]: 9.955e-05, [1] [Cycle 1]: 9.50699e-05, [6] [build]: 4.99003e-06 [elim_shapecalc]: 1.40802e-05 [elim_not_effective]: 1.94302e-05 [opt_reshape]: 1.05104e-05 [fold_const_symbol]: 1.752e-05 [renormalize]: 2.59839e-07 [pipeline_parallel_scheduler]: 1.77976e-06 [auto_monad_reorder]: 3.08496e-05 [get_jit_bprop_graph]: 4.69852e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00054328 [distribtued_split]: 4.64399e-05 [validate]: 3.71002e-05 [task_emit]: 0.0720568 [execute]: 1.31899e-05 Sums bootstrap : 0.000536s : 0.66% type_inference : 0.003396s : 4.21% auto_monad : 0.000145s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000029s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000040s : 0.05% optimize.opt_a.expand_dump_flag : 0.000006s : 0.01% optimize.opt_a.switch_simplify : 0.000045s : 0.06% optimize.opt_a.loop_unroll : 0.000026s : 0.03% optimize.opt_a.a_1 : 0.000670s : 0.83% optimize.opt_a.recompute_prepare : 0.000021s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000270s : 0.33% optimize.opt_a.accelerated_algorithm : 0.000021s : 0.03% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000020s : 0.02% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000013s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000017s : 0.02% optimize.opt_a.allreduce_fusion : 0.000012s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000021s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000022s : 0.03% optimize.opt_a.virtual_dataset : 0.000022s : 0.03% optimize.opt_a.get_grad_eliminate_ : 0.000019s : 0.02% optimize.opt_a.virtual_output : 0.000018s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000042s : 0.05% optimize.opt_a.before_grad : 0.000033s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000012s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000014s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000027s : 0.03% optimize.opt_a.a_after_grad : 0.000031s : 0.04% optimize.opt_a.special_op_eliminate : 0.000020s : 0.02% optimize.opt_a.renormalize : 0.000490s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000056s : 0.07% optimize.opt_a.cse : 0.000085s : 0.11% optimize.opt_a.a_3 : 0.000129s : 0.16% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000146s : 0.18% optimize.convert_after_rewriter : 0.000012s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000197s : 0.24% optimize.opt_b.b_2 : 0.000012s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000021s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000525s : 0.65% optimize.opt_after_cconv.c_1 : 0.000065s : 0.08% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000070s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000069s : 0.09% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000003s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000019s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000018s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000543s : 0.67% distribtued_split : 0.000046s : 0.06% validate : 0.000037s : 0.05% task_emit : 0.072057s : 89.35% execute : 0.000013s : 0.02% Time group info: ------[substitution.] 0.000154 63 4.95% : 0.000008s : 2: substitution.depend_value_elim 2.16% : 0.000003s : 5: substitution.elim_not_effective 2.26% : 0.000003s : 5: substitution.fold_const_symbol 5.87% : 0.000009s : 6: substitution.graph_param_transform 48.07% : 0.000074s : 1: substitution.inline 4.45% : 0.000007s : 10: substitution.j_node_and_user_rematch 3.50% : 0.000005s : 6: substitution.load_eliminater 2.75% : 0.000004s : 2: substitution.reduce_all_const_elim 6.89% : 0.000011s : 10: substitution.remove_not_recompute_node 2.84% : 0.000004s : 2: substitution.replace_old_param 8.43% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 7.83% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.003364 2 90.99% : 0.003061s : 1: type_inference.infer 9.01% : 0.000303s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000073 1 100.00% : 0.000073s : 1: match.inline ------[predicate.] 0.000269 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.15% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.75% : 0.000002s : 13: predicate.addn_zero_filter 0.79% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.13% : 0.000006s : 25: predicate.arithmetic_simplify 0.77% : 0.000002s : 13: predicate.cast_eliminate 0.75% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.50% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.33% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.79% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.53% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.13% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_depend_swap 1.91% : 0.000005s : 31: predicate.environ_get_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.29% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.82% : 0.000002s : 12: predicate.incorporate_call 0.72% : 0.000002s : 12: predicate.incorporate_call_switch 6.20% : 0.000017s : 63: predicate.inline 1.18% : 0.000003s : 12: predicate.inline_without_move 0.43% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.01% : 0.000003s : 12: predicate.less_batch_normalization 1.69% : 0.000005s : 25: predicate.list_to_tuple_eliminator_ 2.42% : 0.000007s : 38: predicate.load_eliminater 1.23% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.71% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.12% : 0.000003s : 14: predicate.partial_defer_inline 1.33% : 0.000004s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.84% : 0.000002s : 12: predicate.reduce_all_const_elim 1.08% : 0.000003s : 13: predicate.reduce_eliminate 0.62% : 0.000002s : 12: predicate.remove_not_recompute_node 1.21% : 0.000003s : 25: predicate.replace_applicator 0.49% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.88% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 0.96% : 0.000003s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.90% : 0.000002s : 12: predicate.shard_identity_eliminate 1.57% : 0.000004s : 18: predicate.special_op_eliminate 1.03% : 0.000003s : 12: predicate.specialize_transform 1.15% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.87% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.31% : 0.000006s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.94% : 0.000003s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.08% : 0.000011s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.76% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000005s : 25: predicate.tuple_list_convert_item_index_to_positive 1.79% : 0.000005s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.80% : 0.000008s : 37: predicate.tuple_list_get_item_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.51% : 0.000007s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.38% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.55% : 0.000010s : 50: predicate.updatestate_useless_node_eliminater 0.46% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.50% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000167 4 10.10% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.90% : 0.000151s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.098605 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000074s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.16% : 0.000159s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.57% : 0.000563s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.03% : 0.000032s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000055s : 1: distribtued_split 0.57% : 0.000558s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000023s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000010s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000535s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.37% : 0.001350s : 80: opt.transform.opt_a 0.06% : 0.000063s : 1: opt.transform.opt_after_cconv 0.19% : 0.000185s : 27: opt.transform.opt_b 0.07% : 0.000068s : 1: opt.transform.opt_trans_graph 0.04% : 0.000038s : 3: opt.transform.special_op_eliminate 0.06% : 0.000057s : 4: opt.transform.symbol_engine_opt 7.89% : 0.007779s : 1: opt_a 0.16% : 0.000158s : 1: opt_after_cconv 0.30% : 0.000292s : 1: opt_b 9.79% : 0.009657s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000020s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000006s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000036s : 1: pre_auto_parallel 0.02% : 0.000023s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.27% : 0.000269s : 1: renormalize.infer 0.22% : 0.000215s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000153s : 1: rewriter_after_opt_a 0.05% : 0.000045s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000103s : 1: symbol_engine_optimizer 73.11% : 0.072091s : 1: task_emit 0.09% : 0.000089s : 1: tuple_transform 3.46% : 0.003416s : 1: type_inference 0.08% : 0.000074s : 1: validate TotalTime = 0.0872773, [21] [bootstrap]: 0.00054996 [type_inference]: 0.00357068 [auto_monad]: 0.00010627 [graph_reusing]: 1.60001e-06 [inline]: 1.68988e-06 [parallel-infer-symbol]: 1.2503e-06 [pre_auto_parallel]: 2.346e-05 [insert-virtual-dataset]: 1.47987e-06 [parallel-infer-symbol-second]: 4.49829e-07 [dataset_repeat_opt]: 6.79865e-07 [pipeline_split]: 9.09902e-07 [optimize]: 0.00969228, [52] [py_interpret_to_execute]: 1.71899e-05 [rewriter_before_opt_a]: 3.05502e-05 [opt_a]: 0.00799972, [2] [Cycle 1]: 0.00159279, [43] [expand_dump_flag]: 2.38977e-06 [switch_simplify]: 2.74898e-05 [loop_unroll]: 1.308e-05 [a_1]: 0.00034537 [recompute_prepare]: 9.05013e-06 [updatestate_depend_eliminate]: 8.92999e-06 [updatestate_assign_eliminate]: 6.4699e-06 [updatestate_loads_eliminate]: 6.00005e-06 [parameter_eliminate]: 2.57976e-06 [a_2]: 0.00011508 [accelerated_algorithm]: 8.2301e-06 [shard]: 1.64006e-06 [meta_shard_fg_expand]: 2.88012e-06 [shard_inline]: 9.39006e-06 [auto_parallel]: 1.27601e-05 [parallel]: 4.97e-06 [flash_sp]: 6.42985e-06 [merge_comm]: 6.99982e-06 [allreduce_fusion]: 5.84964e-06 [matmul_add_comm_reduction]: 8.71997e-06 [allreduce_slice_to_reducescatter]: 2.99886e-07 [virtual_shard_identity]: 1.03302e-05 [virtual_dataset]: 8.52998e-06 [get_grad_eliminate_]: 7.9996e-06 [virtual_output]: 8.3698e-06 [merge_forward]: 4.92018e-06 [cell_reuse_recompute_pass]: 1.64984e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.83596e-05 [before_grad]: 1.37002e-05 [inplace_validation]: 4.40003e-06 [meta_fg_expand]: 5.37001e-06 [inplace_validation_after_expand]: 5.41983e-06 [flash_sp_send_recv_attached]: 2.73017e-06 [receive_attached]: 1.51992e-06 [after_resolve]: 1.14203e-05 [a_after_grad]: 1.27899e-05 [special_op_eliminate]: 8.04989e-06 [renormalize]: 0.00047039 [add_forward_monad_depend]: 3.19025e-06 [auto_monad_grad]: 1.72993e-06 [auto_monad_eliminator]: 2.48398e-05 [cse]: 2.365e-05 [a_3]: 5.95399e-05 [Cycle 2]: 0.00079136, [43] [expand_dump_flag]: 1.17021e-06 [switch_simplify]: 9.22987e-06 [loop_unroll]: 7.70018e-06 [a_1]: 0.00020609 [recompute_prepare]: 7.28993e-06 [updatestate_depend_eliminate]: 6.0997e-06 [updatestate_assign_eliminate]: 5.06034e-06 [updatestate_loads_eliminate]: 5.18002e-06 [parameter_eliminate]: 1.26008e-06 [a_2]: 0.00010623 [accelerated_algorithm]: 8.57981e-06 [shard]: 1.26008e-06 [meta_shard_fg_expand]: 2.81027e-06 [shard_inline]: 7.94977e-06 [auto_parallel]: 1.08099e-05 [parallel]: 3.56976e-06 [flash_sp]: 2.37999e-06 [merge_comm]: 5.68992e-06 [allreduce_fusion]: 4.88991e-06 [matmul_add_comm_reduction]: 7.63964e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 8.95979e-06 [virtual_dataset]: 7.43987e-06 [get_grad_eliminate_]: 9.79006e-06 [virtual_output]: 7.60006e-06 [merge_forward]: 4.75021e-06 [cell_reuse_recompute_pass]: 2.10013e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.61203e-05 [before_grad]: 1.25999e-05 [inplace_validation]: 4.15044e-06 [meta_fg_expand]: 4.88991e-06 [inplace_validation_after_expand]: 5.09014e-06 [flash_sp_send_recv_attached]: 9.69972e-07 [receive_attached]: 7.79983e-07 [after_resolve]: 9.72021e-06 [a_after_grad]: 1.173e-05 [special_op_eliminate]: 7.70018e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 8.89879e-07 [auto_monad_grad]: 1.17999e-06 [auto_monad_eliminator]: 1.904e-05 [cse]: 2.04202e-05 [a_3]: 4.96199e-05 [py_interpret_to_execute_after_opt_a]: 9.05013e-06 [slice_cell_reuse_recomputed_activation]: 2.06009e-06 [rewriter_after_opt_a]: 0.00013049 [convert_after_rewriter]: 1.188e-05 [order_py_execute_after_rewriter]: 5.01005e-06 [opt_b]: 0.0002501, [1] [Cycle 1]: 0.0002441, [7] [b_1]: 0.00016503 [b_2]: 1.00899e-05 [updatestate_depend_eliminate]: 5.70016e-06 [updatestate_assign_eliminate]: 4.48991e-06 [updatestate_loads_eliminate]: 5.02961e-06 [renormalize]: 4.30271e-07 [cse]: 2.04202e-05 [optimize_parallel_all_gather_comm]: 8.42009e-06 [overlap_param_gather]: 1.11992e-06 [cconv]: 1.51601e-05 [loop_unroll]: 0.00051795 [opt_after_cconv]: 0.00013896, [1] [Cycle 1]: 0.00013247, [7] [c_1]: 5.48298e-05 [parameter_eliminate]: 2.72039e-06 [updatestate_depend_eliminate]: 8.48016e-06 [updatestate_assign_eliminate]: 4.62029e-06 [updatestate_loads_eliminate]: 5.32996e-06 [cse]: 2.29301e-05 [renormalize]: 3.89758e-07 [remove_dup_value]: 1.01598e-05 [tuple_transform]: 7.08001e-05, [1] [Cycle 1]: 6.60601e-05, [2] [d_1]: 5.59599e-05 [renormalize]: 1.60187e-07 [partial_unused_args_eliminate]: 1.35042e-06 [add_cache_embedding]: 1.28197e-05 [add_recomputation]: 5.486e-05 [cse_after_recomputation]: 2.93399e-05, [1] [Cycle 1]: 2.41301e-05, [1] [cse]: 1.88299e-05 [environ_conv]: 6.73998e-06 [swap_dp_allreduce_reducescatter]: 7.06967e-06 [bias_add_comm_swap]: 1.40024e-06 [label_micro_interleaved_index]: 1.13016e-06 [label_fine_grained_interleaved_index]: 1.03982e-06 [merge_cast_opt]: 5.49946e-07 [slice_recompute_activation]: 9.20147e-07 [micro_interleaved_order_control]: 1.34017e-06 [assign_add_opt]: 6.58957e-06 [ForceFp32Comm]: 5.19678e-07 [remove_cast_before_assign_add]: 4.60073e-07 [full_micro_interleaved_order_control]: 9.20147e-07 [reorder_send_recv_between_fp_bp]: 8.49832e-07 [comm_op_add_attrs]: 5.69969e-07 [add_comm_op_reuse_tag]: 4.39584e-07 [interleave_split_concat_branches]: 6.59842e-07 [interleave_parallel_branches]: 4.60073e-07 [overlap_opt_shard_in_pipeline]: 1.30991e-06 [overlap_opt_shard_grad_in_pipeline]: 8.29808e-07 [control_data_broadcast_order]: 6.9011e-07 [grouped_pairwise_exchange_alltoall]: 5.49946e-07 [offloading_packed_experts]: 7.39936e-07 [overlap_recompute_and_grad_model_parallel]: 8.40053e-07 [overlap_grad_matmul_and_grad_allreduce]: 3.89758e-07 [overlap_recompute_allgather_and_fa_grad]: 4.4005e-07 [overlap_grad_ring_attention]: 9.89996e-07 [overlap_grad_flash_sp]: 1.24201e-05 [begin_end_overlap_inline]: 3.59956e-07 [split_matmul_comm_elemetwise]: 9.09902e-07 [split_layernorm_comm]: 7.69738e-07 [handle_group_info]: 4.00003e-07 [symbol_engine_optimizer]: 9.062e-05, [1] [Cycle 1]: 8.55299e-05, [6] [build]: 4.00981e-06 [elim_shapecalc]: 1.32103e-05 [elim_not_effective]: 1.69603e-05 [opt_reshape]: 9.26014e-06 [fold_const_symbol]: 1.40402e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 9.80217e-07 [auto_monad_reorder]: 2.07797e-05 [get_jit_bprop_graph]: 2.79862e-07 [rewriter_after_jit_bprop_graph]: 2.29571e-07 [eliminate_special_op_node]: 0.00053201 [distribtued_split]: 3.37102e-05 [validate]: 3.21902e-05 [task_emit]: 0.0724484 [execute]: 8.3698e-06 Sums bootstrap : 0.000550s : 0.68% type_inference : 0.003571s : 4.43% auto_monad : 0.000106s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000023s : 0.03% insert-virtual-dataset : 0.000001s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000031s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000037s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000551s : 0.68% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000221s : 0.27% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000009s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000018s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000470s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000044s : 0.05% optimize.opt_a.cse : 0.000044s : 0.05% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000130s : 0.16% optimize.convert_after_rewriter : 0.000012s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.20% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000518s : 0.64% optimize.opt_after_cconv.c_1 : 0.000055s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000055s : 0.07% optimize.cse_after_recomputation.cse : 0.000019s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000000s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000000s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000000s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000000s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000021s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000532s : 0.66% distribtued_split : 0.000034s : 0.04% validate : 0.000032s : 0.04% task_emit : 0.072448s : 89.90% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000123 63 3.93% : 0.000005s : 2: substitution.depend_value_elim 1.88% : 0.000002s : 5: substitution.elim_not_effective 1.54% : 0.000002s : 5: substitution.fold_const_symbol 4.73% : 0.000006s : 6: substitution.graph_param_transform 51.42% : 0.000063s : 1: substitution.inline 4.09% : 0.000005s : 10: substitution.j_node_and_user_rematch 4.23% : 0.000005s : 6: substitution.load_eliminater 2.31% : 0.000003s : 2: substitution.reduce_all_const_elim 6.77% : 0.000008s : 10: substitution.remove_not_recompute_node 2.12% : 0.000003s : 2: substitution.replace_old_param 8.76% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.23% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.003546 2 93.22% : 0.003305s : 1: type_inference.infer 6.78% : 0.000240s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000062 1 100.00% : 0.000062s : 1: match.inline ------[predicate.] 0.000236 1420 0.82% : 0.000002s : 13: predicate.accumulaten_eliminater 1.23% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.88% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.22% : 0.000005s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.87% : 0.000002s : 12: predicate.check_bprop_eliminate 0.69% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.15% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.77% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.98% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.54% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.21% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000005s : 31: predicate.environ_get_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.25% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.03% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.77% : 0.000002s : 12: predicate.get_grad_eliminate 0.31% : 0.000001s : 6: predicate.graph_param_transform 0.73% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.54% : 0.000013s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.13% : 0.000003s : 12: predicate.less_batch_normalization 1.79% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000006s : 38: predicate.load_eliminater 1.24% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.28% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.86% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.81% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.72% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.55% : 0.000001s : 6: predicate.parallel_virtual_node 1.20% : 0.000003s : 14: predicate.partial_defer_inline 1.27% : 0.000003s : 19: predicate.partial_eliminate 0.84% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.18% : 0.000003s : 13: predicate.reduce_eliminate 0.62% : 0.000001s : 12: predicate.remove_not_recompute_node 1.17% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.88% : 0.000002s : 13: predicate.reshape_eliminate 0.78% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.55% : 0.000001s : 6: predicate.row_tensor_eliminate 0.99% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.05% : 0.000002s : 12: predicate.shard_identity_eliminate 1.44% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.01% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.94% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.27% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.43% : 0.000010s : 43: predicate.switch_simplify 0.76% : 0.000002s : 13: predicate.tile_eliminate 0.78% : 0.000002s : 13: predicate.transpose_eliminate 1.68% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.75% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.66% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.85% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.26% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.50% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.87% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.90% : 0.000002s : 12: predicate.virtual_output_eliminate 0.47% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000157 4 5.97% : 0.000009s : 1: func_graph_cloner_run.FuncGraphClonerGraph 94.03% : 0.000147s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.098835 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.06% : 0.000059s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.12% : 0.000120s : 1: auto_monad 0.03% : 0.000027s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.58% : 0.000574s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.03% : 0.000033s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.04% : 0.000042s : 1: distribtued_split 0.55% : 0.000546s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.53% : 0.000529s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.12% : 0.001110s : 80: opt.transform.opt_a 0.05% : 0.000053s : 1: opt.transform.opt_after_cconv 0.16% : 0.000155s : 27: opt.transform.opt_b 0.05% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000034s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 8.10% : 0.008004s : 1: opt_a 0.15% : 0.000144s : 1: opt_after_cconv 0.26% : 0.000253s : 1: opt_b 9.82% : 0.009701s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000022s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.26% : 0.000254s : 1: renormalize.infer 0.21% : 0.000211s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000137s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000094s : 1: symbol_engine_optimizer 73.33% : 0.072475s : 1: task_emit 0.08% : 0.000075s : 1: tuple_transform 3.63% : 0.003591s : 1: type_inference 0.07% : 0.000066s : 1: validate TotalTime = 0.0785632, [21] [bootstrap]: 0.00031481 [type_inference]: 0.00222376 [auto_monad]: 0.00010061 [graph_reusing]: 1.74996e-06 [inline]: 1.39e-06 [parallel-infer-symbol]: 1.95019e-06 [pre_auto_parallel]: 2.11e-05 [insert-virtual-dataset]: 2.27988e-06 [parallel-infer-symbol-second]: 3.69735e-07 [dataset_repeat_opt]: 1.0198e-06 [pipeline_split]: 1.11992e-06 [optimize]: 0.00689567, [52] [py_interpret_to_execute]: 1.19498e-05 [rewriter_before_opt_a]: 3.041e-05 [opt_a]: 0.00527711, [2] [Cycle 1]: 0.0014424, [43] [expand_dump_flag]: 2.54018e-06 [switch_simplify]: 2.63997e-05 [loop_unroll]: 1.30399e-05 [a_1]: 0.00033166 [recompute_prepare]: 8.69995e-06 [updatestate_depend_eliminate]: 7.74022e-06 [updatestate_assign_eliminate]: 5.02961e-06 [updatestate_loads_eliminate]: 5.30016e-06 [parameter_eliminate]: 1.96975e-06 [a_2]: 0.00011239 [accelerated_algorithm]: 8.73022e-06 [shard]: 1.42027e-06 [meta_shard_fg_expand]: 3.09013e-06 [shard_inline]: 8.09971e-06 [auto_parallel]: 1.12304e-05 [parallel]: 5.38025e-06 [flash_sp]: 7.56001e-06 [merge_comm]: 7.43009e-06 [allreduce_fusion]: 5.11995e-06 [matmul_add_comm_reduction]: 8.92999e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 9.77982e-06 [virtual_dataset]: 8.66968e-06 [get_grad_eliminate_]: 7.83009e-06 [virtual_output]: 7.65966e-06 [merge_forward]: 4.75999e-06 [cell_reuse_recompute_pass]: 1.5297e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.68099e-05 [before_grad]: 1.35503e-05 [inplace_validation]: 4.36977e-06 [meta_fg_expand]: 5.30016e-06 [inplace_validation_after_expand]: 5.17024e-06 [flash_sp_send_recv_attached]: 4.23985e-06 [receive_attached]: 1.57021e-06 [after_resolve]: 1.06301e-05 [a_after_grad]: 1.28304e-05 [special_op_eliminate]: 7.98982e-06 [renormalize]: 0.00040708 [add_forward_monad_depend]: 2.54996e-06 [auto_monad_grad]: 1.62981e-06 [auto_monad_eliminator]: 2.33604e-05 [cse]: 2.721e-05 [a_3]: 5.84899e-05 [Cycle 2]: 0.00078179, [43] [expand_dump_flag]: 9.00123e-07 [switch_simplify]: 1.11703e-05 [loop_unroll]: 7.98982e-06 [a_1]: 0.00020482 [recompute_prepare]: 7.7202e-06 [updatestate_depend_eliminate]: 5.92042e-06 [updatestate_assign_eliminate]: 4.55976e-06 [updatestate_loads_eliminate]: 5.20004e-06 [parameter_eliminate]: 9.99775e-07 [a_2]: 0.00010565 [accelerated_algorithm]: 8.55001e-06 [shard]: 1.07009e-06 [meta_shard_fg_expand]: 2.42004e-06 [shard_inline]: 7.75e-06 [auto_parallel]: 1.00997e-05 [parallel]: 3.00026e-06 [flash_sp]: 2.80002e-06 [merge_comm]: 5.85988e-06 [allreduce_fusion]: 4.99003e-06 [matmul_add_comm_reduction]: 7.32997e-06 [allreduce_slice_to_reducescatter]: 2.59839e-07 [virtual_shard_identity]: 8.80007e-06 [virtual_dataset]: 7.71973e-06 [get_grad_eliminate_]: 7.47014e-06 [virtual_output]: 7.07991e-06 [merge_forward]: 4.39025e-06 [cell_reuse_recompute_pass]: 1.62004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.53999e-05 [before_grad]: 1.26902e-05 [inplace_validation]: 4.36977e-06 [meta_fg_expand]: 4.65987e-06 [inplace_validation_after_expand]: 5.01983e-06 [flash_sp_send_recv_attached]: 8.60076e-07 [receive_attached]: 6.00237e-07 [after_resolve]: 1.00499e-05 [a_after_grad]: 1.21803e-05 [special_op_eliminate]: 7.62986e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 8.10251e-07 [auto_monad_grad]: 9.49949e-07 [auto_monad_eliminator]: 1.62101e-05 [cse]: 1.87401e-05 [a_3]: 4.95701e-05 [py_interpret_to_execute_after_opt_a]: 9.26992e-06 [slice_cell_reuse_recomputed_activation]: 1.75973e-06 [rewriter_after_opt_a]: 0.00014878 [convert_after_rewriter]: 1.00699e-05 [order_py_execute_after_rewriter]: 6.00005e-06 [opt_b]: 0.00024685, [1] [Cycle 1]: 0.00024166, [7] [b_1]: 0.00016595 [b_2]: 1.01998e-05 [updatestate_depend_eliminate]: 5.53019e-06 [updatestate_assign_eliminate]: 4.55976e-06 [updatestate_loads_eliminate]: 5.19026e-06 [renormalize]: 2.99886e-07 [cse]: 1.868e-05 [optimize_parallel_all_gather_comm]: 7.71042e-06 [overlap_param_gather]: 7.30157e-07 [cconv]: 1.25798e-05 [loop_unroll]: 0.00047872 [opt_after_cconv]: 0.00013116, [1] [Cycle 1]: 0.00012554, [7] [c_1]: 5.26099e-05 [parameter_eliminate]: 1.79e-06 [updatestate_depend_eliminate]: 7.18003e-06 [updatestate_assign_eliminate]: 4.69014e-06 [updatestate_loads_eliminate]: 5.22984e-06 [cse]: 2.04002e-05 [renormalize]: 3.30154e-07 [remove_dup_value]: 1.00099e-05 [tuple_transform]: 6.937e-05, [1] [Cycle 1]: 6.516e-05, [2] [d_1]: 5.57499e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.28988e-06 [add_cache_embedding]: 1.12201e-05 [add_recomputation]: 5.23902e-05 [cse_after_recomputation]: 2.78801e-05, [1] [Cycle 1]: 2.27201e-05, [1] [cse]: 1.73701e-05 [environ_conv]: 5.76023e-06 [swap_dp_allreduce_reducescatter]: 6.77025e-06 [bias_add_comm_swap]: 1.48965e-06 [label_micro_interleaved_index]: 1.05007e-06 [label_fine_grained_interleaved_index]: 1.2503e-06 [merge_cast_opt]: 5.60191e-07 [slice_recompute_activation]: 8.40053e-07 [micro_interleaved_order_control]: 1.22981e-06 [assign_add_opt]: 6.4401e-06 [ForceFp32Comm]: 8.60076e-07 [remove_cast_before_assign_add]: 5.89993e-07 [full_micro_interleaved_order_control]: 1.32015e-06 [reorder_send_recv_between_fp_bp]: 1.19023e-06 [comm_op_add_attrs]: 5.49946e-07 [add_comm_op_reuse_tag]: 5.80214e-07 [interleave_split_concat_branches]: 5.19678e-07 [interleave_parallel_branches]: 4.70318e-07 [overlap_opt_shard_in_pipeline]: 8.30274e-07 [overlap_opt_shard_grad_in_pipeline]: 1.13994e-06 [control_data_broadcast_order]: 6.59842e-07 [grouped_pairwise_exchange_alltoall]: 7.10133e-07 [offloading_packed_experts]: 6.39819e-07 [overlap_recompute_and_grad_model_parallel]: 1.15018e-06 [overlap_grad_matmul_and_grad_allreduce]: 3.39933e-07 [overlap_recompute_allgather_and_fa_grad]: 5.99772e-07 [overlap_grad_ring_attention]: 1.35973e-06 [overlap_grad_flash_sp]: 1.17403e-05 [begin_end_overlap_inline]: 4.10248e-07 [split_matmul_comm_elemetwise]: 1.11992e-06 [split_layernorm_comm]: 1.09011e-06 [handle_group_info]: 8.49832e-07 [symbol_engine_optimizer]: 8.37403e-05, [1] [Cycle 1]: 7.92402e-05, [6] [build]: 3.6601e-06 [elim_shapecalc]: 1.188e-05 [elim_not_effective]: 1.52201e-05 [opt_reshape]: 8.46991e-06 [fold_const_symbol]: 1.331e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 1.34995e-06 [auto_monad_reorder]: 2.306e-05 [get_jit_bprop_graph]: 3.30154e-07 [rewriter_after_jit_bprop_graph]: 2.5006e-07 [eliminate_special_op_node]: 0.00049485 [distribtued_split]: 3.35099e-05 [validate]: 3.02601e-05 [task_emit]: 0.0681809 [execute]: 8.21007e-06 Sums bootstrap : 0.000315s : 0.42% type_inference : 0.002224s : 2.98% auto_monad : 0.000101s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000536s : 0.72% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000218s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000002s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000407s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000046s : 0.06% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000149s : 0.20% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000166s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000013s : 0.02% optimize.loop_unroll : 0.000479s : 0.64% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.02% optimize.add_recomputation : 0.000052s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000006s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000023s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000495s : 0.66% distribtued_split : 0.000034s : 0.04% validate : 0.000030s : 0.04% task_emit : 0.068181s : 91.46% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000110 63 4.08% : 0.000004s : 2: substitution.depend_value_elim 1.93% : 0.000002s : 5: substitution.elim_not_effective 1.81% : 0.000002s : 5: substitution.fold_const_symbol 6.24% : 0.000007s : 6: substitution.graph_param_transform 49.28% : 0.000054s : 1: substitution.inline 4.40% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.58% : 0.000004s : 6: substitution.load_eliminater 2.19% : 0.000002s : 2: substitution.reduce_all_const_elim 6.62% : 0.000007s : 10: substitution.remove_not_recompute_node 2.22% : 0.000002s : 2: substitution.replace_old_param 9.52% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.13% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002200 2 89.82% : 0.001976s : 1: type_inference.infer 10.18% : 0.000224s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000053 1 100.00% : 0.000053s : 1: match.inline ------[predicate.] 0.000230 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.04% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.69% : 0.000002s : 12: predicate.addn_check_dump 0.75% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.39% : 0.000005s : 25: predicate.arithmetic_simplify 0.94% : 0.000002s : 13: predicate.cast_eliminate 0.84% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.18% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.88% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.53% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.22% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_depend_swap 1.87% : 0.000004s : 31: predicate.environ_get_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.86% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.23% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.84% : 0.000002s : 12: predicate.get_grad_eliminate 0.32% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.60% : 0.000013s : 63: predicate.inline 1.07% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.06% : 0.000002s : 12: predicate.less_batch_normalization 1.76% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.40% : 0.000006s : 38: predicate.load_eliminater 1.33% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.14% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.69% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.28% : 0.000003s : 14: predicate.partial_defer_inline 1.28% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.08% : 0.000002s : 13: predicate.reduce_eliminate 0.59% : 0.000001s : 12: predicate.remove_not_recompute_node 1.11% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.89% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.06% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.46% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 1.14% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.30% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.65% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.18% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.86% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.67% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.62% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.91% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.76% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.54% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.73% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.39% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.42% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.54% : 0.000001s : 6: predicate.value_based_eliminate 0.85% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000129 4 6.05% : 0.000008s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.95% : 0.000121s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087241 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000057s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.13% : 0.000112s : 1: auto_monad 0.03% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.39% : 0.000338s : 1: bootstrap 0.02% : 0.000016s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.04% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.58% : 0.000508s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.01% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.56% : 0.000488s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001091s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.18% : 0.000156s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 6.05% : 0.005281s : 1: opt_a 0.15% : 0.000135s : 1: opt_after_cconv 0.29% : 0.000250s : 1: opt_b 7.91% : 0.006904s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000016s : 1: py_interpret_to_execute 0.02% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.25% : 0.000220s : 1: renormalize.infer 0.21% : 0.000182s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000154s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000087s : 1: symbol_engine_optimizer 78.18% : 0.068205s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.57% : 0.002241s : 1: type_inference 0.07% : 0.000062s : 1: validate TotalTime = 0.0795313, [21] [bootstrap]: 0.00032096 [type_inference]: 0.00235788 [auto_monad]: 0.00012174 [graph_reusing]: 1.95997e-06 [inline]: 1.72993e-06 [parallel-infer-symbol]: 2.23983e-06 [pre_auto_parallel]: 2.17101e-05 [insert-virtual-dataset]: 2.40002e-06 [parallel-infer-symbol-second]: 3.70201e-07 [dataset_repeat_opt]: 1.23028e-06 [pipeline_split]: 1.35973e-06 [optimize]: 0.00706738, [52] [py_interpret_to_execute]: 1.42502e-05 [rewriter_before_opt_a]: 3.38401e-05 [opt_a]: 0.0054178, [2] [Cycle 1]: 0.00151928, [43] [expand_dump_flag]: 2.79024e-06 [switch_simplify]: 2.864e-05 [loop_unroll]: 1.35903e-05 [a_1]: 0.00034228 [recompute_prepare]: 8.74e-06 [updatestate_depend_eliminate]: 9.10973e-06 [updatestate_assign_eliminate]: 5.66989e-06 [updatestate_loads_eliminate]: 6.85034e-06 [parameter_eliminate]: 3.0701e-06 [a_2]: 0.00011664 [accelerated_algorithm]: 8.24034e-06 [shard]: 1.85985e-06 [meta_shard_fg_expand]: 3.45008e-06 [shard_inline]: 8.41031e-06 [auto_parallel]: 1.18301e-05 [parallel]: 6.02985e-06 [flash_sp]: 9.83989e-06 [merge_comm]: 8.11042e-06 [allreduce_fusion]: 5.74999e-06 [matmul_add_comm_reduction]: 1.01901e-05 [allreduce_slice_to_reducescatter]: 8.10251e-07 [virtual_shard_identity]: 9.25967e-06 [virtual_dataset]: 7.99028e-06 [get_grad_eliminate_]: 7.65035e-06 [virtual_output]: 7.58003e-06 [merge_forward]: 5.9898e-06 [cell_reuse_recompute_pass]: 1.76998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.645e-05 [before_grad]: 1.32602e-05 [inplace_validation]: 5.43986e-06 [meta_fg_expand]: 4.86989e-06 [inplace_validation_after_expand]: 6.63009e-06 [flash_sp_send_recv_attached]: 4.09968e-06 [receive_attached]: 1.95019e-06 [after_resolve]: 1.18697e-05 [a_after_grad]: 1.268e-05 [special_op_eliminate]: 8.17003e-06 [renormalize]: 0.00044579 [add_forward_monad_depend]: 3.15998e-06 [auto_monad_grad]: 1.74996e-06 [auto_monad_eliminator]: 2.923e-05 [cse]: 2.74098e-05 [a_3]: 5.93597e-05 [Cycle 2]: 0.00080311, [43] [expand_dump_flag]: 1.05985e-06 [switch_simplify]: 8.89972e-06 [loop_unroll]: 7.6904e-06 [a_1]: 0.00020308 [recompute_prepare]: 7.47992e-06 [updatestate_depend_eliminate]: 6.25988e-06 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 5.66989e-06 [parameter_eliminate]: 1.18045e-06 [a_2]: 0.00010609 [accelerated_algorithm]: 8.14022e-06 [shard]: 1.34017e-06 [meta_shard_fg_expand]: 2.50991e-06 [shard_inline]: 8.01031e-06 [auto_parallel]: 2.34302e-05 [parallel]: 3.93018e-06 [flash_sp]: 3.26009e-06 [merge_comm]: 6.3898e-06 [allreduce_fusion]: 5.56e-06 [matmul_add_comm_reduction]: 8.55979e-06 [allreduce_slice_to_reducescatter]: 3.70201e-07 [virtual_shard_identity]: 9.14e-06 [virtual_dataset]: 7.83987e-06 [get_grad_eliminate_]: 7.31042e-06 [virtual_output]: 7.04033e-06 [merge_forward]: 4.84986e-06 [cell_reuse_recompute_pass]: 1.88034e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.527e-05 [before_grad]: 1.24704e-05 [inplace_validation]: 4.39025e-06 [meta_fg_expand]: 4.86989e-06 [inplace_validation_after_expand]: 5.26989e-06 [flash_sp_send_recv_attached]: 9.99775e-07 [receive_attached]: 1.23028e-06 [after_resolve]: 9.47993e-06 [a_after_grad]: 1.22003e-05 [special_op_eliminate]: 7.43987e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.00006e-07 [auto_monad_grad]: 1.11992e-06 [auto_monad_eliminator]: 1.93301e-05 [cse]: 1.95899e-05 [a_3]: 4.89801e-05 [py_interpret_to_execute_after_opt_a]: 8.85967e-06 [slice_cell_reuse_recomputed_activation]: 2.13971e-06 [rewriter_after_opt_a]: 0.00013496 [convert_after_rewriter]: 8.65012e-06 [order_py_execute_after_rewriter]: 5.53997e-06 [opt_b]: 0.00024643, [1] [Cycle 1]: 0.00024095, [7] [b_1]: 0.00016477 [b_2]: 9.71975e-06 [updatestate_depend_eliminate]: 5.57024e-06 [updatestate_assign_eliminate]: 4.51971e-06 [updatestate_loads_eliminate]: 5.15999e-06 [renormalize]: 3.1013e-07 [cse]: 1.93799e-05 [optimize_parallel_all_gather_comm]: 8.74978e-06 [overlap_param_gather]: 1.07987e-06 [cconv]: 2.127e-05 [loop_unroll]: 0.00047145 [opt_after_cconv]: 0.00013481, [1] [Cycle 1]: 0.00012885, [7] [c_1]: 5.52698e-05 [parameter_eliminate]: 2.31015e-06 [updatestate_depend_eliminate]: 7.7798e-06 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 5.49993e-06 [cse]: 2.12304e-05 [renormalize]: 3.19909e-07 [remove_dup_value]: 1.25701e-05 [tuple_transform]: 6.99502e-05, [1] [Cycle 1]: 6.537e-05, [2] [d_1]: 5.56302e-05 [renormalize]: 1.80211e-07 [partial_unused_args_eliminate]: 1.75973e-06 [add_cache_embedding]: 1.22897e-05 [add_recomputation]: 5.85797e-05 [cse_after_recomputation]: 2.65902e-05, [1] [Cycle 1]: 2.167e-05, [1] [cse]: 1.685e-05 [environ_conv]: 6.75023e-06 [swap_dp_allreduce_reducescatter]: 7.00029e-06 [bias_add_comm_swap]: 2.38977e-06 [label_micro_interleaved_index]: 1.57999e-06 [label_fine_grained_interleaved_index]: 2.04984e-06 [merge_cast_opt]: 1.24006e-06 [slice_recompute_activation]: 1.34995e-06 [micro_interleaved_order_control]: 1.55997e-06 [assign_add_opt]: 7.2699e-06 [ForceFp32Comm]: 1.28988e-06 [remove_cast_before_assign_add]: 6.80331e-07 [full_micro_interleaved_order_control]: 1.72015e-06 [reorder_send_recv_between_fp_bp]: 1.90036e-06 [comm_op_add_attrs]: 9.80217e-07 [add_comm_op_reuse_tag]: 7.89762e-07 [interleave_split_concat_branches]: 7.79983e-07 [interleave_parallel_branches]: 6.49597e-07 [overlap_opt_shard_in_pipeline]: 7.19912e-07 [overlap_opt_shard_grad_in_pipeline]: 1.83983e-06 [control_data_broadcast_order]: 9.00123e-07 [grouped_pairwise_exchange_alltoall]: 9.29926e-07 [offloading_packed_experts]: 1.41002e-06 [overlap_recompute_and_grad_model_parallel]: 1.3602e-06 [overlap_grad_matmul_and_grad_allreduce]: 6.79865e-07 [overlap_recompute_allgather_and_fa_grad]: 9.00123e-07 [overlap_grad_ring_attention]: 1.8701e-06 [overlap_grad_flash_sp]: 1.32099e-05 [begin_end_overlap_inline]: 7.10133e-07 [split_matmul_comm_elemetwise]: 1.73971e-06 [split_layernorm_comm]: 1.66986e-06 [handle_group_info]: 9.00123e-07 [symbol_engine_optimizer]: 8.67699e-05, [1] [Cycle 1]: 8.20397e-05, [6] [build]: 4.27989e-06 [elim_shapecalc]: 1.18399e-05 [elim_not_effective]: 1.68798e-05 [opt_reshape]: 8.79029e-06 [fold_const_symbol]: 1.44597e-05 [renormalize]: 1.79745e-07 [pipeline_parallel_scheduler]: 1.55997e-06 [auto_monad_reorder]: 2.679e-05 [get_jit_bprop_graph]: 4.09782e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.00048942 [distribtued_split]: 3.83896e-05 [validate]: 3.24599e-05 [task_emit]: 0.0687977 [execute]: 9.43989e-06 Sums bootstrap : 0.000321s : 0.43% type_inference : 0.002358s : 3.12% auto_monad : 0.000122s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000022s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000545s : 0.72% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000223s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000035s : 0.05% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000446s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.06% optimize.opt_a.cse : 0.000047s : 0.06% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000135s : 0.18% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000021s : 0.03% optimize.loop_unroll : 0.000471s : 0.62% optimize.opt_after_cconv.c_1 : 0.000055s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000059s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000027s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000489s : 0.65% distribtued_split : 0.000038s : 0.05% validate : 0.000032s : 0.04% task_emit : 0.068798s : 91.17% execute : 0.000009s : 0.01% TotalTime = 0.0799644, [21] [bootstrap]: 0.00031276 [type_inference]: 0.00222322 [auto_monad]: 0.00010516 [graph_reusing]: 2.15974e-06 [inline]: 1.08033e-06 [parallel-infer-symbol]: 1.23028e-06 [pre_auto_parallel]: 2.047e-05 [insert-virtual-dataset]: 2.09967e-06 [parallel-infer-symbol-second]: 4.49829e-07 [dataset_repeat_opt]: 1.40024e-06 [pipeline_split]: 1.28988e-06 [optimize]: 0.00688147, [52] [py_interpret_to_execute]: 1.31298e-05 [rewriter_before_opt_a]: 3.13399e-05 [opt_a]: 0.00526382, [2] [Cycle 1]: 0.00145005, [43] [expand_dump_flag]: 2.34973e-06 [switch_simplify]: 2.542e-05 [loop_unroll]: 1.34804e-05 [a_1]: 0.00032664 [recompute_prepare]: 8.78004e-06 [updatestate_depend_eliminate]: 7.37002e-06 [updatestate_assign_eliminate]: 5.63962e-06 [updatestate_loads_eliminate]: 6.27013e-06 [parameter_eliminate]: 2.47033e-06 [a_2]: 0.00011528 [accelerated_algorithm]: 8.50018e-06 [shard]: 1.70013e-06 [meta_shard_fg_expand]: 3.60003e-06 [shard_inline]: 8.31997e-06 [auto_parallel]: 1.15102e-05 [parallel]: 5.38025e-06 [flash_sp]: 8.02008e-06 [merge_comm]: 7.2699e-06 [allreduce_fusion]: 5.43008e-06 [matmul_add_comm_reduction]: 8.53976e-06 [allreduce_slice_to_reducescatter]: 3.50177e-07 [virtual_shard_identity]: 1.02599e-05 [virtual_dataset]: 8.08993e-06 [get_grad_eliminate_]: 7.91997e-06 [virtual_output]: 8.02008e-06 [merge_forward]: 5.25964e-06 [cell_reuse_recompute_pass]: 1.55019e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.626e-05 [before_grad]: 1.35303e-05 [inplace_validation]: 5.01983e-06 [meta_fg_expand]: 4.90015e-06 [inplace_validation_after_expand]: 5.46034e-06 [flash_sp_send_recv_attached]: 4.26034e-06 [receive_attached]: 1.70013e-06 [after_resolve]: 1.08499e-05 [a_after_grad]: 1.30199e-05 [special_op_eliminate]: 7.89994e-06 [renormalize]: 0.0004196 [add_forward_monad_depend]: 2.44007e-06 [auto_monad_grad]: 1.26008e-06 [auto_monad_eliminator]: 2.26898e-05 [cse]: 2.53897e-05 [a_3]: 5.806e-05 [Cycle 2]: 0.00077935, [43] [expand_dump_flag]: 1.09989e-06 [switch_simplify]: 9.2499e-06 [loop_unroll]: 8.27992e-06 [a_1]: 0.00020519 [recompute_prepare]: 7.49994e-06 [updatestate_depend_eliminate]: 5.81983e-06 [updatestate_assign_eliminate]: 4.31016e-06 [updatestate_loads_eliminate]: 5.34998e-06 [parameter_eliminate]: 1.09011e-06 [a_2]: 0.00010499 [accelerated_algorithm]: 8.40984e-06 [shard]: 1.10967e-06 [meta_shard_fg_expand]: 2.68035e-06 [shard_inline]: 8.1202e-06 [auto_parallel]: 1.013e-05 [parallel]: 3.17022e-06 [flash_sp]: 2.82004e-06 [merge_comm]: 5.70016e-06 [allreduce_fusion]: 4.99003e-06 [matmul_add_comm_reduction]: 6.88992e-06 [allreduce_slice_to_reducescatter]: 2.60305e-07 [virtual_shard_identity]: 8.79029e-06 [virtual_dataset]: 7.70995e-06 [get_grad_eliminate_]: 7.81985e-06 [virtual_output]: 7.47014e-06 [merge_forward]: 4.50993e-06 [cell_reuse_recompute_pass]: 1.58977e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.47698e-05 [before_grad]: 1.27e-05 [inplace_validation]: 4.27011e-06 [meta_fg_expand]: 4.62029e-06 [inplace_validation_after_expand]: 4.96022e-06 [flash_sp_send_recv_attached]: 9.80217e-07 [receive_attached]: 7.59959e-07 [after_resolve]: 1.03e-05 [a_after_grad]: 1.19898e-05 [special_op_eliminate]: 7.22986e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.00006e-07 [auto_monad_grad]: 9.69972e-07 [auto_monad_eliminator]: 1.626e-05 [cse]: 1.805e-05 [a_3]: 4.91301e-05 [py_interpret_to_execute_after_opt_a]: 9.31975e-06 [slice_cell_reuse_recomputed_activation]: 1.60001e-06 [rewriter_after_opt_a]: 0.00014228 [convert_after_rewriter]: 1.02501e-05 [order_py_execute_after_rewriter]: 5.71972e-06 [opt_b]: 0.00024031, [1] [Cycle 1]: 0.00023525, [7] [b_1]: 0.00016291 [b_2]: 1.02399e-05 [updatestate_depend_eliminate]: 4.82984e-06 [updatestate_assign_eliminate]: 4.19002e-06 [updatestate_loads_eliminate]: 4.97e-06 [renormalize]: 2.90107e-07 [cse]: 1.76597e-05 [optimize_parallel_all_gather_comm]: 7.75e-06 [overlap_param_gather]: 8.29808e-07 [cconv]: 1.69598e-05 [loop_unroll]: 0.00048396 [opt_after_cconv]: 0.00012932, [1] [Cycle 1]: 0.00012341, [7] [c_1]: 5.324e-05 [parameter_eliminate]: 1.56974e-06 [updatestate_depend_eliminate]: 7.11996e-06 [updatestate_assign_eliminate]: 4.69992e-06 [updatestate_loads_eliminate]: 5.19026e-06 [cse]: 1.93897e-05 [renormalize]: 3.19909e-07 [remove_dup_value]: 8.69017e-06 [tuple_transform]: 6.89803e-05, [1] [Cycle 1]: 6.44098e-05, [2] [d_1]: 5.51599e-05 [renormalize]: 1.49943e-07 [partial_unused_args_eliminate]: 1.29035e-06 [add_cache_embedding]: 1.06399e-05 [add_recomputation]: 5.223e-05 [cse_after_recomputation]: 2.52998e-05, [1] [Cycle 1]: 2.07098e-05, [1] [cse]: 1.57603e-05 [environ_conv]: 6.50994e-06 [swap_dp_allreduce_reducescatter]: 7.01007e-06 [bias_add_comm_swap]: 1.70013e-06 [label_micro_interleaved_index]: 1.68011e-06 [label_fine_grained_interleaved_index]: 1.33039e-06 [merge_cast_opt]: 8.30274e-07 [slice_recompute_activation]: 1.30991e-06 [micro_interleaved_order_control]: 1.4198e-06 [assign_add_opt]: 6.94999e-06 [ForceFp32Comm]: 4.89876e-07 [remove_cast_before_assign_add]: 5.19678e-07 [full_micro_interleaved_order_control]: 9.50415e-07 [reorder_send_recv_between_fp_bp]: 1.2801e-06 [comm_op_add_attrs]: 5.09899e-07 [add_comm_op_reuse_tag]: 6.00237e-07 [interleave_split_concat_branches]: 5.80214e-07 [interleave_parallel_branches]: 5.80214e-07 [overlap_opt_shard_in_pipeline]: 9.19681e-07 [overlap_opt_shard_grad_in_pipeline]: 1.36998e-06 [control_data_broadcast_order]: 7.79983e-07 [grouped_pairwise_exchange_alltoall]: 6.00237e-07 [offloading_packed_experts]: 5.89993e-07 [overlap_recompute_and_grad_model_parallel]: 1.22981e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.0012e-07 [overlap_recompute_allgather_and_fa_grad]: 5.39701e-07 [overlap_grad_ring_attention]: 1.22981e-06 [overlap_grad_flash_sp]: 1.18902e-05 [begin_end_overlap_inline]: 3.90224e-07 [split_matmul_comm_elemetwise]: 9.79751e-07 [split_layernorm_comm]: 1.43005e-06 [handle_group_info]: 6.50063e-07 [symbol_engine_optimizer]: 8.55201e-05, [1] [Cycle 1]: 8.085e-05, [6] [build]: 3.72017e-06 [elim_shapecalc]: 1.18501e-05 [elim_not_effective]: 1.54302e-05 [opt_reshape]: 9.18005e-06 [fold_const_symbol]: 1.37398e-05 [renormalize]: 2.29571e-07 [pipeline_parallel_scheduler]: 8.69855e-07 [auto_monad_reorder]: 2.09101e-05 [get_jit_bprop_graph]: 2.89641e-07 [rewriter_after_jit_bprop_graph]: 4.60073e-07 [eliminate_special_op_node]: 0.00049462 [distribtued_split]: 3.43402e-05 [validate]: 2.94e-05 [task_emit]: 0.0695935 [execute]: 8.44989e-06 Sums bootstrap : 0.000313s : 0.41% type_inference : 0.002223s : 2.93% auto_monad : 0.000105s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000020s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000031s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000532s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000220s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000015s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000420s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000039s : 0.05% optimize.opt_a.cse : 0.000043s : 0.06% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000142s : 0.19% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000017s : 0.02% optimize.loop_unroll : 0.000484s : 0.64% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000019s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000052s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000000s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000021s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000495s : 0.65% distribtued_split : 0.000034s : 0.05% validate : 0.000029s : 0.04% task_emit : 0.069593s : 91.61% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000128 63 4.72% : 0.000006s : 2: substitution.depend_value_elim 2.28% : 0.000003s : 5: substitution.elim_not_effective 1.97% : 0.000003s : 5: substitution.fold_const_symbol 5.23% : 0.000007s : 6: substitution.graph_param_transform 51.25% : 0.000065s : 1: substitution.inline 3.98% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.32% : 0.000004s : 6: substitution.load_eliminater 2.36% : 0.000003s : 2: substitution.reduce_all_const_elim 6.13% : 0.000008s : 10: substitution.remove_not_recompute_node 2.18% : 0.000003s : 2: substitution.replace_old_param 8.77% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.80% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002330 2 88.69% : 0.002066s : 1: type_inference.infer 11.31% : 0.000264s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000228 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.14% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.14% : 0.000005s : 25: predicate.arithmetic_simplify 0.83% : 0.000002s : 13: predicate.cast_eliminate 0.85% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.50% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.31% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.88% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.67% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.13% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_depend_swap 1.93% : 0.000004s : 31: predicate.environ_get_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.33% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.23% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.86% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.61% : 0.000013s : 63: predicate.inline 1.00% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.02% : 0.000002s : 12: predicate.less_batch_normalization 1.67% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.37% : 0.000005s : 38: predicate.load_eliminater 1.31% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.38% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.78% : 0.000002s : 12: predicate.merge_addn 0.72% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.71% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.59% : 0.000001s : 6: predicate.parallel_virtual_node 1.23% : 0.000003s : 14: predicate.partial_defer_inline 1.20% : 0.000003s : 19: predicate.partial_eliminate 0.76% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000002s : 13: predicate.reduce_eliminate 0.51% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.78% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 1.10% : 0.000003s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.00% : 0.000002s : 12: predicate.shard_identity_eliminate 1.42% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 1.15% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.46% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.68% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.28% : 0.000010s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.85% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.52% : 0.000003s : 25: predicate.tuple_list_get_item_const_eliminator 1.51% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.70% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.58% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.63% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.40% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.40% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.56% : 0.000001s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.84% : 0.000002s : 12: predicate.virtual_output_eliminate 0.59% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000151 4 9.55% : 0.000014s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.45% : 0.000136s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088423 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000063s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000133s : 1: auto_monad 0.04% : 0.000033s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.39% : 0.000344s : 1: bootstrap 0.03% : 0.000025s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000046s : 1: distribtued_split 0.57% : 0.000503s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.54% : 0.000481s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001099s : 80: opt.transform.opt_a 0.06% : 0.000054s : 1: opt.transform.opt_after_cconv 0.17% : 0.000155s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 6.13% : 0.005422s : 1: opt_a 0.16% : 0.000139s : 1: opt_after_cconv 0.28% : 0.000250s : 1: opt_b 8.00% : 0.007076s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000239s : 1: renormalize.infer 0.23% : 0.000201s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000140s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000090s : 1: symbol_engine_optimizer 77.83% : 0.068823s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.69% : 0.002376s : 1: type_inference 0.08% : 0.000066s : 1: validate Time group info: ------[substitution.] 0.000114 63 4.63% : 0.000005s : 2: substitution.depend_value_elim 2.18% : 0.000002s : 5: substitution.elim_not_effective 1.84% : 0.000002s : 5: substitution.fold_const_symbol 5.57% : 0.000006s : 6: substitution.graph_param_transform 49.09% : 0.000056s : 1: substitution.inline 4.40% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.42% : 0.000004s : 6: substitution.load_eliminater 2.49% : 0.000003s : 2: substitution.reduce_all_const_elim 6.38% : 0.000007s : 10: substitution.remove_not_recompute_node 2.38% : 0.000003s : 2: substitution.replace_old_param 9.43% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.18% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002199 2 89.36% : 0.001965s : 1: type_inference.infer 10.64% : 0.000234s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000055 1 100.00% : 0.000055s : 1: match.inline ------[predicate.] 0.000228 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.23% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.23% : 0.000005s : 25: predicate.arithmetic_simplify 0.95% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.18% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.88% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.94% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.52% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.19% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000002s : 19: predicate.environ_get_depend_swap 1.82% : 0.000004s : 31: predicate.environ_get_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.24% : 0.000003s : 14: predicate.float_depend_g_call 0.69% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.95% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.71% : 0.000002s : 12: predicate.incorporate_call_switch 5.44% : 0.000012s : 63: predicate.inline 1.08% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.06% : 0.000002s : 12: predicate.less_batch_normalization 1.66% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.51% : 0.000006s : 38: predicate.load_eliminater 1.46% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.82% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.80% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.66% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.83% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.11% : 0.000003s : 13: predicate.reduce_eliminate 0.51% : 0.000001s : 12: predicate.remove_not_recompute_node 1.19% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.85% : 0.000002s : 13: predicate.reshape_eliminate 0.83% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.94% : 0.000002s : 12: predicate.shard_identity_eliminate 1.38% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 0.96% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.00% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.32% : 0.000005s : 38: predicate.stopgrad_eliminater 0.40% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000002s : 14: predicate.switch_defer_inline 1.70% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.29% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.82% : 0.000002s : 13: predicate.transpose_eliminate 1.81% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.68% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.76% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.75% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.57% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.73% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.28% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.51% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.54% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.59% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000138 4 8.35% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.65% : 0.000126s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088628 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.06% : 0.000057s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.13% : 0.000117s : 1: auto_monad 0.03% : 0.000027s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000338s : 1: bootstrap 0.02% : 0.000021s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000042s : 1: distribtued_split 0.57% : 0.000507s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000004s : 1: label_micro_interleaved_index 0.56% : 0.000493s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001082s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 5.94% : 0.005268s : 1: opt_a 0.15% : 0.000133s : 1: opt_after_cconv 0.27% : 0.000243s : 1: opt_b 7.77% : 0.006890s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.02% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.01% : 0.000012s : 1: remove_dup_value 0.26% : 0.000230s : 1: renormalize.infer 0.21% : 0.000184s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000148s : 1: rewriter_after_opt_a 0.04% : 0.000036s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000088s : 1: symbol_engine_optimizer 78.55% : 0.069618s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.53% : 0.002239s : 1: type_inference 0.07% : 0.000060s : 1: validate TotalTime = 0.0820448, [21] [bootstrap]: 0.00032685 [type_inference]: 0.00258464 [auto_monad]: 0.00012524 [graph_reusing]: 2.65986e-06 [inline]: 1.26986e-06 [parallel-infer-symbol]: 1.90036e-06 [pre_auto_parallel]: 2.445e-05 [insert-virtual-dataset]: 2.52994e-06 [parallel-infer-symbol-second]: 4.50294e-07 [dataset_repeat_opt]: 1.72993e-06 [pipeline_split]: 1.41002e-06 [optimize]: 0.00737692, [52] [py_interpret_to_execute]: 1.44797e-05 [rewriter_before_opt_a]: 3.46103e-05 [opt_a]: 0.00567554, [2] [Cycle 1]: 0.00161589, [43] [expand_dump_flag]: 3.48967e-06 [switch_simplify]: 2.86903e-05 [loop_unroll]: 1.37896e-05 [a_1]: 0.00034459 [recompute_prepare]: 8.56025e-06 [updatestate_depend_eliminate]: 8.11974e-06 [updatestate_assign_eliminate]: 5.81983e-06 [updatestate_loads_eliminate]: 8.10996e-06 [parameter_eliminate]: 3.22983e-06 [a_2]: 0.00012003 [accelerated_algorithm]: 8.2897e-06 [shard]: 2.17045e-06 [meta_shard_fg_expand]: 4.19002e-06 [shard_inline]: 8.57981e-06 [auto_parallel]: 1.19703e-05 [parallel]: 7.49016e-06 [flash_sp]: 1.16299e-05 [merge_comm]: 8.02986e-06 [allreduce_fusion]: 5.52041e-06 [matmul_add_comm_reduction]: 1.049e-05 [allreduce_slice_to_reducescatter]: 4.10248e-07 [virtual_shard_identity]: 9.81009e-06 [virtual_dataset]: 7.87014e-06 [get_grad_eliminate_]: 8.04011e-06 [virtual_output]: 7.91997e-06 [merge_forward]: 6.14021e-06 [cell_reuse_recompute_pass]: 1.8701e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.66302e-05 [before_grad]: 1.36001e-05 [inplace_validation]: 5.0501e-06 [meta_fg_expand]: 5.76023e-06 [inplace_validation_after_expand]: 6.52997e-06 [flash_sp_send_recv_attached]: 4.88013e-06 [receive_attached]: 2.35997e-06 [after_resolve]: 1.11298e-05 [a_after_grad]: 1.28001e-05 [special_op_eliminate]: 8.1202e-06 [renormalize]: 0.00050586 [add_forward_monad_depend]: 3.53018e-06 [auto_monad_grad]: 1.85007e-06 [auto_monad_eliminator]: 3.19998e-05 [cse]: 3.13004e-05 [a_3]: 5.83199e-05 [Cycle 2]: 0.00079595, [43] [expand_dump_flag]: 1.11014e-06 [switch_simplify]: 9.85991e-06 [loop_unroll]: 7.56001e-06 [a_1]: 0.00020881 [recompute_prepare]: 7.58981e-06 [updatestate_depend_eliminate]: 6.21006e-06 [updatestate_assign_eliminate]: 4.87966e-06 [updatestate_loads_eliminate]: 4.99981e-06 [parameter_eliminate]: 1.47987e-06 [a_2]: 0.00010523 [accelerated_algorithm]: 8.50018e-06 [shard]: 1.37975e-06 [meta_shard_fg_expand]: 2.79024e-06 [shard_inline]: 1.013e-05 [auto_parallel]: 1.14501e-05 [parallel]: 3.61027e-06 [flash_sp]: 3.6899e-06 [merge_comm]: 5.94975e-06 [allreduce_fusion]: 5.0501e-06 [matmul_add_comm_reduction]: 7.85012e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 8.95979e-06 [virtual_dataset]: 8.15e-06 [get_grad_eliminate_]: 7.93999e-06 [virtual_output]: 7.41007e-06 [merge_forward]: 5.0799e-06 [cell_reuse_recompute_pass]: 2.08011e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.59601e-05 [before_grad]: 1.27601e-05 [inplace_validation]: 4.59002e-06 [meta_fg_expand]: 4.65987e-06 [inplace_validation_after_expand]: 5.29038e-06 [flash_sp_send_recv_attached]: 1.07987e-06 [receive_attached]: 8.2003e-07 [after_resolve]: 9.85013e-06 [a_after_grad]: 1.20802e-05 [special_op_eliminate]: 7.48038e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.90344e-07 [auto_monad_grad]: 1.22003e-06 [auto_monad_eliminator]: 1.82204e-05 [cse]: 2.04598e-05 [a_3]: 4.87599e-05 [py_interpret_to_execute_after_opt_a]: 9.18005e-06 [slice_cell_reuse_recomputed_activation]: 2.02004e-06 [rewriter_after_opt_a]: 0.0001367 [convert_after_rewriter]: 1.093e-05 [order_py_execute_after_rewriter]: 5.62007e-06 [opt_b]: 0.00024197, [1] [Cycle 1]: 0.00023654, [7] [b_1]: 0.00016285 [b_2]: 9.35979e-06 [updatestate_depend_eliminate]: 5.20004e-06 [updatestate_assign_eliminate]: 4.38979e-06 [updatestate_loads_eliminate]: 5.03985e-06 [renormalize]: 2.89641e-07 [cse]: 1.81999e-05 [optimize_parallel_all_gather_comm]: 8.04011e-06 [overlap_param_gather]: 1.17021e-06 [cconv]: 2.31899e-05 [loop_unroll]: 0.00050684 [opt_after_cconv]: 0.0001337, [1] [Cycle 1]: 0.00012772, [7] [c_1]: 5.45601e-05 [parameter_eliminate]: 2.69013e-06 [updatestate_depend_eliminate]: 8.21007e-06 [updatestate_assign_eliminate]: 4.40981e-06 [updatestate_loads_eliminate]: 5.0501e-06 [cse]: 2.10502e-05 [renormalize]: 4.00003e-07 [remove_dup_value]: 1.409e-05 [tuple_transform]: 7.42301e-05, [1] [Cycle 1]: 6.94399e-05, [2] [d_1]: 5.99204e-05 [renormalize]: 2.29571e-07 [partial_unused_args_eliminate]: 2.08989e-06 [add_cache_embedding]: 1.39098e-05 [add_recomputation]: 6.34901e-05 [cse_after_recomputation]: 2.66e-05, [1] [Cycle 1]: 2.19601e-05, [1] [cse]: 1.685e-05 [environ_conv]: 7.48038e-06 [swap_dp_allreduce_reducescatter]: 7.05011e-06 [bias_add_comm_swap]: 2.52994e-06 [label_micro_interleaved_index]: 2.40002e-06 [label_fine_grained_interleaved_index]: 1.86032e-06 [merge_cast_opt]: 1.13994e-06 [slice_recompute_activation]: 2.11969e-06 [micro_interleaved_order_control]: 1.62981e-06 [assign_add_opt]: 7.30995e-06 [ForceFp32Comm]: 8.09785e-07 [remove_cast_before_assign_add]: 7.49715e-07 [full_micro_interleaved_order_control]: 2.04006e-06 [reorder_send_recv_between_fp_bp]: 2.48989e-06 [comm_op_add_attrs]: 1.00024e-06 [add_comm_op_reuse_tag]: 9.89996e-07 [interleave_split_concat_branches]: 8.2003e-07 [interleave_parallel_branches]: 1.03004e-06 [overlap_opt_shard_in_pipeline]: 1.43005e-06 [overlap_opt_shard_grad_in_pipeline]: 1.76998e-06 [control_data_broadcast_order]: 1.07987e-06 [grouped_pairwise_exchange_alltoall]: 1.34995e-06 [offloading_packed_experts]: 9.09902e-07 [overlap_recompute_and_grad_model_parallel]: 1.75042e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.02026e-06 [overlap_recompute_allgather_and_fa_grad]: 9.29926e-07 [overlap_grad_ring_attention]: 1.77976e-06 [overlap_grad_flash_sp]: 1.47601e-05 [begin_end_overlap_inline]: 8.10251e-07 [split_matmul_comm_elemetwise]: 1.90968e-06 [split_layernorm_comm]: 1.56974e-06 [handle_group_info]: 9.00123e-07 [symbol_engine_optimizer]: 8.662e-05, [1] [Cycle 1]: 8.20099e-05, [6] [build]: 3.49991e-06 [elim_shapecalc]: 1.16099e-05 [elim_not_effective]: 1.60802e-05 [opt_reshape]: 9.36026e-06 [fold_const_symbol]: 1.49501e-05 [renormalize]: 4.00003e-07 [pipeline_parallel_scheduler]: 1.31968e-06 [auto_monad_reorder]: 2.89502e-05 [get_jit_bprop_graph]: 4.49829e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.00052112 [distribtued_split]: 4.04301e-05 [validate]: 3.51402e-05 [task_emit]: 0.0707149 [execute]: 1.10399e-05 Sums bootstrap : 0.000327s : 0.42% type_inference : 0.002585s : 3.32% auto_monad : 0.000125s : 0.16% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000553s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000225s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000019s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000506s : 0.65% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000137s : 0.18% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000507s : 0.65% optimize.opt_after_cconv.c_1 : 0.000055s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000060s : 0.08% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000521s : 0.67% distribtued_split : 0.000040s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.070715s : 90.90% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000129 63 4.88% : 0.000006s : 2: substitution.depend_value_elim 2.13% : 0.000003s : 5: substitution.elim_not_effective 2.02% : 0.000003s : 5: substitution.fold_const_symbol 5.71% : 0.000007s : 6: substitution.graph_param_transform 48.96% : 0.000063s : 1: substitution.inline 4.08% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.20% : 0.000004s : 6: substitution.load_eliminater 2.79% : 0.000004s : 2: substitution.reduce_all_const_elim 6.25% : 0.000008s : 10: substitution.remove_not_recompute_node 2.63% : 0.000003s : 2: substitution.replace_old_param 9.34% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.01% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002555 2 89.31% : 0.002282s : 1: type_inference.infer 10.69% : 0.000273s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000062 1 100.00% : 0.000062s : 1: match.inline ------[predicate.] 0.000233 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.12% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.69% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.79% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.26% : 0.000005s : 25: predicate.arithmetic_simplify 0.88% : 0.000002s : 13: predicate.cast_eliminate 0.74% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.35% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.92% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.96% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.55% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000005s : 31: predicate.environ_get_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.79% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.29% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.65% : 0.000013s : 63: predicate.inline 0.92% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.06% : 0.000002s : 12: predicate.less_batch_normalization 1.82% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.36% : 0.000005s : 38: predicate.load_eliminater 1.37% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.28% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.77% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.71% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.21% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.83% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.28% : 0.000003s : 13: predicate.reduce_eliminate 0.48% : 0.000001s : 12: predicate.remove_not_recompute_node 1.16% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.83% : 0.000002s : 13: predicate.reshape_eliminate 0.77% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.49% : 0.000001s : 6: predicate.row_tensor_eliminate 1.01% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.43% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 1.02% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.30% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.95% : 0.000002s : 14: predicate.switch_defer_inline 1.56% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.51% : 0.000010s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.87% : 0.000002s : 13: predicate.transpose_eliminate 1.82% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.46% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.88% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.68% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.43% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.75% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.42% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.40% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.47% : 0.000001s : 6: predicate.value_based_eliminate 0.78% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.49% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000167 4 9.50% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.50% : 0.000151s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091327 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000068s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000137s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000350s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.59% : 0.000535s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000517s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001111s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000058s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 6.22% : 0.005680s : 1: opt_a 0.15% : 0.000138s : 1: opt_after_cconv 0.27% : 0.000245s : 1: opt_b 8.09% : 0.007385s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000030s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.31% : 0.000285s : 1: renormalize.infer 0.24% : 0.000215s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000142s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000090s : 1: symbol_engine_optimizer 77.46% : 0.070746s : 1: task_emit 0.09% : 0.000078s : 1: tuple_transform 2.85% : 0.002602s : 1: type_inference 0.08% : 0.000070s : 1: validate TotalTime = 0.0850083, [21] [bootstrap]: 0.00035249 [type_inference]: 0.00255719 [auto_monad]: 0.00013488 [graph_reusing]: 2.13971e-06 [inline]: 1.32015e-06 [parallel-infer-symbol]: 1.24006e-06 [pre_auto_parallel]: 2.702e-05 [insert-virtual-dataset]: 2.01026e-06 [parallel-infer-symbol-second]: 3.70201e-07 [dataset_repeat_opt]: 1.47987e-06 [pipeline_split]: 7.70204e-07 [optimize]: 0.00732702, [52] [py_interpret_to_execute]: 1.54898e-05 [rewriter_before_opt_a]: 3.65102e-05 [opt_a]: 0.00560812, [2] [Cycle 1]: 0.00159952, [43] [expand_dump_flag]: 3.99025e-06 [switch_simplify]: 3.09101e-05 [loop_unroll]: 1.28699e-05 [a_1]: 0.00034991 [recompute_prepare]: 9.09995e-06 [updatestate_depend_eliminate]: 7.41985e-06 [updatestate_assign_eliminate]: 5.99027e-06 [updatestate_loads_eliminate]: 7.39982e-06 [parameter_eliminate]: 3.03006e-06 [a_2]: 0.00011982 [accelerated_algorithm]: 9.01008e-06 [shard]: 1.69035e-06 [meta_shard_fg_expand]: 4.16022e-06 [shard_inline]: 8.44989e-06 [auto_parallel]: 1.16504e-05 [parallel]: 7.72998e-06 [flash_sp]: 1.268e-05 [merge_comm]: 8.10996e-06 [allreduce_fusion]: 5.60004e-06 [matmul_add_comm_reduction]: 1.112e-05 [allreduce_slice_to_reducescatter]: 6.09551e-07 [virtual_shard_identity]: 9.28016e-06 [virtual_dataset]: 7.87992e-06 [get_grad_eliminate_]: 7.87992e-06 [virtual_output]: 7.60006e-06 [merge_forward]: 6.18026e-06 [cell_reuse_recompute_pass]: 1.89012e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.73301e-05 [before_grad]: 1.409e-05 [inplace_validation]: 5.2196e-06 [meta_fg_expand]: 5.75976e-06 [inplace_validation_after_expand]: 6.89039e-06 [flash_sp_send_recv_attached]: 5.25033e-06 [receive_attached]: 2.46987e-06 [after_resolve]: 1.15102e-05 [a_after_grad]: 1.21002e-05 [special_op_eliminate]: 8.74e-06 [renormalize]: 0.00045489 [add_forward_monad_depend]: 3.55998e-06 [auto_monad_grad]: 2.33995e-06 [auto_monad_eliminator]: 3.38503e-05 [cse]: 3.46098e-05 [a_3]: 7.40797e-05 [Cycle 2]: 0.00079677, [43] [expand_dump_flag]: 1.11992e-06 [switch_simplify]: 9.37004e-06 [loop_unroll]: 8.15e-06 [a_1]: 0.00020749 [recompute_prepare]: 7.49994e-06 [updatestate_depend_eliminate]: 6.29993e-06 [updatestate_assign_eliminate]: 5.05988e-06 [updatestate_loads_eliminate]: 4.98025e-06 [parameter_eliminate]: 1.05007e-06 [a_2]: 0.00010406 [accelerated_algorithm]: 8.39029e-06 [shard]: 1.09989e-06 [meta_shard_fg_expand]: 2.59001e-06 [shard_inline]: 8.25012e-06 [auto_parallel]: 1.00997e-05 [parallel]: 3.69037e-06 [flash_sp]: 2.94996e-06 [merge_comm]: 6.00982e-06 [allreduce_fusion]: 5.24009e-06 [matmul_add_comm_reduction]: 7.32997e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 8.7996e-06 [virtual_dataset]: 7.81985e-06 [get_grad_eliminate_]: 7.60006e-06 [virtual_output]: 7.34022e-06 [merge_forward]: 4.24031e-06 [cell_reuse_recompute_pass]: 1.90968e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.50697e-05 [before_grad]: 1.31899e-05 [inplace_validation]: 4.40981e-06 [meta_fg_expand]: 4.98025e-06 [inplace_validation_after_expand]: 5.23031e-06 [flash_sp_send_recv_attached]: 7.19912e-07 [receive_attached]: 5.89993e-07 [after_resolve]: 9.72021e-06 [a_after_grad]: 1.17701e-05 [special_op_eliminate]: 7.87014e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 6.9011e-07 [auto_monad_grad]: 9.69972e-07 [auto_monad_eliminator]: 1.746e-05 [cse]: 1.82702e-05 [a_3]: 4.82197e-05 [py_interpret_to_execute_after_opt_a]: 8.98028e-06 [slice_cell_reuse_recomputed_activation]: 2.34041e-06 [rewriter_after_opt_a]: 0.00015156 [convert_after_rewriter]: 1.14902e-05 [order_py_execute_after_rewriter]: 7.01007e-06 [opt_b]: 0.00025094, [1] [Cycle 1]: 0.0002452, [7] [b_1]: 0.00016813 [b_2]: 9.84035e-06 [updatestate_depend_eliminate]: 5.51995e-06 [updatestate_assign_eliminate]: 4.7097e-06 [updatestate_loads_eliminate]: 5.0501e-06 [renormalize]: 2.90107e-07 [cse]: 1.824e-05 [optimize_parallel_all_gather_comm]: 7.50972e-06 [overlap_param_gather]: 1.11014e-06 [cconv]: 1.76299e-05 [loop_unroll]: 0.00049067 [opt_after_cconv]: 0.00013416, [1] [Cycle 1]: 0.00012783, [7] [c_1]: 5.36898e-05 [parameter_eliminate]: 1.89012e-06 [updatestate_depend_eliminate]: 7.78958e-06 [updatestate_assign_eliminate]: 4.65009e-06 [updatestate_loads_eliminate]: 5.01005e-06 [cse]: 2.10302e-05 [renormalize]: 3.89758e-07 [remove_dup_value]: 1.32397e-05 [tuple_transform]: 7.22199e-05, [1] [Cycle 1]: 6.72699e-05, [2] [d_1]: 5.70402e-05 [renormalize]: 2.39816e-07 [partial_unused_args_eliminate]: 1.24983e-06 [add_cache_embedding]: 1.373e-05 [add_recomputation]: 6.28801e-05 [cse_after_recomputation]: 2.73599e-05, [1] [Cycle 1]: 2.184e-05, [1] [cse]: 1.672e-05 [environ_conv]: 8.05967e-06 [swap_dp_allreduce_reducescatter]: 7.3798e-06 [bias_add_comm_swap]: 2.46987e-06 [label_micro_interleaved_index]: 2.2999e-06 [label_fine_grained_interleaved_index]: 1.59023e-06 [merge_cast_opt]: 1.45985e-06 [slice_recompute_activation]: 2.08011e-06 [micro_interleaved_order_control]: 9.79751e-07 [assign_add_opt]: 7.52974e-06 [ForceFp32Comm]: 1.09011e-06 [remove_cast_before_assign_add]: 4.69852e-07 [full_micro_interleaved_order_control]: 2.20025e-06 [reorder_send_recv_between_fp_bp]: 2.33017e-06 [comm_op_add_attrs]: 1.07009e-06 [add_comm_op_reuse_tag]: 9.00123e-07 [interleave_split_concat_branches]: 9.79751e-07 [interleave_parallel_branches]: 9.99775e-07 [overlap_opt_shard_in_pipeline]: 1.70991e-06 [overlap_opt_shard_grad_in_pipeline]: 2.44984e-06 [control_data_broadcast_order]: 1.24006e-06 [grouped_pairwise_exchange_alltoall]: 1.3602e-06 [offloading_packed_experts]: 1.15018e-06 [overlap_recompute_and_grad_model_parallel]: 2.29012e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.40053e-07 [overlap_recompute_allgather_and_fa_grad]: 7.30157e-07 [overlap_grad_ring_attention]: 2.02004e-06 [overlap_grad_flash_sp]: 1.533e-05 [begin_end_overlap_inline]: 3.29688e-07 [split_matmul_comm_elemetwise]: 1.79978e-06 [split_layernorm_comm]: 1.97999e-06 [handle_group_info]: 1.28988e-06 [symbol_engine_optimizer]: 8.73902e-05, [1] [Cycle 1]: 8.23298e-05, [6] [build]: 4.10993e-06 [elim_shapecalc]: 1.20196e-05 [elim_not_effective]: 1.61701e-05 [opt_reshape]: 9.09995e-06 [fold_const_symbol]: 1.36001e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 1.87987e-06 [auto_monad_reorder]: 2.99099e-05 [get_jit_bprop_graph]: 4.49829e-07 [rewriter_after_jit_bprop_graph]: 7.70204e-07 [eliminate_special_op_node]: 0.00050941 [distribtued_split]: 4.14597e-05 [validate]: 3.437e-05 [task_emit]: 0.0737062 [execute]: 1.44998e-05 Sums bootstrap : 0.000352s : 0.44% type_inference : 0.002557s : 3.17% auto_monad : 0.000135s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000027s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000557s : 0.69% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000224s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000017s : 0.02% optimize.opt_a.renormalize : 0.000455s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.06% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000122s : 0.15% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000152s : 0.19% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000168s : 0.21% optimize.opt_b.b_2 : TotalTime = 0.0850438, [21] [bootstrap]: 0.0003525 [type_inference]: 0.00255698 [auto_monad]: 0.0001352 [graph_reusing]: 2.20956e-06 [inline]: 1.46031e-06 [parallel-infer-symbol]: 1.95019e-06 [pre_auto_parallel]: 2.72603e-05 [insert-virtual-dataset]: 3.11993e-06 [parallel-infer-symbol-second]: 3.89758e-07 [dataset_repeat_opt]: 1.64006e-06 [pipeline_split]: 1.59955e-06 [optimize]: 0.00732803, [52] [py_interpret_to_execute]: 1.54399e-05 [rewriter_before_opt_a]: 3.60697e-05 [opt_a]: 0.00560629, [2] [Cycle 1]: 0.00156789, [43] [expand_dump_flag]: 1.62004e-06 [switch_simplify]: 2.29804e-05 [loop_unroll]: 1.308e-05 [a_1]: 0.00033014 [recompute_prepare]: 8.94023e-06 [updatestate_depend_eliminate]: 9.03988e-06 [updatestate_assign_eliminate]: 6.37025e-06 [updatestate_loads_eliminate]: 7.55023e-06 [parameter_eliminate]: 3.2899e-06 [a_2]: 0.00012018 [accelerated_algorithm]: 8.76002e-06 [shard]: 2.44007e-06 [meta_shard_fg_expand]: 4.19002e-06 [shard_inline]: 8.40984e-06 [auto_parallel]: 1.26399e-05 [parallel]: 7.39982e-06 [flash_sp]: 1.33403e-05 [merge_comm]: 8.43033e-06 [allreduce_fusion]: 5.30016e-06 [matmul_add_comm_reduction]: 1.14799e-05 [allreduce_slice_to_reducescatter]: 2.80328e-07 [virtual_shard_identity]: 9.85013e-06 [virtual_dataset]: 8.77026e-06 [get_grad_eliminate_]: 7.72998e-06 [virtual_output]: 7.62986e-06 [merge_forward]: 5.74999e-06 [cell_reuse_recompute_pass]: 1.94041e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.80602e-05 [before_grad]: 1.42497e-05 [inplace_validation]: 4.5402e-06 [meta_fg_expand]: 5.64009e-06 [inplace_validation_after_expand]: 7.31973e-06 [flash_sp_send_recv_attached]: 5.77979e-06 [receive_attached]: 2.19001e-06 [after_resolve]: 1.18301e-05 [a_after_grad]: 1.27298e-05 [special_op_eliminate]: 8.44989e-06 [renormalize]: 0.00045544 [add_forward_monad_depend]: 3.49013e-06 [auto_monad_grad]: 1.41002e-06 [auto_monad_eliminator]: 3.40799e-05 [cse]: 3.45898e-05 [a_3]: 7.79796e-05 [Cycle 2]: 0.00079276, [43] [expand_dump_flag]: 9.10368e-07 [switch_simplify]: 1.09999e-05 [loop_unroll]: 8.16956e-06 [a_1]: 0.00020631 [recompute_prepare]: 7.16001e-06 [updatestate_depend_eliminate]: 6.19004e-06 [updatestate_assign_eliminate]: 5.13019e-06 [updatestate_loads_eliminate]: 5.09014e-06 [parameter_eliminate]: 1.32993e-06 [a_2]: 0.00010542 [accelerated_algorithm]: 8.27992e-06 [shard]: 1.30991e-06 [meta_shard_fg_expand]: 2.55974e-06 [shard_inline]: 8.02008e-06 [auto_parallel]: 1.18101e-05 [parallel]: 3.62005e-06 [flash_sp]: 3.41982e-06 [merge_comm]: 5.72996e-06 [allreduce_fusion]: 5.09014e-06 [matmul_add_comm_reduction]: 8.29017e-06 [allreduce_slice_to_reducescatter]: 2.80328e-07 [virtual_shard_identity]: 8.76002e-06 [virtual_dataset]: 7.7798e-06 [get_grad_eliminate_]: 7.63964e-06 [virtual_output]: 6.96024e-06 [merge_forward]: 4.78001e-06 [cell_reuse_recompute_pass]: 1.96043e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.55899e-05 [before_grad]: 1.268e-05 [inplace_validation]: 4.34974e-06 [meta_fg_expand]: 4.75021e-06 [inplace_validation_after_expand]: 5.3402e-06 [flash_sp_send_recv_attached]: 9.20147e-07 [receive_attached]: 8.69855e-07 [after_resolve]: 1.02799e-00.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000018s : 0.02% optimize.loop_unroll : 0.000491s : 0.61% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000000s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optim5 [a_after_grad]: 1.19004e-05 [special_op_eliminate]: 7.58003e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.29691e-07 [auto_monad_grad]: 1.27032e-06 [auto_monad_eliminator]: 1.83601e-05 [cse]: 2.02497e-05 [a_3]: 4.89498e-05 [py_interpret_to_execute_after_opt_a]: 9.56981e-06 [slice_cell_reuse_recomputed_activation]: 2.44007e-06 [rewriter_after_opt_a]: 0.00015201 [convert_after_rewriter]: 1.295e-05 [order_py_execute_after_rewriter]: 5.11995e-06 [opt_b]: 0.00024365, [1] [Cycle 1]: 0.00023758, [7] [b_1]: 0.00016293 [b_2]: 1.011e-05 [updatestate_depend_eliminate]: 5.05988e-06 [updatestate_assign_eliminate]: 4.35999e-06 [updatestate_loads_eliminate]: 5.0799e-06 [renormalize]: 2.70084e-07 [cse]: 1.89701e-05 [optimize_parallel_all_gather_comm]: 8.68039e-06 [overlap_param_gather]: 1.67033e-06 [cconv]: 2.49301e-05 [loop_unroll]: 0.00049103 [opt_after_cconv]: 0.0001337, [1] [Cycle 1]: 0.00012744, [7] [c_1]: 5.34202e-05 [parameter_eliminate]: 2.6701e-06 [updatestate_depend_eliminate]: 8.02986e-06 [updatestate_assign_eliminate]: 4.63007e-06 [updatestate_loads_eliminate]: 4.94998e-06 [cse]: 2.22498e-05 [renormalize]: 5.20144e-07 [remove_dup_value]: 1.29603e-05 [tuple_transform]: 7.23801e-05, [1] [Cycle 1]: 6.779e-05, [2] [d_1]: 5.766e-05 [renormalize]: 3.19909e-07 [partial_unused_args_eliminate]: 1.95019e-06 [add_cache_embedding]: 1.39596e-05 [add_recomputation]: 6.303e-05 [cse_after_recomputation]: 2.82801e-05, [1] [Cycle 1]: 2.268e-05, [1] [cse]: 1.73799e-05 [environ_conv]: 6.54999e-06 [swap_dp_allreduce_reducescatter]: 7.8599e-06 [bias_add_comm_swap]: 2.31992e-06 [label_micro_interleaved_index]: 2.65986e-06 [label_fine_grained_interleaved_index]: 1.97999e-06 [merge_cast_opt]: 1.47009e-06 [slice_recompute_activation]: 1.96975e-06 [micro_interleaved_order_control]: 1.98977e-06 [assign_add_opt]: 7.6401e-06 [ForceFp32Comm]: 5.89993e-07 [remove_cast_before_assign_add]: 9.80217e-07 [full_micro_interleaved_order_control]: 2.19001e-06 [reorder_send_recv_between_fp_bp]: 2.21003e-06 [comm_op_add_attrs]: 8.90344e-07 [add_comm_op_reuse_tag]: 1.05007e-06 [interleave_split_concat_branches]: 8.49832e-07 [interleave_parallel_branches]: 5.99772e-07 [overlap_opt_shard_in_pipeline]: 1.4198e-06 [overlap_opt_shard_grad_in_pipeline]: 2.14996e-06 [control_data_broadcast_order]: 1.24006e-06 [grouped_pairwise_exchange_alltoall]: 9.00123e-07 [offloading_packed_experts]: 4.30271e-07 [overlap_recompute_and_grad_model_parallel]: 2.46987e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.59959e-07 [overlap_recompute_allgather_and_fa_grad]: 1.18976e-06 [overlap_grad_ring_attention]: 1.19023e-06 [overlap_grad_flash_sp]: 1.50101e-05 [begin_end_overlap_inline]: 7.59959e-07 [split_matmul_comm_elemetwise]: 2.22027e-06 [split_layernorm_comm]: 1.79e-06 [handle_group_info]: 1.24983e-06 [symbol_engine_optimizer]: 8.66298e-05, [1] [Cycle 1]: 8.207e-05, [6] [build]: 3.43006e-06 [elim_shapecalc]: 1.253e-05 [elim_not_effective]: 1.58702e-05 [opt_reshape]: 9.02032e-06 [fold_const_symbol]: 1.491e-05 [renormalize]: 2.59839e-07 [pipeline_parallel_scheduler]: 1.58045e-06 [auto_monad_reorder]: 3.02098e-05 [get_jit_bprop_graph]: 2.90107e-07 [rewriter_after_jit_bprop_graph]: 2.39816e-07 [eliminate_special_op_node]: 0.00050855 [distribtued_split]: 4.15402e-05 [validate]: 3.555e-05 [task_emit]: 0.0737582 [execute]: 8.21007e-06 Sums bootstrap : 0.000353s : 0.44% type_inference : 0.002557s : 3.16% auto_monad izer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000509s : 0.63% distribtued_split : 0.000041s : 0.05% validate : 0.000034s : 0.04% task_emit : 0.073706s : 91.28% execute : 0.000014s : 0.02% : 0.000135s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000027s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000034s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000536s : 0.66% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000226s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000017s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000456s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000052s : 0.06% optimize.opt_a.cse : 0.000055s : 0.07% optimize.opt_a.a_3 : 0.000127s : 0.16% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000152s : 0.19% optimize.convert_after_rewriter : 0.000013s : 0.02% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.20% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000491s : 0.61% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000058s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000003s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000000s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000509s : 0.63% distribtued_split : 0.000042s : 0.05% validate : 0.000036s : 0.04% task_emit : 0.073758s : 91.29% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000133 63 4.79% : 0.000006s : 2: substitution.depend_value_elim 2.08% : 0.000003s : 5: substitution.elim_not_effective 1.31% : 0.000002s : 5: substitution.fold_const_symbol 5.28% : 0.000007s : 6: substitution.graph_param_transform 52.39% : 0.000070s : 1: substitution.inline 3.98% : 0.000005s : 10: substitution.j_node_and_user_rematch 2.77% : 0.000004s : 6: substitution.load_eliminater 2.87% : 0.000004s : 2: substitution.reduce_all_const_elim 6.27% : 0.000008s : 10: substitution.remove_not_recompute_node 2.17% : 0.000003s : 2: substitution.replace_old_param 8.83% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.26% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002527 2 88.91% : 0.002247s : 1: type_inference.infer 11.09% : 0.000280s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000069 1 100.00% : 0.000069s : 1: match.inline ------[predicate.] 0.000230 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.10% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.84% : 0.000002s : 13: predicate.addn_zero_filter 0.79% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.29% : 0.000005s : 25: predicate.arithmetic_simplify 0.89% : 0.000002s : 13: predicate.cast_eliminate 0.84% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.50% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.43% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.83% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.59% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.18% : 0.000003s : 19: predicate.environ_get_depend_swap 1.89% : 0.000004s : 31: predicate.environ_get_eliminate 1.19% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.80% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.32% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.05% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.57% : 0.000013s : 63: predicate.inline 0.97% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.19% : 0.000003s : 12: predicate.less_batch_normalization 1.78% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000005s : 38: predicate.load_eliminater 1.30% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.69% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.72% : 0.000002s : 13: predicate.minmaximum_grad 0.81% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.44% : 0.000001s : 6: predicate.parallel_virtual_node 1.23% : 0.000003s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.13% : 0.000003s : 13: predicate.reduce_eliminate 0.49% : 0.000001s : 12: predicate.remove_not_recompute_node 1.18% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.85% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.02% : 0.000002s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.99% : 0.000002s : 12: predicate.shard_identity_eliminate 1.41% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.03% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.43% : 0.000006s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.63% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.45% : 0.000010s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.81% : 0.000002s : 13: predicate.transpose_eliminate 1.82% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.67% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.64% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.76% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.61% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.34% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.42% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.77% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000148 4 8.40% : 0.000012s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.60% : 0.000135s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.094211 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000068s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.16% : 0.000148s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.40% : 0.000378s : 1: bootstrap 0.02% : 0.000022s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000007s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.56% : 0.000523s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.03% : 0.000024s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.53% : 0.000500s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.20% : 0.001133s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000158s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.03% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 5.96% : 0.005612s : 1: opt_a 0.15% : 0.000138s : 1: opt_after_cconv 0.27% : 0.000254s : 1: opt_b 7.79% : 0.007335s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000008s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000034s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000252s : 1: renormalize.infer 0.21% : 0.000198s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000157s : 1: rewriter_after_opt_a 0.04% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000006s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000091s : 1: symbol_engine_optimizer 78.28% : 0.073743s : 1: task_emit 0.08% : 0.000075s : 1: tuple_transform 2.73% : 0.002576s : 1: type_inference 0.07% : 0.000069s : 1: validate Time group info: ------[substitution.] 0.000115 63 4.43% : 0.000005s : 2: substitution.depend_value_elim 2.38% : 0.000003s : 5: substitution.elim_not_effective 2.91% : 0.000003s : 5: substitution.fold_const_symbol 6.82% : 0.000008s : 6: substitution.graph_param_transform 43.16% : 0.000049s : 1: substitution.inline 4.60% : 0.000005s : 10: substitution.j_node_and_user_rematch 4.07% : 0.000005s : 6: substitution.load_eliminater 2.74% : 0.000003s : 2: substitution.reduce_all_const_elim 6.89% : 0.000008s : 10: substitution.remove_not_recompute_node 3.11% : 0.000004s : 2: substitution.replace_old_param 10.35% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.56% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002527 2 88.90% : 0.002246s : 1: type_inference.infer 11.10% : 0.000281s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000048 1 100.00% : 0.000048s : 1: match.inline ------[predicate.] 0.000231 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.05% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.67% : 0.000002s : 12: predicate.addn_check_dump 0.85% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.26% : 0.000005s : 25: predicate.arithmetic_simplify 0.90% : 0.000002s : 13: predicate.cast_eliminate 0.87% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.25% : 0.000001s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.21% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.86% : 0.000002s : 12: predicate.depend_value_elim 0.79% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.53% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.11% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 1.93% : 0.000004s : 31: predicate.environ_get_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.21% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.04% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.45% : 0.000013s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.11% : 0.000003s : 12: predicate.less_batch_normalization 1.80% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.50% : 0.000006s : 38: predicate.load_eliminater 1.25% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.19% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.89% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.84% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.83% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.53% : 0.000001s : 6: predicate.parallel_virtual_node 1.18% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.85% : 0.000002s : 13: predicate.print_const_string_wrapper 0.90% : 0.000002s : 12: predicate.reduce_all_const_elim 1.17% : 0.000003s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.56% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.86% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.58% : 0.000001s : 6: predicate.row_tensor_eliminate 1.02% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.01% : 0.000002s : 12: predicate.shard_identity_eliminate 1.49% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.14% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.90% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.37% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.71% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.05% : 0.000009s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.78% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.82% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.98% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.59% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.27% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.49% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.54% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.81% : 0.000002s : 12: predicate.virtual_output_eliminate 0.47% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000167 4 10.69% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.31% : 0.000149s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.094203 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000068s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.16% : 0.000148s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.40% : 0.000378s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000017s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.55% : 0.000523s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000005s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.53% : 0.000501s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.17% : 0.001098s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.16% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.03% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 5.96% : 0.005610s : 1: opt_a 0.15% : 0.000137s : 1: opt_after_cconv 0.26% : 0.000247s : 1: opt_b 7.79% : 0.007336s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.01% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000034s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000249s : 1: renormalize.infer 0.21% : 0.000200s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000157s : 1: rewriter_after_opt_a 0.04% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000090s : 1: symbol_engine_optimizer 78.33% : 0.073785s : 1: task_emit 0.08% : 0.000076s : 1: tuple_transform 2.73% : 0.002576s : 1: type_inference 0.07% : 0.000070s : 1: validate TotalTime = 0.085807, [21] [bootstrap]: 0.00032814 [type_inference]: 0.00246471 [auto_monad]: 0.00013024 [graph_reusing]: 2.12993e-06 [inline]: 1.38022e-06 [parallel-infer-symbol]: 2.10013e-06 [pre_auto_parallel]: 2.57301e-05 [insert-virtual-dataset]: 2.97977e-06 [parallel-infer-symbol-second]: 4.09782e-07 [dataset_repeat_opt]: 1.24006e-06 [pipeline_split]: 1.34017e-06 [optimize]: 0.00713718, [52] [py_interpret_to_execute]: 1.52201e-05 [rewriter_before_opt_a]: 3.52999e-05 [opt_a]: 0.00542582, [2] [Cycle 1]: 0.00154903, [43] [expand_dump_flag]: 3.45008e-06 [switch_simplify]: 3.039e-05 [loop_unroll]: 1.28797e-05 [a_1]: 0.00034399 [recompute_prepare]: 8.97003e-06 [updatestate_depend_eliminate]: 8.80007e-06 [updatestate_assign_eliminate]: 5.9302e-06 [updatestate_loads_eliminate]: 7.60006e-06 [parameter_eliminate]: 3.17022e-06 [a_2]: 0.0001192 [accelerated_algorithm]: 8.57003e-06 [shard]: 2.12993e-06 [meta_shard_fg_expand]: 4.17e-06 [shard_inline]: 8.62032e-06 [auto_parallel]: 1.194e-05 [parallel]: 8.35024e-06 [flash_sp]: 1.03298e-05 [merge_comm]: 8.53976e-06 [allreduce_fusion]: 4.79026e-06 [matmul_add_comm_reduction]: 1.049e-05 [allreduce_slice_to_reducescatter]: 6.00237e-07 [virtual_shard_identity]: 9.68017e-06 [virtual_dataset]: 8.00006e-06 [get_grad_eliminate_]: 7.72998e-06 [virtual_output]: 7.75978e-06 [merge_forward]: 5.87991e-06 [cell_reuse_recompute_pass]: 1.7602e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.64802e-05 [before_grad]: 1.371e-05 [inplace_validation]: 5.29969e-06 [meta_fg_expand]: 5.51995e-06 [inplace_validation_after_expand]: 7.55023e-06 [flash_sp_send_recv_attached]: 5.33974e-06 [receive_attached]: 2.73995e-06 [after_resolve]: 1.11e-05 [a_after_grad]: 1.27004e-05 [special_op_eliminate]: 7.65035e-06 [renormalize]: 0.00044696 [add_forward_monad_depend]: 3.46964e-06 [auto_monad_grad]: 1.8999e-06 [auto_monad_eliminator]: 3.32701e-05 [cse]: 3.21302e-05 [a_3]: 5.87702e-05 [Cycle 2]: 0.00080021, [43] [expand_dump_flag]: 1.15018e-06 [switch_simplify]: 9.0301e-06 [loop_unroll]: 9.65036e-06 [a_1]: 0.00020457 [recompute_prepare]: 7.2401e-06 [updatestate_depend_eliminate]: 6.19004e-06 [updatestate_assign_eliminate]: 4.86989e-06 [updatestate_loads_eliminate]: 5.28013e-06 [parameter_eliminate]: 1.53994e-06 [a_2]: 0.00011677 [accelerated_algorithm]: 8.22963e-06 [shard]: 1.30013e-06 [meta_shard_fg_expand]: 2.77022e-06 [shard_inline]: 7.56979e-06 [auto_parallel]: 1.12802e-05 [parallel]: 3.81004e-06 [flash_sp]: 3.79002e-06 [merge_comm]: 5.95022e-06 [allreduce_fusion]: 4.97e-06 [matmul_add_comm_reduction]: 8.08993e-06 [allreduce_slice_to_reducescatter]: 2.19792e-07 [virtual_shard_identity]: 8.6599e-06 [virtual_dataset]: 7.6103e-06 [get_grad_eliminate_]: 7.43009e-06 [virtual_output]: 6.8997e-06 [merge_forward]: 4.5998e-06 [cell_reuse_recompute_pass]: 2.05031e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.51498e-05 [before_grad]: 1.24802e-05 [inplace_validation]: 4.10993e-06 [meta_fg_expand]: 5.24009e-06 [inplace_validation_after_expand]: 5.23962e-06 [flash_sp_send_recv_attached]: 1.10967e-06 [receive_attached]: 7.90227e-07 [after_resolve]: 9.4804e-06 [a_after_grad]: 1.19e-05 [special_op_eliminate]: 7.22986e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 9.19681e-07 [auto_monad_grad]: 1.22981e-06 [auto_monad_eliminator]: 1.91499e-05 [cse]: 1.97301e-05 [a_3]: 4.89699e-05 [py_interpret_to_execute_after_opt_a]: 9.49018e-06 [slice_cell_reuse_recomputed_activation]: 2.4098e-06 [rewriter_after_opt_a]: 0.0001391 [convert_after_rewriter]: 8.94023e-06 [order_py_execute_after_rewriter]: 6.94999e-06 [opt_b]: 0.00024403, [1] [Cycle 1]: 0.00023812, [7] [b_1]: 0.00016381 [b_2]: 9.6797e-06 [updatestate_depend_eliminate]: 5.41983e-06 [updatestate_assign_eliminate]: 4.63007e-06 [updatestate_loads_eliminate]: 5.28013e-06 [renormalize]: 3.7998e-07 [cse]: 1.82502e-05 [optimize_parallel_all_gather_comm]: 8.29995e-06 [overlap_param_gather]: 2.00002e-06 [cconv]: 2.22796e-05 [loop_unroll]: 0.00049833 [opt_after_cconv]: 0.00013705, [1] [Cycle 1]: 0.00013059, [7] [c_1]: 5.50398e-05 [parameter_eliminate]: 2.31992e-06 [updatestate_depend_eliminate]: 7.89994e-06 [updatestate_assign_eliminate]: 4.74975e-06 [updatestate_loads_eliminate]: 5.65033e-06 [cse]: 2.15499e-05 [renormalize]: 3.90224e-07 [remove_dup_value]: 1.32499e-05 [tuple_transform]: 7.01998e-05, [1] [Cycle 1]: 6.54398e-05, [2] [d_1]: 5.566e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 1.8701e-06 [add_cache_embedding]: 1.41999e-05 [add_recomputation]: 6.42203e-05 [cse_after_recomputation]: 2.79201e-05, [1] [Cycle 1]: 2.283e-05, [1] [cse]: 1.786e-05 [environ_conv]: 8.38982e-06 [swap_dp_allreduce_reducescatter]: 7.59028e-06 [bias_add_comm_swap]: 2.48989e-06 [label_micro_interleaved_index]: 2.35019e-06 [label_fine_grained_interleaved_index]: 2.12993e-06 [merge_cast_opt]: 1.39e-06 [slice_recompute_activation]: 1.79e-06 [micro_interleaved_order_control]: 1.91014e-06 [assign_add_opt]: 7.56979e-06 [ForceFp32Comm]: 8.69855e-07 [remove_cast_before_assign_add]: 9.20147e-07 [full_micro_interleaved_order_control]: 2.73017e-06 [reorder_send_recv_between_fp_bp]: 1.95019e-06 [comm_op_add_attrs]: 9.79751e-07 [add_comm_op_reuse_tag]: 1.11992e-06 [interleave_split_concat_branches]: 8.49832e-07 [interleave_parallel_branches]: 6.9011e-07 [overlap_opt_shard_in_pipeline]: 1.47009e-06 [overlap_opt_shard_grad_in_pipeline]: 2.65986e-06 [control_data_broadcast_order]: 9.4017e-07 [grouped_pairwise_exchange_alltoall]: 1.19023e-06 [offloading_packed_experts]: 1.30991e-06 [overlap_recompute_and_grad_model_parallel]: 2.82004e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.89879e-07 [overlap_recompute_allgather_and_fa_grad]: 7.90227e-07 [overlap_grad_ring_attention]: 1.82027e-06 [overlap_grad_flash_sp]: 1.51903e-05 [begin_end_overlap_inline]: 1.01002e-06 [split_matmul_comm_elemetwise]: 2.04006e-06 [split_layernorm_comm]: 1.60001e-06 [handle_group_info]: 1.30991e-06 [symbol_engine_optimizer]: 8.65902e-05, [1] [Cycle 1]: 8.20099e-05, [6] [build]: 3.64985e-06 [elim_shapecalc]: 1.20997e-05 [elim_not_effective]: 1.64998e-05 [opt_reshape]: 8.82987e-06 [fold_const_symbol]: 1.37901e-05 [renormalize]: 2.5006e-07 [pipeline_parallel_scheduler]: 1.62004e-06 [auto_monad_reorder]: 2.70498e-05 [get_jit_bprop_graph]: 4.4005e-07 [rewriter_after_jit_bprop_graph]: 4.09782e-07 [eliminate_special_op_node]: 0.00051561 [distribtued_split]: 4.18099e-05 [validate]: 3.55099e-05 [task_emit]: 0.0748257 [execute]: 1.213e-05 Sums bootstrap : 0.000328s : 0.40% type_inference : 0.002465s : 3.02% auto_monad : 0.000130s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000023s : 0.03% optimize.opt_a.a_1 : 0.000549s : 0.67% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000236s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000447s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000052s : 0.06% optimize.opt_a.cse : 0.000052s : 0.06% optimize.opt_a.a_3 : 0.000108s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000139s : 0.17% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000164s : 0.20% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000498s : 0.61% optimize.opt_after_cconv.c_1 : 0.000055s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000003s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000027s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000516s : 0.63% distribtued_split : 0.000042s : 0.05% validate : 0.000036s : 0.04% task_emit : 0.074826s : 91.55% execute : 0.000012s : 0.01% Time group info: ------[substitution.] 0.000131 63 5.28% : 0.000007s : 2: substitution.depend_value_elim 1.99% : 0.000003s : 5: substitution.elim_not_effective 1.74% : 0.000002s : 5: substitution.fold_const_symbol 5.40% : 0.000007s : 6: substitution.graph_param_transform 49.94% : 0.000065s : 1: substitution.inline 4.12% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.29% : 0.000004s : 6: substitution.load_eliminater 2.80% : 0.000004s : 2: substitution.reduce_all_const_elim 6.03% : 0.000008s : 10: substitution.remove_not_recompute_node 2.56% : 0.000003s : 2: substitution.replace_old_param 9.03% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.83% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002434 2 88.44% : 0.002153s : 1: type_inference.infer 11.56% : 0.000281s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000239 1420 0.76% : 0.000002s : 13: predicate.accumulaten_eliminater 1.14% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.83% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.21% : 0.000005s : 25: predicate.arithmetic_simplify 0.80% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.70% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.36% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.92% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.55% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.07% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.06% : 0.000003s : 19: predicate.environ_get_depend_swap 1.85% : 0.000004s : 31: predicate.environ_get_eliminate 1.03% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.79% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.22% : 0.000003s : 14: predicate.float_depend_g_call 0.69% : 0.000002s : 12: predicate.float_environ_get_switch 1.10% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.37% : 0.000013s : 63: predicate.inline 1.02% : 0.000002s : 12: predicate.inline_without_move 0.37% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.99% : 0.000002s : 12: predicate.less_batch_normalization 1.66% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.30% : 0.000005s : 38: predicate.load_eliminater 1.32% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.19% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.84% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.76% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.44% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.16% : 0.000003s : 19: predicate.partial_eliminate 0.73% : 0.000002s : 13: predicate.print_const_string_wrapper 0.93% : 0.000002s : 12: predicate.reduce_all_const_elim 1.04% : 0.000002s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.10% : 0.000003s : 25: predicate.replace_applicator 0.43% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.75% : 0.000002s : 13: predicate.reshape_eliminate 0.78% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 0.94% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.87% : 0.000002s : 12: predicate.shard_identity_eliminate 1.23% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 1.04% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.31% : 0.000006s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.82% : 0.000002s : 14: predicate.switch_defer_inline 1.63% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.33% : 0.000010s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.60% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.40% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.60% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.49% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.40% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.25% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 6.82% : 0.000016s : 50: predicate.updatestate_useless_node_eliminater 0.52% : 0.000001s : 6: predicate.value_based_eliminate 0.78% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.88% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000159 4 10.05% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.95% : 0.000143s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.094788 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000069s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000142s : 1: auto_monad 0.04% : 0.000034s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000353s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.56% : 0.000530s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000509s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.18% : 0.001116s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.16% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 5.73% : 0.005430s : 1: opt_a 0.15% : 0.000141s : 1: opt_after_cconv 0.26% : 0.000247s : 1: opt_b 7.54% : 0.007145s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000244s : 1: renormalize.infer 0.21% : 0.000196s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000145s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000089s : 1: symbol_engine_optimizer 78.97% : 0.074856s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.62% : 0.002484s : 1: type_inference 0.08% : 0.000071s : 1: validate TotalTime = 0.09366, [21] [bootstrap]: 0.00034778 [type_inference]: 0.00267419 [auto_monad]: 0.00013578 [graph_reusing]: 2.6701e-06 [inline]: 1.79978e-06 [parallel-infer-symbol]: 2.40002e-06 [pre_auto_parallel]: 2.70302e-05 [insert-virtual-dataset]: 3.24985e-06 [parallel-infer-symbol-second]: 4.4005e-07 [dataset_repeat_opt]: 1.66986e-06 [pipeline_split]: 1.67033e-06 [optimize]: 0.00779825, [52] [py_interpret_to_execute]: 2.08402e-05 [rewriter_before_opt_a]: 4.06699e-05 [opt_a]: 0.00593295, [2] [Cycle 1]: 0.00175885, [43] [expand_dump_flag]: 3.71039e-06 [switch_simplify]: 3.361e-05 [loop_unroll]: 1.60602e-05 [a_1]: 0.00040658 [recompute_prepare]: 1.095e-05 [updatestate_depend_eliminate]: 8.99984e-06 [updatestate_assign_eliminate]: 6.23008e-06 [updatestate_loads_eliminate]: 8.35024e-06 [parameter_eliminate]: 3.47989e-06 [a_2]: 0.00014422 [accelerated_algorithm]: 1.03801e-05 [shard]: 2.33995e-06 [meta_shard_fg_expand]: 3.85009e-06 [shard_inline]: 1.02096e-05 [auto_parallel]: 1.27703e-05 [parallel]: 7.89994e-06 [flash_sp]: 1.21701e-05 [merge_comm]: 8.88994e-06 [allreduce_fusion]: 6.51041e-06 [matmul_add_comm_reduction]: 1.10599e-05 [allreduce_slice_to_reducescatter]: 4.69852e-07 [virtual_shard_identity]: 1.188e-05 [virtual_dataset]: 9.78028e-06 [get_grad_eliminate_]: 9.66014e-06 [virtual_output]: 9.49018e-06 [merge_forward]: 6.09038e-06 [cell_reuse_recompute_pass]: 1.93017e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.15899e-05 [before_grad]: 1.73803e-05 [inplace_validation]: 5.55022e-06 [meta_fg_expand]: 6.18026e-06 [inplace_validation_after_expand]: 7.22986e-06 [flash_sp_send_recv_attached]: 5.47012e-06 [receive_attached]: 3.18978e-06 [after_resolve]: 1.33999e-05 [a_after_grad]: 1.59699e-05 [special_op_eliminate]: 1.03102e-05 [renormalize]: 0.00048678 [add_forward_monad_depend]: 3.46964e-06 [auto_monad_grad]: 1.95997e-06 [auto_monad_eliminator]: 3.40804e-05 [cse]: 3.557e-05 [a_3]: 6.94599e-05 [Cycle 2]: 0.00093309, [43] [expand_dump_flag]: 1.15996e-06 [switch_simplify]: 1.10203e-05 [loop_unroll]: 1.16602e-05 [a_1]: 0.00025345 [recompute_prepare]: 9.01986e-06 [updatestate_depend_eliminate]: 6.38003e-06 [updatestate_assign_eliminate]: 5.26989e-06 [updatestate_loads_eliminate]: 5.91995e-06 [parameter_eliminate]: 1.43982e-06 [a_2]: 0.00012831 [accelerated_algorithm]: 1.03903e-05 [shard]: 1.34017e-06 [meta_shard_fg_expand]: 2.72971e-06 [shard_inline]: 9.77004e-06 [auto_parallel]: 1.20699e-05 [parallel]: 4.41028e-06 [flash_sp]: 4.29992e-06 [merge_comm]: 6.82985e-06 [allreduce_fusion]: 6.00982e-06 [matmul_add_comm_reduction]: 8.52998e-06 [allreduce_slice_to_reducescatter]: 3.1013e-07 [virtual_shard_identity]: 1.05998e-05 [virtual_dataset]: 9.33977e-06 [get_grad_eliminate_]: 9.30019e-06 [virtual_output]: 8.6301e-06 [merge_forward]: 5.1898e-06 [cell_reuse_recompute_pass]: 2.1602e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.97799e-05 [before_grad]: 1.57799e-05 [inplace_validation]: 4.6799e-06 [meta_fg_expand]: 5.68014e-06 [inplace_validation_after_expand]: 5.85988e-06 [flash_sp_send_recv_attached]: 1.03004e-06 [receive_attached]: 8.49832e-07 [after_resolve]: 1.21896e-05 [a_after_grad]: 1.44099e-05 [special_op_eliminate]: 9.56003e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 8.40053e-07 [auto_monad_grad]: 1.14972e-06 [auto_monad_eliminator]: 1.986e-05 [cse]: 2.19601e-05 [a_3]: 5.962e-05 [py_interpret_to_execute_after_opt_a]: 1.011e-05 [slice_cell_reuse_recomputed_activation]: 2.63983e-06 [rewriter_after_opt_a]: 0.00014873 [convert_after_rewriter]: 9.9903e-06 [order_py_execute_after_rewriter]: 6.63009e-06 [opt_b]: 0.00028762, [1] [Cycle 1]: 0.00028145, [7] [b_1]: 0.00019672 [b_2]: 1.22599e-05 [updatestate_depend_eliminate]: 6.15977e-06 [updatestate_assign_eliminate]: 4.68036e-06 [updatestate_loads_eliminate]: 5.8501e-06 [renormalize]: 3.19909e-07 [cse]: 2.03201e-05 [optimize_parallel_all_gather_comm]: 9.13022e-06 [overlap_param_gather]: 1.55019e-06 [cconv]: 2.422e-05 [loop_unroll]: 0.00051547 [opt_after_cconv]: 0.00014951, [1] [Cycle 1]: 0.00014308, [7] [c_1]: 6.33299e-05 [parameter_eliminate]: 2.60957e-06 [updatestate_depend_eliminate]: 8.71997e-06 [updatestate_assign_eliminate]: 5.00027e-06 [updatestate_loads_eliminate]: 5.8501e-06 [cse]: 2.27899e-05 [renormalize]: 4.00003e-07 [remove_dup_value]: 1.50898e-05 [tuple_transform]: 8.42498e-05, [1] [Cycle 1]: 7.929e-05, [2] [d_1]: 6.90101e-05 [renormalize]: 2.70084e-07 [partial_unused_args_eliminate]: 2.25985e-06 [add_cache_embedding]: 1.42697e-05 [add_recomputation]: 6.6e-05 [cse_after_recomputation]: 2.99402e-05, [1] [Cycle 1]: 2.49799e-05, [1] [cse]: 1.948e-05 [environ_conv]: 7.20983e-06 [swap_dp_allreduce_reducescatter]: 7.97026e-06 [bias_add_comm_swap]: 2.35997e-06 [label_micro_interleaved_index]: 2.21003e-06 [label_fine_grained_interleaved_index]: 2.48989e-06 [merge_cast_opt]: 1.43005e-06 [slice_recompute_activation]: 2.02982e-06 [micro_interleaved_order_control]: 1.83005e-06 [assign_add_opt]: 7.68993e-06 [ForceFp32Comm]: 1.15018e-06 [remove_cast_before_assign_add]: 1.01002e-06 [full_micro_interleaved_order_control]: 2.65986e-06 [reorder_send_recv_between_fp_bp]: 2.2999e-06 [comm_op_add_attrs]: 9.49949e-07 [add_comm_op_reuse_tag]: 1.18976e-06 [interleave_split_concat_branches]: 9.69972e-07 [interleave_parallel_branches]: 7.30157e-07 [overlap_opt_shard_in_pipeline]: 1.15018e-06 [overlap_opt_shard_grad_in_pipeline]: 2.88012e-06 [control_data_broadcast_order]: 1.10967e-06 [grouped_pairwise_exchange_alltoall]: 1.8198e-06 [offloading_packed_experts]: 1.13016e-06 [overlap_recompute_and_grad_model_parallel]: 2.31992e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.29926e-07 [overlap_recompute_allgather_and_fa_grad]: 1.22981e-06 [overlap_grad_ring_attention]: 2.14996e-06 [overlap_grad_flash_sp]: 1.653e-05 [begin_end_overlap_inline]: 8.2003e-07 [split_matmul_comm_elemetwise]: 2.23005e-06 [split_layernorm_comm]: 1.90036e-06 [handle_group_info]: 1.01002e-06 [symbol_engine_optimizer]: 9.91598e-05, [1] [Cycle 1]: 9.44398e-05, [6] [build]: 4.36977e-06 [elim_shapecalc]: 1.394e-05 [elim_not_effective]: 1.97599e-05 [opt_reshape]: 1.06399e-05 [fold_const_symbol]: 1.75e-05 [renormalize]: 2.5006e-07 [pipeline_parallel_scheduler]: 1.72015e-06 [auto_monad_reorder]: 3.07499e-05 [get_jit_bprop_graph]: 4.49829e-07 [rewriter_after_jit_bprop_graph]: 7.69738e-07 [eliminate_special_op_node]: 0.00053789 [distribtued_split]: 4.59999e-05 [validate]: 3.68501e-05 [task_emit]: 0.0817195 [execute]: 1.36998e-05 Sums bootstrap : 0.000348s : 0.39% type_inference : 0.002674s : 2.99% auto_monad : 0.000136s : 0.15% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000027s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000021s : 0.02% optimize.rewriter_before_opt_a : 0.000041s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000045s : 0.05% optimize.opt_a.loop_unroll : 0.000028s : 0.03% optimize.opt_a.a_1 : 0.000660s : 0.74% optimize.opt_a.recompute_prepare : 0.000020s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000273s : 0.31% optimize.opt_a.accelerated_algorithm : 0.000021s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000020s : 0.02% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000016s : 0.02% optimize.opt_a.allreduce_fusion : 0.000013s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000022s : 0.03% optimize.opt_a.virtual_dataset : 0.000019s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000019s : 0.02% optimize.opt_a.virtual_output : 0.000018s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000041s : 0.05% optimize.opt_a.before_grad : 0.000033s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000012s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000026s : 0.03% optimize.opt_a.a_after_grad : 0.000030s : 0.03% optimize.opt_a.special_op_eliminate : 0.000020s : 0.02% optimize.opt_a.renormalize : 0.000487s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000054s : 0.06% optimize.opt_a.cse : 0.000058s : 0.06% optimize.opt_a.a_3 : 0.000129s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000149s : 0.17% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000197s : 0.22% optimize.opt_b.b_2 : 0.000012s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000515s : 0.58% optimize.opt_after_cconv.c_1 : 0.000063s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000015s : 0.02% optimize.tuple_transform.d_1 : 0.000069s : 0.08% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000066s : 0.07% optimize.cse_after_recomputation.cse : 0.000019s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000017s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000020s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000018s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000031s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000538s : 0.60% distribtued_split : 0.000046s : 0.05% validate : 0.000037s : 0.04% task_emit : 0.081720s : 91.49% execute : 0.000014s : 0.02% Time group info: ------[substitution.] 0.000149 63 5.15% : 0.000008s : 2: substitution.depend_value_elim 2.22% : 0.000003s : 5: substitution.elim_not_effective 2.01% : 0.000003s : 5: substitution.fold_const_symbol 5.75% : 0.000009s : 6: substitution.graph_param_transform 46.57% : 0.000070s : 1: substitution.inline 4.62% : 0.000007s : 10: substitution.j_node_and_user_rematch 3.51% : 0.000005s : 6: substitution.load_eliminater 3.00% : 0.000004s : 2: substitution.reduce_all_const_elim 7.05% : 0.000011s : 10: substitution.remove_not_recompute_node 2.68% : 0.000004s : 2: substitution.replace_old_param 8.96% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 8.47% : 0.000013s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002644 2 89.03% : 0.002353s : 1: type_inference.infer 10.97% : 0.000290s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000269 1420 0.76% : 0.000002s : 13: predicate.accumulaten_eliminater 1.27% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.69% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.10% : 0.000006s : 25: predicate.arithmetic_simplify 0.86% : 0.000002s : 13: predicate.cast_eliminate 0.77% : 0.000002s : 12: predicate.check_bprop_eliminate 0.70% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.33% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.79% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.32% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 1.91% : 0.000005s : 31: predicate.environ_get_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.91% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.81% : 0.000002s : 12: predicate.incorporate_call 0.71% : 0.000002s : 12: predicate.incorporate_call_switch 6.06% : 0.000016s : 63: predicate.inline 1.14% : 0.000003s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.98% : 0.000003s : 12: predicate.less_batch_normalization 1.71% : 0.000005s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000006s : 38: predicate.load_eliminater 1.26% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.18% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.78% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.71% : 0.000002s : 13: predicate.minmaximum_grad 0.67% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.35% : 0.000004s : 19: predicate.partial_eliminate 0.82% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.02% : 0.000003s : 13: predicate.reduce_eliminate 0.58% : 0.000002s : 12: predicate.remove_not_recompute_node 1.16% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.76% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.06% : 0.000003s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.90% : 0.000002s : 12: predicate.shard_identity_eliminate 1.47% : 0.000004s : 18: predicate.special_op_eliminate 1.05% : 0.000003s : 12: predicate.specialize_transform 1.08% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000006s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.68% : 0.000005s : 26: predicate.switch_layer_defer_inline 4.24% : 0.000011s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.82% : 0.000002s : 13: predicate.transpose_eliminate 1.72% : 0.000005s : 25: predicate.tuple_list_convert_item_index_to_positive 1.73% : 0.000005s : 25: predicate.tuple_list_get_item_const_eliminator 1.53% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.80% : 0.000008s : 37: predicate.tuple_list_get_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.64% : 0.000007s : 37: predicate.tuple_list_set_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.40% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.57% : 0.000010s : 50: predicate.updatestate_useless_node_eliminater 0.47% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.47% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000159 4 10.75% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.25% : 0.000142s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.103627 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000071s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.14% : 0.000149s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.36% : 0.000373s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000033s : 1: cse_after_recomputation 0.01% : 0.000007s : 1: dataset_repeat_opt 0.05% : 0.000055s : 1: distribtued_split 0.53% : 0.000552s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000023s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000006s : 1: label_micro_interleaved_index 0.51% : 0.000526s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.29% : 0.001339s : 80: opt.transform.opt_a 0.06% : 0.000062s : 1: opt.transform.opt_after_cconv 0.18% : 0.000185s : 27: opt.transform.opt_b 0.06% : 0.000067s : 1: opt.transform.opt_trans_graph 0.04% : 0.000038s : 3: opt.transform.special_op_eliminate 0.06% : 0.000057s : 4: opt.transform.symbol_engine_opt 5.73% : 0.005937s : 1: opt_a 0.15% : 0.000154s : 1: opt_after_cconv 0.28% : 0.000291s : 1: opt_b 7.53% : 0.007807s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000020s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.03% : 0.000034s : 1: pre_auto_parallel 0.03% : 0.000026s : 1: py_interpret_to_execute 0.01% : 0.000015s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.26% : 0.000269s : 1: renormalize.infer 0.20% : 0.000211s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000155s : 1: rewriter_after_opt_a 0.04% : 0.000046s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000102s : 1: symbol_engine_optimizer 78.90% : 0.081759s : 1: task_emit 0.08% : 0.000088s : 1: tuple_transform 2.60% : 0.002694s : 1: type_inference 0.07% : 0.000075s : 1: validate TotalTime = 0.0778592, [21] [bootstrap]: 0.00028042 [type_inference]: 0.00229012 [auto_monad]: 9.75803e-05 [graph_reusing]: 1.64984e-06 [inline]: 1.05985e-06 [parallel-infer-symbol]: 1.30991e-06 [pre_auto_parallel]: 2.06898e-05 [insert-virtual-dataset]: 1.8198e-06 [parallel-infer-symbol-second]: 3.59956e-07 [dataset_repeat_opt]: 1.09011e-06 [pipeline_split]: 1.32015e-06 [optimize]: 0.0069051, [52] [py_interpret_to_execute]: 1.32201e-05 [rewriter_before_opt_a]: 3.04901e-05 [opt_a]: 0.00530168, [2] [Cycle 1]: 0.00155152, [43] [expand_dump_flag]: 2.56998e-06 [switch_simplify]: 3.00999e-05 [loop_unroll]: 1.29598e-05 [a_1]: 0.00038736 [recompute_prepare]: 8.99006e-06 [updatestate_depend_eliminate]: 7.6401e-06 [updatestate_assign_eliminate]: 5.37001e-06 [updatestate_loads_eliminate]: 6.04009e-06 [parameter_eliminate]: 2.46987e-06 [a_2]: 0.00011572 [accelerated_algorithm]: 8.29995e-06 [shard]: 1.36998e-06 [meta_shard_fg_expand]: 3.17022e-06 [shard_inline]: 8.66037e-06 [auto_parallel]: 1.13402e-05 [parallel]: 5.35976e-06 [flash_sp]: 8.07969e-06 [merge_comm]: 6.97002e-06 [allreduce_fusion]: 5.54975e-06 [matmul_add_comm_reduction]: 8.97003e-06 [allreduce_slice_to_reducescatter]: 6.20261e-07 [virtual_shard_identity]: 9.77982e-06 [virtual_dataset]: 8.2301e-06 [get_grad_eliminate_]: 7.91019e-06 [virtual_output]: 7.7202e-06 [merge_forward]: 5.15021e-06 [cell_reuse_recompute_pass]: 1.59023e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.64602e-05 [before_grad]: 1.38399e-05 [inplace_validation]: 4.57978e-06 [meta_fg_expand]: 5.46034e-06 [inplace_validation_after_expand]: 5.56e-06 [flash_sp_send_recv_attached]: 3.36021e-06 [receive_attached]: 1.55997e-06 [after_resolve]: 1.10501e-05 [a_after_grad]: 1.27698e-05 [special_op_eliminate]: 7.94977e-06 [renormalize]: 0.00045104 [add_forward_monad_depend]: 2.58023e-06 [auto_monad_grad]: 1.45985e-06 [auto_monad_eliminator]: 2.466e-05 [cse]: 2.68901e-05 [a_3]: 5.62896e-05 [Cycle 2]: 0.00077094, [43] [expand_dump_flag]: 1.01002e-06 [switch_simplify]: 9.37982e-06 [loop_unroll]: 7.38027e-06 [a_1]: 0.00020296 [recompute_prepare]: 7.01984e-06 [updatestate_depend_eliminate]: 5.69969e-06 [updatestate_assign_eliminate]: 4.63007e-06 [updatestate_loads_eliminate]: 4.73997e-06 [parameter_eliminate]: 1.03982e-06 [a_2]: 0.00010415 [accelerated_algorithm]: 8.44989e-06 [shard]: 1.05007e-06 [meta_shard_fg_expand]: 2.12993e-06 [shard_inline]: 7.68993e-06 [auto_parallel]: 1.011e-05 [parallel]: 2.99001e-06 [flash_sp]: 2.58023e-06 [merge_comm]: 6.04009e-06 [allreduce_fusion]: 4.82984e-06 [matmul_add_comm_reduction]: 7.44034e-06 [allreduce_slice_to_reducescatter]: 2.69618e-07 [virtual_shard_identity]: 8.40984e-06 [virtual_dataset]: 7.50972e-06 [get_grad_eliminate_]: 7.50972e-06 [virtual_output]: 7.2699e-06 [merge_forward]: 4.48013e-06 [cell_reuse_recompute_pass]: 1.70991e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.50804e-05 [before_grad]: 1.26297e-05 [inplace_validation]: 4.42006e-06 [meta_fg_expand]: 4.62029e-06 [inplace_validation_after_expand]: 4.97e-06 [flash_sp_send_recv_attached]: 8.30274e-07 [receive_attached]: 6.99889e-07 [after_resolve]: 9.54978e-06 [a_after_grad]: 1.194e-05 [special_op_eliminate]: 7.18003e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.10133e-07 [auto_monad_grad]: 9.49949e-07 [auto_monad_eliminator]: 1.57598e-05 [cse]: 1.86702e-05 [a_3]: 4.86802e-05 [py_interpret_to_execute_after_opt_a]: 8.98028e-06 [slice_cell_reuse_recomputed_activation]: 1.88034e-06 [rewriter_after_opt_a]: 0.00013185 [convert_after_rewriter]: 8.10018e-06 [order_py_execute_after_rewriter]: 5.76023e-06 [opt_b]: 0.00023943, [1] [Cycle 1]: 0.00023437, [7] [b_1]: 0.00016164 [b_2]: 1.00201e-05 [updatestate_depend_eliminate]: 5.24009e-06 [updatestate_assign_eliminate]: 4.10015e-06 [updatestate_loads_eliminate]: 4.95976e-06 [renormalize]: 2.80328e-07 [cse]: 1.83801e-05 [optimize_parallel_all_gather_comm]: 7.75978e-06 [overlap_param_gather]: 8.99658e-07 [cconv]: 1.56099e-05 [loop_unroll]: 0.00048001 [opt_after_cconv]: 0.00013046, [1] [Cycle 1]: 0.0001245, [7] [c_1]: 5.32898e-05 [parameter_eliminate]: 1.8198e-06 [updatestate_depend_eliminate]: 7.23964e-06 [updatestate_assign_eliminate]: 4.52017e-06 [updatestate_loads_eliminate]: 4.90015e-06 [cse]: 2.06102e-05 [renormalize]: 4.09782e-07 [remove_dup_value]: 1.02199e-05 [tuple_transform]: 6.82799e-05, [1] [Cycle 1]: 6.39497e-05, [2] [d_1]: 5.44698e-05 [renormalize]: 1.49943e-07 [partial_unused_args_eliminate]: 1.4198e-06 [add_cache_embedding]: 1.11801e-05 [add_recomputation]: 5.58002e-05 [cse_after_recomputation]: 2.738e-05, [1] [Cycle 1]: 2.23401e-05, [1] [cse]: 1.71401e-05 [environ_conv]: 6.47968e-06 [swap_dp_allreduce_reducescatter]: 7.28993e-06 [bias_add_comm_swap]: 1.58977e-06 [label_micro_interleaved_index]: 1.22003e-06 [label_fine_grained_interleaved_index]: 1.09011e-06 [merge_cast_opt]: 7.19912e-07 [slice_recompute_activation]: 1.19023e-06 [micro_interleaved_order_control]: 1.30013e-06 [assign_add_opt]: 6.48992e-06 [ForceFp32Comm]: 5.89993e-07 [remove_cast_before_assign_add]: 5.49946e-07 [full_micro_interleaved_order_control]: 1.26008e-06 [reorder_send_recv_between_fp_bp]: 1.45985e-06 [comm_op_add_attrs]: 5.99772e-07 [add_comm_op_reuse_tag]: 5.80214e-07 [interleave_split_concat_branches]: 5.20144e-07 [interleave_parallel_branches]: 5.60191e-07 [overlap_opt_shard_in_pipeline]: 1.21025e-06 [overlap_opt_shard_grad_in_pipeline]: 1.31037e-06 [control_data_broadcast_order]: 6.70087e-07 [grouped_pairwise_exchange_alltoall]: 7.19912e-07 [offloading_packed_experts]: 6.9011e-07 [overlap_recompute_and_grad_model_parallel]: 1.05985e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.49829e-07 [overlap_recompute_allgather_and_fa_grad]: 5.99772e-07 [overlap_grad_ring_attention]: 1.21025e-06 [overlap_grad_flash_sp]: 1.33603e-05 [begin_end_overlap_inline]: 4.30271e-07 [split_matmul_comm_elemetwise]: 1.23028e-06 [split_layernorm_comm]: 1.15018e-06 [handle_group_info]: 5.60191e-07 [symbol_engine_optimizer]: 8.38903e-05, [1] [Cycle 1]: 7.93301e-05, [6] [build]: 3.77977e-06 [elim_shapecalc]: 1.171e-05 [elim_not_effective]: 1.57901e-05 [opt_reshape]: 9.10973e-06 [fold_const_symbol]: 1.32299e-05 [renormalize]: 1.60187e-07 [pipeline_parallel_scheduler]: 1.34995e-06 [auto_monad_reorder]: 2.302e-05 [get_jit_bprop_graph]: 3.1013e-07 [rewriter_after_jit_bprop_graph]: 3.1013e-07 [eliminate_special_op_node]: 0.00050378 [distribtued_split]: 3.32198e-05 [validate]: 2.95402e-05 [task_emit]: 0.0674331 [execute]: 7.47992e-06 Sums bootstrap : 0.000280s : 0.38% type_inference : 0.002290s : 3.10% auto_monad : 0.000098s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000020s : 0.03% optimize.opt_a.a_1 : 0.000590s : 0.80% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000220s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000002s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000451s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000046s : 0.06% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000132s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000480s : 0.65% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.02% optimize.add_recomputation : 0.000056s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000006s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000023s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000504s : 0.68% distribtued_split : 0.000033s : 0.04% validate : 0.000030s : 0.04% task_emit : 0.067433s : 91.22% execute : 0.000007s : 0.01% Time group info: ------[substitution.] 0.000111 63 4.78% : 0.000005s : 2: substitution.depend_value_elim 2.10% : 0.000002s : 5: substitution.elim_not_effective 1.94% : 0.000002s : 5: substitution.fold_const_symbol 5.41% : 0.000006s : 6: substitution.graph_param_transform 48.79% : 0.000054s : 1: substitution.inline 4.82% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.54% : 0.000004s : 6: substitution.load_eliminater 2.23% : 0.000002s : 2: substitution.reduce_all_const_elim 6.76% : 0.000008s : 10: substitution.remove_not_recompute_node 2.32% : 0.000003s : 2: substitution.replace_old_param 9.27% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.04% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002266 2 89.89% : 0.002037s : 1: type_inference.infer 10.11% : 0.000229s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000053 1 100.00% : 0.000053s : 1: match.inline ------[predicate.] 0.000228 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.16% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.74% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.26% : 0.000005s : 25: predicate.arithmetic_simplify 0.85% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.76% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.51% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.26% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.91% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.54% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.19% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.90% : 0.000004s : 31: predicate.environ_get_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.88% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.16% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.30% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.61% : 0.000013s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.02% : 0.000002s : 12: predicate.less_batch_normalization 1.58% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.35% : 0.000005s : 38: predicate.load_eliminater 1.24% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.82% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.80% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.65% : 0.000001s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.52% : 0.000001s : 6: predicate.parallel_virtual_node 1.21% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.07% : 0.000002s : 13: predicate.reduce_eliminate 0.60% : 0.000001s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.83% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000002s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.02% : 0.000002s : 12: predicate.shard_identity_eliminate 1.36% : 0.000003s : 18: predicate.special_op_eliminate 0.91% : 0.000002s : 12: predicate.specialize_transform 1.01% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.35% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.69% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.32% : 0.000010s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.90% : 0.000002s : 13: predicate.transpose_eliminate 1.78% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.69% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.85% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.69% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.70% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.29% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.45% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.63% : 0.000001s : 6: predicate.value_based_eliminate 0.77% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.49% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000137 4 7.65% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.35% : 0.000126s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.086647 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000060s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.13% : 0.000109s : 1: auto_monad 0.03% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000004s : 1: bias_add_comm_swap 0.35% : 0.000304s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.60% : 0.000517s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.56% : 0.000489s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.32% : 0.001142s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.05% : 0.000042s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.12% : 0.005305s : 1: opt_a 0.16% : 0.000135s : 1: opt_after_cconv 0.28% : 0.000243s : 1: opt_b 7.98% : 0.006913s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.30% : 0.000261s : 1: renormalize.infer 0.21% : 0.000185s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000137s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000087s : 1: symbol_engine_optimizer 77.85% : 0.067457s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.66% : 0.002306s : 1: type_inference 0.07% : 0.000062s : 1: validate TotalTime = 0.0793298, [21] [bootstrap]: 0.00028144 [type_inference]: 0.00230363 [auto_monad]: 0.00010843 [graph_reusing]: 1.95019e-06 [inline]: 1.55019e-06 [parallel-infer-symbol]: 1.2503e-06 [pre_auto_parallel]: 2.30102e-05 [insert-virtual-dataset]: 2.14996e-06 [parallel-infer-symbol-second]: 4.4005e-07 [dataset_repeat_opt]: 9.20147e-07 [pipeline_split]: 1.10036e-06 [optimize]: 0.00687078, [52] [py_interpret_to_execute]: 1.28602e-05 [rewriter_before_opt_a]: 3.258e-05 [opt_a]: 0.00527605, [2] [Cycle 1]: 0.00150882, [43] [expand_dump_flag]: 2.52016e-06 [switch_simplify]: 2.58102e-05 [loop_unroll]: 1.30697e-05 [a_1]: 0.00038939 [recompute_prepare]: 8.60961e-06 [updatestate_depend_eliminate]: 7.68015e-06 [updatestate_assign_eliminate]: 5.55022e-06 [updatestate_loads_eliminate]: 5.8203e-06 [parameter_eliminate]: 2.10991e-06 [a_2]: 0.00011534 [accelerated_algorithm]: 8.70973e-06 [shard]: 2.06009e-06 [meta_shard_fg_expand]: 2.99001e-06 [shard_inline]: 8.38004e-06 [auto_parallel]: 1.091e-05 [parallel]: 6.48992e-06 [flash_sp]: 7.66013e-06 [merge_comm]: 6.95977e-06 [allreduce_fusion]: 4.9104e-06 [matmul_add_comm_reduction]: 8.77958e-06 [allreduce_slice_to_reducescatter]: 3.49712e-07 [virtual_shard_identity]: 9.53022e-06 [virtual_dataset]: 8.2599e-06 [get_grad_eliminate_]: 7.61962e-06 [virtual_output]: 8.02986e-06 [merge_forward]: 5.08036e-06 [cell_reuse_recompute_pass]: 1.55997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.61002e-05 [before_grad]: 1.32704e-05 [inplace_validation]: 4.52995e-06 [meta_fg_expand]: 5.01005e-06 [inplace_validation_after_expand]: 5.64987e-06 [flash_sp_send_recv_attached]: 3.58e-06 [receive_attached]: 1.70991e-06 [after_resolve]: 1.14301e-05 [a_after_grad]: 1.23098e-05 [special_op_eliminate]: 7.58003e-06 [renormalize]: 0.00041851 [add_forward_monad_depend]: 2.65008e-06 [auto_monad_grad]: 1.45007e-06 [auto_monad_eliminator]: 2.483e-05 [cse]: 2.44598e-05 [a_3]: 5.74603e-05 [Cycle 2]: 0.00077119, [43] [expand_dump_flag]: 8.79634e-07 [switch_simplify]: 9.50973e-06 [loop_unroll]: 7.81985e-06 [a_1]: 0.0002029 [recompute_prepare]: 7.41985e-06 [updatestate_depend_eliminate]: 5.52041e-06 [updatestate_assign_eliminate]: 4.90015e-06 [updatestate_loads_eliminate]: 4.73997e-06 [parameter_eliminate]: 1.03004e-06 [a_2]: 0.00010336 [accelerated_algorithm]: 8.42987e-06 [shard]: 1.11014e-06 [meta_shard_fg_expand]: 2.38977e-06 [shard_inline]: 1.02003e-05 [auto_parallel]: 1.03302e-05 [parallel]: 2.95974e-06 [flash_sp]: 2.39024e-06 [merge_comm]: 5.49993e-06 [allreduce_fusion]: 4.73019e-06 [matmul_add_comm_reduction]: 7.92975e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 8.74e-06 [virtual_dataset]: 7.34022e-06 [get_grad_eliminate_]: 7.41985e-06 [virtual_output]: 6.92997e-06 [merge_forward]: 4.46988e-06 [cell_reuse_recompute_pass]: 1.66008e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.41603e-05 [before_grad]: 1.234e-05 [inplace_validation]: 4.25009e-06 [meta_fg_expand]: 4.60027e-06 [inplace_validation_after_expand]: 5.09014e-06 [flash_sp_send_recv_attached]: 7.89762e-07 [receive_attached]: 7.70204e-07 [after_resolve]: 9.79006e-06 [a_after_grad]: 1.194e-05 [special_op_eliminate]: 7.44965e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 7.79983e-07 [auto_monad_grad]: 9.29926e-07 [auto_monad_eliminator]: 1.63899e-05 [cse]: 1.86698e-05 [a_3]: 4.97601e-05 [py_interpret_to_execute_after_opt_a]: 8.52998e-06 [slice_cell_reuse_recomputed_activation]: 1.85985e-06 [rewriter_after_opt_a]: 0.00013223 [convert_after_rewriter]: 8.15e-06 [order_py_execute_after_rewriter]: 5.93998e-06 [opt_b]: 0.00024319, [1] [Cycle 1]: 0.00023787, [7] [b_1]: 0.00016497 [b_2]: 9.76026e-06 [updatestate_depend_eliminate]: 5.22006e-06 [updatestate_assign_eliminate]: 4.58024e-06 [updatestate_loads_eliminate]: 4.6799e-06 [renormalize]: 2.90107e-07 [cse]: 1.80802e-05 [optimize_parallel_all_gather_comm]: 7.47992e-06 [overlap_param_gather]: 9.59728e-07 [cconv]: 1.609e-05 [loop_unroll]: 0.00047922 [opt_after_cconv]: 0.00012628, [1] [Cycle 1]: 0.0001204, [7] [c_1]: 5.16302e-05 [parameter_eliminate]: 1.79e-06 [updatestate_depend_eliminate]: 7.16001e-06 [updatestate_assign_eliminate]: 4.52017e-06 [updatestate_loads_eliminate]: 4.6799e-06 [cse]: 1.948e-05 [renormalize]: 3.50177e-07 [remove_dup_value]: 1.01998e-05 [tuple_transform]: 6.70999e-05, [1] [Cycle 1]: 6.28401e-05, [2] [d_1]: 5.347e-05 [renormalize]: 1.80211e-07 [partial_unused_args_eliminate]: 1.4198e-06 [add_cache_embedding]: 1.15703e-05 [add_recomputation]: 5.31301e-05 [cse_after_recomputation]: 2.60998e-05, [1] [Cycle 1]: 2.17604e-05, [1] [cse]: 1.64602e-05 [environ_conv]: 6.14999e-06 [swap_dp_allreduce_reducescatter]: 6.65011e-06 [bias_add_comm_swap]: 1.39e-06 [label_micro_interleaved_index]: 1.43005e-06 [label_fine_grained_interleaved_index]: 1.13016e-06 [merge_cast_opt]: 5.99772e-07 [slice_recompute_activation]: 8.50298e-07 [micro_interleaved_order_control]: 9.80217e-07 [assign_add_opt]: 6.71996e-06 [ForceFp32Comm]: 5.09899e-07 [remove_cast_before_assign_add]: 6.19795e-07 [full_micro_interleaved_order_control]: 1.14972e-06 [reorder_send_recv_between_fp_bp]: 9.29926e-07 [comm_op_add_attrs]: 5.09899e-07 [add_comm_op_reuse_tag]: 4.80097e-07 [interleave_split_concat_branches]: 4.70318e-07 [interleave_parallel_branches]: 4.69852e-07 [overlap_opt_shard_in_pipeline]: 6.39819e-07 [overlap_opt_shard_grad_in_pipeline]: 1.03004e-06 [control_data_broadcast_order]: 5.59725e-07 [grouped_pairwise_exchange_alltoall]: 5.20144e-07 [offloading_packed_experts]: 5.60191e-07 [overlap_recompute_and_grad_model_parallel]: 1.26008e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.30271e-07 [overlap_recompute_allgather_and_fa_grad]: 5.0012e-07 [overlap_grad_ring_attention]: 1.22003e-06 [overlap_grad_flash_sp]: 1.20099e-05 [begin_end_overlap_inline]: 3.90224e-07 [split_matmul_comm_elemetwise]: 1.05007e-06 [split_layernorm_comm]: 9.59728e-07 [handle_group_info]: 4.89876e-07 [symbol_engine_optimizer]: 8.37501e-05, [1] [Cycle 1]: 7.96197e-05, [6] [build]: 3.47989e-06 [elim_shapecalc]: 1.19796e-05 [elim_not_effective]: 1.60802e-05 [opt_reshape]: 9.13022e-06 [fold_const_symbol]: 1.31498e-05 [renormalize]: 1.99769e-07 [pipeline_parallel_scheduler]: 8.30274e-07 [auto_monad_reorder]: 2.24202e-05 [get_jit_bprop_graph]: 3.1013e-07 [rewriter_after_jit_bprop_graph]: 2.5006e-07 [eliminate_special_op_node]: 0.00050749 [distribtued_split]: 3.26699e-05 [validate]: 2.91001e-05 [task_emit]: 0.0688993 [execute]: 9.01986e-06 Sums bootstrap : 0.000281s : 0.37% type_inference : 0.002304s : 3.06% auto_monad : 0.000108s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000023s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000033s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000592s : 0.79% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000219s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000019s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000012s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000030s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000419s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000041s : 0.05% optimize.opt_a.cse : 0.000043s : 0.06% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000132s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000007s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000479s : 0.64% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000019s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000053s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000000s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000000s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000022s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000507s : 0.67% distribtued_split : 0.000033s : 0.04% validate : 0.000029s : 0.04% task_emit : 0.068899s : 91.41% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000112 63 4.78% : 0.000005s : 2: substitution.depend_value_elim 2.09% : 0.000002s : 5: substitution.elim_not_effective 1.75% : 0.000002s : 5: substitution.fold_const_symbol 5.31% : 0.000006s : 6: substitution.graph_param_transform 48.30% : 0.000054s : 1: substitution.inline 4.48% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.75% : 0.000004s : 6: substitution.load_eliminater 2.28% : 0.000003s : 2: substitution.reduce_all_const_elim 6.47% : 0.000007s : 10: substitution.remove_not_recompute_node 2.66% : 0.000003s : 2: substitution.replace_old_param 9.56% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.57% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002278 2 89.44% : 0.002037s : 1: type_inference.infer 10.56% : 0.000241s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000053 1 100.00% : 0.000053s : 1: match.inline ------[predicate.] 0.000228 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.27% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.91% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.12% : 0.000005s : 25: predicate.arithmetic_simplify 0.88% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.25% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.22% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.90% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.64% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.18% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_depend_swap 1.87% : 0.000004s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.88% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.14% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.32% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.43% : 0.000012s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.42% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.12% : 0.000003s : 12: predicate.less_batch_normalization 1.64% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.42% : 0.000006s : 38: predicate.load_eliminater 1.34% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.77% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.84% : 0.000002s : 13: predicate.minmaximum_grad 0.66% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.63% : 0.000001s : 6: predicate.parallel_virtual_node 1.21% : 0.000003s : 14: predicate.partial_defer_inline 1.20% : 0.000003s : 19: predicate.partial_eliminate 0.83% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.07% : 0.000002s : 13: predicate.reduce_eliminate 0.48% : 0.000001s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.43% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.86% : 0.000002s : 13: predicate.reshape_eliminate 0.86% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.47% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.37% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 0.96% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.38% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 1.01% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.05% : 0.000009s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.77% : 0.000002s : 13: predicate.transpose_eliminate 1.87% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.71% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.68% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.92% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.68% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.56% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.79% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.35% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.48% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.81% : 0.000002s : 12: predicate.virtual_output_eliminate 0.57% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000145 4 7.40% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.60% : 0.000134s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088051 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000058s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.14% : 0.000122s : 1: auto_monad 0.03% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.34% : 0.000304s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000040s : 1: distribtued_split 0.59% : 0.000520s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.55% : 0.000489s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.29% : 0.001139s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.18% : 0.000155s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.05% : 0.000044s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.00% : 0.005280s : 1: opt_a 0.15% : 0.000130s : 1: opt_after_cconv 0.28% : 0.000246s : 1: opt_b 7.81% : 0.006879s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000029s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.25% : 0.000223s : 1: renormalize.infer 0.22% : 0.000191s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000138s : 1: rewriter_after_opt_a 0.04% : 0.000037s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000087s : 1: symbol_engine_optimizer 78.28% : 0.068925s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.64% : 0.002323s : 1: type_inference 0.07% : 0.000059s : 1: validate TotalTime = 0.0808268, [21] [bootstrap]: 0.00029585 [type_inference]: 0.00257447 [auto_monad]: 0.00012258 [graph_reusing]: 2.4098e-06 [inline]: 1.19023e-06 [parallel-infer-symbol]: 1.74018e-06 [pre_auto_parallel]: 2.224e-05 [insert-virtual-dataset]: 2.70968e-06 [parallel-infer-symbol-second]: 3.30154e-07 [dataset_repeat_opt]: 1.46031e-06 [pipeline_split]: 1.13994e-06 [optimize]: 0.00702803, [52] [py_interpret_to_execute]: 1.434e-05 [rewriter_before_opt_a]: 3.28901e-05 [opt_a]: 0.00537814, [2] [Cycle 1]: 0.00151414, [43] [expand_dump_flag]: 2.80002e-06 [switch_simplify]: 2.80701e-05 [loop_unroll]: 1.35899e-05 [a_1]: 0.00034063 [recompute_prepare]: 8.74978e-06 [updatestate_depend_eliminate]: 8.97981e-06 [updatestate_assign_eliminate]: 5.47012e-06 [updatestate_loads_eliminate]: 6.94999e-06 [parameter_eliminate]: 3.00026e-06 [a_2]: 0.00011819 [accelerated_algorithm]: 8.64966e-06 [shard]: 2.23983e-06 [meta_shard_fg_expand]: 3.9204e-06 [shard_inline]: 8.6003e-06 [auto_parallel]: 1.21901e-05 [parallel]: 6.8699e-06 [flash_sp]: 9.58983e-06 [merge_comm]: 7.81985e-06 [allreduce_fusion]: 5.65965e-06 [matmul_add_comm_reduction]: 9.85991e-06 [allreduce_slice_to_reducescatter]: 5.09899e-07 [virtual_shard_identity]: 9.6499e-06 [virtual_dataset]: 8.1202e-06 [get_grad_eliminate_]: 7.87992e-06 [virtual_output]: 8.05035e-06 [merge_forward]: 5.49993e-06 [cell_reuse_recompute_pass]: 1.79e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.65598e-05 [before_grad]: 1.35801e-05 [inplace_validation]: 5.17024e-06 [meta_fg_expand]: 4.84008e-06 [inplace_validation_after_expand]: 6.13974e-06 [flash_sp_send_recv_attached]: 4.14997e-06 [receive_attached]: 2.20025e-06 [after_resolve]: 1.12401e-05 [a_after_grad]: 1.29901e-05 [special_op_eliminate]: 7.88039e-06 [renormalize]: 0.00044328 [add_forward_monad_depend]: 3.2098e-06 [auto_monad_grad]: 1.76998e-06 [auto_monad_eliminator]: 2.97702e-05 [cse]: 2.83802e-05 [a_3]: 5.76e-05 [Cycle 2]: 0.00078264, [43] [expand_dump_flag]: 1.09989e-06 [switch_simplify]: 9.07015e-06 [loop_unroll]: 9.75002e-06 [a_1]: 0.00019964 [recompute_prepare]: 7.35978e-06 [updatestate_depend_eliminate]: 5.95022e-06 [updatestate_assign_eliminate]: 5.03985e-06 [updatestate_loads_eliminate]: 5.21028e-06 [parameter_eliminate]: 1.28988e-06 [a_2]: 0.00010522 [accelerated_algorithm]: 7.98004e-06 [shard]: 1.44029e-06 [meta_shard_fg_expand]: 2.78978e-06 [shard_inline]: 7.79005e-06 [auto_parallel]: 1.093e-05 [parallel]: 3.36021e-06 [flash_sp]: 3.62983e-06 [merge_comm]: 6.12997e-06 [allreduce_fusion]: 5.43008e-06 [matmul_add_comm_reduction]: 7.96001e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 9.90974e-06 [virtual_dataset]: 8.02986e-06 [get_grad_eliminate_]: 7.99028e-06 [virtual_output]: 7.09994e-06 [merge_forward]: 4.72972e-06 [cell_reuse_recompute_pass]: 1.8701e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.58399e-05 [before_grad]: 1.26804e-05 [inplace_validation]: 4.56022e-06 [meta_fg_expand]: 4.6296e-06 [inplace_validation_after_expand]: 5.3402e-06 [flash_sp_send_recv_attached]: 9.20147e-07 [receive_attached]: 6.20261e-07 [after_resolve]: 9.68995e-06 [a_after_grad]: 1.21999e-05 [special_op_eliminate]: 7.16979e-06 [renormalize]: 7.03149e-08 [add_forward_monad_depend]: 9.20147e-07 [auto_monad_grad]: 1.13016e-06 [auto_monad_eliminator]: 1.91201e-05 [cse]: 1.95699e-05 [a_3]: 4.907e-05 [py_interpret_to_execute_after_opt_a]: 9.34955e-06 [slice_cell_reuse_recomputed_activation]: 2.02004e-06 [rewriter_after_opt_a]: 0.00013305 [convert_after_rewriter]: 8.83965e-06 [order_py_execute_after_rewriter]: 5.74999e-06 [opt_b]: 0.00024129, [1] [Cycle 1]: 0.00023561, [7] [b_1]: 0.00016244 [b_2]: 9.76026e-06 [updatestate_depend_eliminate]: 5.53019e-06 [updatestate_assign_eliminate]: 4.57e-06 [updatestate_loads_eliminate]: 5.24009e-06 [renormalize]: 2.90107e-07 [cse]: 1.88299e-05 [optimize_parallel_all_gather_comm]: 8.10018e-06 [overlap_param_gather]: 1.13994e-06 [cconv]: 2.13198e-05 [loop_unroll]: 0.00047147 [opt_after_cconv]: 0.00013472, [1] [Cycle 1]: 0.00012838, [7] [c_1]: 5.35399e-05 [parameter_eliminate]: 2.37999e-06 [updatestate_depend_eliminate]: 8.18027e-06 [updatestate_assign_eliminate]: 4.82006e-06 [updatestate_loads_eliminate]: 5.29038e-06 [cse]: 2.24598e-05 [renormalize]: 2.99886e-07 [remove_dup_value]: 1.33198e-05 [tuple_transform]: 6.88401e-05, [1] [Cycle 1]: 6.396e-05, [2] [d_1]: 5.446e-05 [renormalize]: 1.99769e-07 [partial_unused_args_eliminate]: 1.74996e-06 [add_cache_embedding]: 1.352e-05 [add_recomputation]: 6.37998e-05 [cse_after_recomputation]: 2.67499e-05, [1] [Cycle 1]: 2.23801e-05, [1] [cse]: 1.75401e-05 [environ_conv]: 7.02962e-06 [swap_dp_allreduce_reducescatter]: 7.11996e-06 [bias_add_comm_swap]: 2.21981e-06 [label_micro_interleaved_index]: 1.56974e-06 [label_fine_grained_interleaved_index]: 1.83005e-06 [merge_cast_opt]: 1.19023e-06 [slice_recompute_activation]: 1.34995e-06 [micro_interleaved_order_control]: 1.73971e-06 [assign_add_opt]: 7.49016e-06 [ForceFp32Comm]: 1.03004e-06 [remove_cast_before_assign_add]: 8.89879e-07 [full_micro_interleaved_order_control]: 2.20025e-06 [reorder_send_recv_between_fp_bp]: 1.26986e-06 [comm_op_add_attrs]: 7.59959e-07 [add_comm_op_reuse_tag]: 7.59959e-07 [interleave_split_concat_branches]: 7.79983e-07 [interleave_parallel_branches]: 6.3004e-07 [overlap_opt_shard_in_pipeline]: 9.89996e-07 [overlap_opt_shard_grad_in_pipeline]: 1.91992e-06 [control_data_broadcast_order]: 9.89996e-07 [grouped_pairwise_exchange_alltoall]: 8.801e-07 [offloading_packed_experts]: 9.59728e-07 [overlap_recompute_and_grad_model_parallel]: 1.47009e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.29923e-07 [overlap_recompute_allgather_and_fa_grad]: 8.99658e-07 [overlap_grad_ring_attention]: 1.4999e-06 [overlap_grad_flash_sp]: 1.36099e-05 [begin_end_overlap_inline]: 5.69969e-07 [split_matmul_comm_elemetwise]: 1.83983e-06 [split_layernorm_comm]: 1.60979e-06 [handle_group_info]: 7.39936e-07 [symbol_engine_optimizer]: 9.80799e-05, [1] [Cycle 1]: 9.35304e-05, [6] [build]: 3.72995e-06 [elim_shapecalc]: 1.209e-05 [elim_not_effective]: 1.60597e-05 [opt_reshape]: 8.64966e-06 [fold_const_symbol]: 1.44597e-05 [renormalize]: 2.20258e-07 [pipeline_parallel_scheduler]: 1.34995e-06 [auto_monad_reorder]: 2.75299e-05 [get_jit_bprop_graph]: 4.29805e-07 [rewriter_after_jit_bprop_graph]: 4.09782e-07 [eliminate_special_op_node]: 0.00048414 [distribtued_split]: 3.80501e-05 [validate]: 3.35299e-05 [task_emit]: 0.0699348 [execute]: 1.07298e-05 Sums bootstrap : 0.000296s : 0.39% type_inference : 0.002574s : 3.35% auto_monad : 0.000123s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000022s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000033s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000037s : 0.05% optimize.opt_a.loop_unroll : 0.000023s : 0.03% optimize.opt_a.a_1 : 0.000540s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000223s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000009s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000443s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.06% optimize.opt_a.cse : 0.000048s : 0.06% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000133s : 0.17% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000021s : 0.03% optimize.loop_unroll : 0.000471s : 0.61% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000028s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000484s : 0.63% distribtued_split : 0.000038s : 0.05% validate : 0.000034s : 0.04% task_emit : 0.069935s : 91.10% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000126 63 5.29% : 0.000007s : 2: substitution.depend_value_elim 1.99% : 0.000003s : 5: substitution.elim_not_effective 2.22% : 0.000003s : 5: substitution.fold_const_symbol 5.04% : 0.000006s : 6: substitution.graph_param_transform 50.26% : 0.000063s : 1: substitution.inline 4.53% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.15% : 0.000004s : 6: substitution.load_eliminater 2.10% : 0.000003s : 2: substitution.reduce_all_const_elim 5.99% : 0.000008s : 10: substitution.remove_not_recompute_node 2.52% : 0.000003s : 2: substitution.replace_old_param 8.67% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.24% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002546 2 89.67% : 0.002283s : 1: type_inference.infer 10.33% : 0.000263s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000062 1 100.00% : 0.000062s : 1: match.inline ------[predicate.] 0.000228 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.18% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.18% : 0.000005s : 25: predicate.arithmetic_simplify 0.80% : 0.000002s : 13: predicate.cast_eliminate 0.76% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.47% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.90% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.54% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.27% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.20% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_depend_swap 2.00% : 0.000005s : 31: predicate.environ_get_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.35% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.05% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000000s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.49% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.69% : 0.000013s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.37% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.01% : 0.000002s : 12: predicate.less_batch_normalization 1.68% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.45% : 0.000006s : 38: predicate.load_eliminater 1.47% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.39% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.81% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.87% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.14% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.84% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.23% : 0.000003s : 13: predicate.reduce_eliminate 0.56% : 0.000001s : 12: predicate.remove_not_recompute_node 1.20% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.78% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.98% : 0.000002s : 12: predicate.shard_identity_eliminate 1.31% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 1.01% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.17% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.21% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.62% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.15% : 0.000009s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.81% : 0.000002s : 13: predicate.transpose_eliminate 1.76% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.78% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.52% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.76% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.53% : 0.000003s : 25: predicate.tuple_list_get_set_item_eliminator 2.43% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.67% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.47% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.31% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.55% : 0.000001s : 6: predicate.value_based_eliminate 0.87% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000154 4 9.44% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.56% : 0.000139s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089677 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000068s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000136s : 1: auto_monad 0.04% : 0.000033s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000320s : 1: bootstrap 0.03% : 0.000025s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000046s : 1: distribtued_split 0.55% : 0.000497s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.54% : 0.000480s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001097s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 6.00% : 0.005382s : 1: opt_a 0.15% : 0.000139s : 1: opt_after_cconv 0.27% : 0.000244s : 1: opt_b 7.85% : 0.007037s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000028s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000240s : 1: renormalize.infer 0.22% : 0.000198s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000139s : 1: rewriter_after_opt_a 0.04% : 0.000037s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000101s : 1: symbol_engine_optimizer 78.02% : 0.069965s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.89% : 0.002593s : 1: type_inference 0.07% : 0.000067s : 1: validate TotalTime = 0.0813564, [21] [bootstrap]: 0.00032455 [type_inference]: 0.0025997 [auto_monad]: 0.00013639 [graph_reusing]: 2.61003e-06 [inline]: 1.51992e-06 [parallel-infer-symbol]: 2.86987e-06 [pre_auto_parallel]: 2.89297e-05 [insert-virtual-dataset]: 3.08035e-06 [parallel-infer-symbol-second]: 4.20026e-07 [dataset_repeat_opt]: 1.28988e-06 [pipeline_split]: 1.87987e-06 [optimize]: 0.00722282, [52] [py_interpret_to_execute]: 1.56402e-05 [rewriter_before_opt_a]: 3.66e-05 [opt_a]: 0.00547826, [2] [Cycle 1]: 0.00157421, [43] [expand_dump_flag]: 3.2098e-06 [switch_simplify]: 3.07797e-05 [loop_unroll]: 1.30096e-05 [a_1]: 0.00034882 [recompute_prepare]: 9.33977e-06 [updatestate_depend_eliminate]: 8.92999e-06 [updatestate_assign_eliminate]: 6.08992e-06 [updatestate_loads_eliminate]: 7.30017e-06 [parameter_eliminate]: 3.46033e-06 [a_2]: 0.00011894 [accelerated_algorithm]: 8.57981e-06 [shard]: 2.53972e-06 [meta_shard_fg_expand]: 3.87011e-06 [shard_inline]: 8.69995e-06 [auto_parallel]: 1.19298e-05 [parallel]: 8.10018e-06 [flash_sp]: 1.13696e-05 [merge_comm]: 8.50996e-06 [allreduce_fusion]: 5.13997e-06 [matmul_add_comm_reduction]: 1.049e-05 [allreduce_slice_to_reducescatter]: 4.79631e-07 [virtual_shard_identity]: 9.24012e-06 [virtual_dataset]: 7.92043e-06 [get_grad_eliminate_]: 7.53021e-06 [virtual_output]: 7.44034e-06 [merge_forward]: 5.92973e-06 [cell_reuse_recompute_pass]: 1.88965e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.73603e-05 [before_grad]: 1.36802e-05 [inplace_validation]: 5.67967e-06 [meta_fg_expand]: 5.39981e-06 [inplace_validation_after_expand]: 6.71996e-06 [flash_sp_send_recv_attached]: 4.99003e-06 [receive_attached]: 3.03984e-06 [after_resolve]: 1.18101e-05 [a_after_grad]: 1.25603e-05 [special_op_eliminate]: 8.27014e-06 [renormalize]: 0.0004609 [add_forward_monad_depend]: 3.98979e-06 [auto_monad_grad]: 2.02004e-06 [auto_monad_eliminator]: 3.32599e-05 [cse]: 3.458e-05 [a_3]: 5.81602e-05 [Cycle 2]: 0.00078373, [43] [expand_dump_flag]: 1.03982e-06 [switch_simplify]: 9.18005e-06 [loop_unroll]: 7.53021e-06 [a_1]: 0.0002032 [recompute_prepare]: 7.41007e-06 [updatestate_depend_eliminate]: 6.04009e-06 [updatestate_assign_eliminate]: 5.22006e-06 [updatestate_loads_eliminate]: 5.60982e-06 [parameter_eliminate]: 1.51014e-06 [a_2]: 0.00010542 [accelerated_algorithm]: 8.39029e-06 [shard]: 1.18976e-06 [meta_shard_fg_expand]: 2.45031e-06 [shard_inline]: 8.32975e-06 [auto_parallel]: 1.12997e-05 [parallel]: 3.98001e-06 [flash_sp]: 3.79002e-06 [merge_comm]: 6.17001e-06 [allreduce_fusion]: 4.78001e-06 [matmul_add_comm_reduction]: 8.04011e-06 [allreduce_slice_to_reducescatter]: 2.99886e-07 [virtual_shard_identity]: 8.45967e-06 [virtual_dataset]: 7.45011e-06 [get_grad_eliminate_]: 7.26013e-06 [virtual_output]: 7.05989e-06 [merge_forward]: 4.97e-06 [cell_reuse_recompute_pass]: 2.06986e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.56201e-05 [before_grad]: 1.249e-05 [inplace_validation]: 4.63985e-06 [meta_fg_expand]: 4.75999e-06 [inplace_validation_after_expand]: 5.37001e-06 [flash_sp_send_recv_attached]: 9.19681e-07 [receive_attached]: 8.09785e-07 [after_resolve]: 1.02101e-05 [a_after_grad]: 1.23498e-05 [special_op_eliminate]: 7.6904e-06 [renormalize]: 6.00703e-08 [add_forward_monad_depend]: 9.89996e-07 [auto_monad_grad]: 1.17021e-06 [auto_monad_eliminator]: 1.889e-05 [cse]: 1.89398e-05 [a_3]: 4.831e-05 [py_interpret_to_execute_after_opt_a]: 9.62988e-06 [slice_cell_reuse_recomputed_activation]: 2.72039e-06 [rewriter_after_opt_a]: 0.00014867 [convert_after_rewriter]: 9.09995e-06 [order_py_execute_after_rewriter]: 6.94999e-06 [opt_b]: 0.0002444, [1] [Cycle 1]: 0.00023822, [7] [b_1]: 0.0001621 [b_2]: 9.66014e-06 [updatestate_depend_eliminate]: 5.51995e-06 [updatestate_assign_eliminate]: 4.79957e-06 [updatestate_loads_eliminate]: 5.39981e-06 [renormalize]: 3.7998e-07 [cse]: 1.87899e-05 [optimize_parallel_all_gather_comm]: 8.53976e-06 [overlap_param_gather]: 1.69966e-06 [cconv]: 2.38898e-05 [loop_unroll]: 0.00049749 [opt_after_cconv]: 0.00013493, [1] [Cycle 1]: 0.00012854, [7] [c_1]: 5.38998e-05 [parameter_eliminate]: 2.56021e-06 [updatestate_depend_eliminate]: 8.33999e-06 [updatestate_assign_eliminate]: 4.60027e-06 [updatestate_loads_eliminate]: 5.4203e-06 [cse]: 2.15997e-05 [renormalize]: 4.00003e-07 [remove_dup_value]: 1.27899e-05 [tuple_transform]: 6.82999e-05, [1] [Cycle 1]: 6.36098e-05, [2] [d_1]: 5.463e-05 [renormalize]: 1.79745e-07 [partial_unused_args_eliminate]: 2.19001e-06 [add_cache_embedding]: 1.39498e-05 [add_recomputation]: 9.04896e-05 [cse_after_recomputation]: 3.00501e-05, [1] [Cycle 1]: 2.46502e-05, [1] [cse]: 1.91e-05 [environ_conv]: 7.43009e-06 [swap_dp_allreduce_reducescatter]: 8.63988e-06 [bias_add_comm_swap]: 2.4098e-06 [label_micro_interleaved_index]: 1.94972e-06 [label_fine_grained_interleaved_index]: 2.33017e-06 [merge_cast_opt]: 1.34995e-06 [slice_recompute_activation]: 2.09035e-06 [micro_interleaved_order_control]: 1.84961e-06 [assign_add_opt]: 8.6599e-06 [ForceFp32Comm]: 8.79634e-07 [remove_cast_before_assign_add]: 1.09989e-06 [full_micro_interleaved_order_control]: 2.25008e-06 [reorder_send_recv_between_fp_bp]: 2.08011e-06 [comm_op_add_attrs]: 1.08965e-06 [add_comm_op_reuse_tag]: 1.13016e-06 [interleave_split_concat_branches]: 8.40053e-07 [interleave_parallel_branches]: 6.9011e-07 [overlap_opt_shard_in_pipeline]: 1.36998e-06 [overlap_opt_shard_grad_in_pipeline]: 2.59979e-06 [control_data_broadcast_order]: 1.20001e-06 [grouped_pairwise_exchange_alltoall]: 1.85007e-06 [offloading_packed_experts]: 1.17021e-06 [overlap_recompute_and_grad_model_parallel]: 2.21003e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.15018e-06 [overlap_recompute_allgather_and_fa_grad]: 8.00006e-07 [overlap_grad_ring_attention]: 1.91014e-06 [overlap_grad_flash_sp]: 1.392e-05 [begin_end_overlap_inline]: 8.70321e-07 [split_matmul_comm_elemetwise]: 2.36044e-06 [split_layernorm_comm]: 1.88034e-06 [handle_group_info]: 1.0198e-06 [symbol_engine_optimizer]: 8.43396e-05, [1] [Cycle 1]: 7.98199e-05, [6] [build]: 3.85009e-06 [elim_shapecalc]: 1.234e-05 [elim_not_effective]: 1.60402e-05 [opt_reshape]: 8.86014e-06 [fold_const_symbol]: 1.35503e-05 [renormalize]: 2.39816e-07 [pipeline_parallel_scheduler]: 1.47009e-06 [auto_monad_reorder]: 3.01204e-05 [get_jit_bprop_graph]: 4.80097e-07 [rewriter_after_jit_bprop_graph]: 4.29805e-07 [eliminate_special_op_node]: 0.00051495 [distribtued_split]: 4.34001e-05 [validate]: 3.536e-05 [task_emit]: 0.0701498 [execute]: 1.07903e-05 Sums bootstrap : 0.000325s : 0.42% type_inference : 0.002600s : 3.37% auto_monad : 0.000136s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000029s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000552s : 0.71% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000224s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000461s : 0.60% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000052s : 0.07% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000149s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000497s : 0.64% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000090s : 0.12% optimize.cse_after_recomputation.cse : 0.000019s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000009s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000009s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000515s : 0.67% distribtued_split : 0.000043s : 0.06% validate : 0.000035s : 0.05% task_emit : 0.070150s : 90.81% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000136 63 5.08% : 0.000007s : 2: substitution.depend_value_elim 1.98% : 0.000003s : 5: substitution.elim_not_effective 1.84% : 0.000002s : 5: substitution.fold_const_symbol 5.21% : 0.000007s : 6: substitution.graph_param_transform 51.43% : 0.000070s : 1: substitution.inline 3.81% : 0.000005s : 10: substitution.j_node_and_user_rematch 2.99% : 0.000004s : 6: substitution.load_eliminater 2.62% : 0.000004s : 2: substitution.reduce_all_const_elim 5.74% : 0.000008s : 10: substitution.remove_not_recompute_node 2.49% : 0.000003s : 2: substitution.replace_old_param 8.96% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.85% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002568 2 88.70% : 0.002278s : 1: type_inference.infer 11.30% : 0.000290s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000069 1 100.00% : 0.000069s : 1: match.inline ------[predicate.] 0.000231 1420 0.77% : 0.000002s : 13: predicate.accumulaten_eliminater 1.16% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.83% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.20% : 0.000005s : 25: predicate.arithmetic_simplify 0.77% : 0.000002s : 13: predicate.cast_eliminate 0.80% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.56% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.82% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.85% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.17% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_depend_swap 1.84% : 0.000004s : 31: predicate.environ_get_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.80% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.36% : 0.000003s : 14: predicate.float_depend_g_call 0.79% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.86% : 0.000014s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000002s : 12: predicate.less_batch_normalization 1.76% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.34% : 0.000005s : 38: predicate.load_eliminater 1.30% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.21% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.81% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.80% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.42% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.13% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.83% : 0.000002s : 13: predicate.print_const_string_wrapper 0.88% : 0.000002s : 12: predicate.reduce_all_const_elim 1.10% : 0.000003s : 13: predicate.reduce_eliminate 0.63% : 0.000001s : 12: predicate.remove_not_recompute_node 1.21% : 0.000003s : 25: predicate.replace_applicator 0.49% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.78% : 0.000002s : 13: predicate.reshape_eliminate 0.77% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.46% : 0.000001s : 6: predicate.row_tensor_eliminate 1.01% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.87% : 0.000002s : 12: predicate.shard_identity_eliminate 1.33% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.03% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.45% : 0.000010s : 43: predicate.switch_simplify 0.82% : 0.000002s : 13: predicate.tile_eliminate 0.87% : 0.000002s : 13: predicate.transpose_eliminate 1.92% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.62% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.84% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.56% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.68% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.50% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.75% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000172 4 9.80% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.20% : 0.000155s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090431 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.11% : 0.000096s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.16% : 0.000149s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.39% : 0.000348s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.04% : 0.000033s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000051s : 1: distribtued_split 0.59% : 0.000529s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000507s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001110s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.06% : 0.005482s : 1: opt_a 0.15% : 0.000139s : 1: opt_after_cconv 0.27% : 0.000247s : 1: opt_b 8.00% : 0.007231s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000036s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000247s : 1: renormalize.infer 0.23% : 0.000209s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000155s : 1: rewriter_after_opt_a 0.05% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000088s : 1: symbol_engine_optimizer 77.60% : 0.070176s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.90% : 0.002619s : 1: type_inference 0.08% : 0.000075s : 1: validate TotalTime = 0.0816968, [21] [bootstrap]: 0.00031727 [type_inference]: 0.00267217 [auto_monad]: 0.00012946 [graph_reusing]: 2.54018e-06 [inline]: 1.45007e-06 [parallel-infer-symbol]: 1.5297e-06 [pre_auto_parallel]: 2.55499e-05 [insert-virtual-dataset]: 2.75997e-06 [parallel-infer-symbol-second]: 3.7998e-07 [dataset_repeat_opt]: 1.43982e-06 [pipeline_split]: 1.31037e-06 [optimize]: 0.00752359, [52] [py_interpret_to_execute]: 1.52397e-05 [rewriter_before_opt_a]: 3.48398e-05 [opt_a]: 0.00567413, [2] [Cycle 1]: 0.00154354, [43] [expand_dump_flag]: 3.93996e-06 [switch_simplify]: 2.938e-05 [loop_unroll]: 1.30096e-05 [a_1]: 0.00034174 [recompute_prepare]: 8.70042e-06 [updatestate_depend_eliminate]: 8.93977e-06 [updatestate_assign_eliminate]: 5.84964e-06 [updatestate_loads_eliminate]: 7.34022e-06 [parameter_eliminate]: 3.24985e-06 [a_2]: 0.00011671 [accelerated_algorithm]: 8.6003e-06 [shard]: 2.35019e-06 [meta_shard_fg_expand]: 3.56976e-06 [shard_inline]: 8.35024e-06 [auto_parallel]: 1.20797e-05 [parallel]: 7.20005e-06 [flash_sp]: 9.28994e-06 [merge_comm]: 7.12974e-06 [allreduce_fusion]: 4.92996e-06 [matmul_add_comm_reduction]: 9.28994e-06 [allreduce_slice_to_reducescatter]: 4.10248e-07 [virtual_shard_identity]: 9.62988e-06 [virtual_dataset]: 8.18027e-06 [get_grad_eliminate_]: 7.60984e-06 [virtual_output]: 7.93999e-06 [merge_forward]: 5.81983e-06 [cell_reuse_recompute_pass]: 2.02982e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.67298e-05 [before_grad]: 1.32699e-05 [inplace_validation]: 5.01005e-06 [meta_fg_expand]: 5.65965e-06 [inplace_validation_after_expand]: 6.21984e-06 [flash_sp_send_recv_attached]: 4.76977e-06 [receive_attached]: 2.87965e-06 [after_resolve]: 1.095e-05 [a_after_grad]: 1.26399e-05 [special_op_eliminate]: 7.39004e-06 [renormalize]: 0.00045818 [add_forward_monad_depend]: 3.39001e-06 [auto_monad_grad]: 2.01026e-06 [auto_monad_eliminator]: 3.24398e-05 [cse]: 3.1e-05 [a_3]: 5.699e-05 [Cycle 2]: 0.00078977, [43] [expand_dump_flag]: 1.19023e-06 [switch_simplify]: 9.47015e-06 [loop_unroll]: 7.79005e-06 [a_1]: 0.00021111 [recompute_prepare]: 7.07014e-06 [updatestate_depend_eliminate]: 5.81006e-06 [updatestate_assign_eliminate]: 4.68967e-06 [updatestate_loads_eliminate]: 5.41005e-06 [parameter_eliminate]: 1.28988e-06 [a_2]: 0.00010556 [accelerated_algorithm]: 8.27992e-06 [shard]: 1.30991e-06 [meta_shard_fg_expand]: 2.8098e-06 [shard_inline]: 7.75e-06 [auto_parallel]: 1.09999e-05 [parallel]: 3.56976e-06 [flash_sp]: 3.41982e-06 [merge_comm]: 5.91995e-06 [allreduce_fusion]: 4.63007e-06 [matmul_add_comm_reduction]: 8.11974e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 8.7698e-06 [virtual_dataset]: 7.39982e-06 [get_grad_eliminate_]: 7.30995e-06 [virtual_output]: 6.97002e-06 [merge_forward]: 4.56022e-06 [cell_reuse_recompute_pass]: 2.11969e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.493e-05 [before_grad]: 1.20401e-05 [inplace_validation]: 4.39957e-06 [meta_fg_expand]: 4.91971e-06 [inplace_validation_after_expand]: 5.46034e-06 [flash_sp_send_recv_attached]: 8.40053e-07 [receive_attached]: 8.29808e-07 [after_resolve]: 9.96981e-06 [a_after_grad]: 1.215e-05 [special_op_eliminate]: 7.56979e-06 [renormalize]: 1.00117e-07 [add_forward_monad_depend]: 8.00006e-07 [auto_monad_grad]: 1.09011e-06 [auto_monad_eliminator]: 1.80802e-05 [cse]: 1.946e-05 [a_3]: 4.91203e-05 [py_interpret_to_execute_after_opt_a]: 9.07015e-06 [slice_cell_reuse_recomputed_activation]: 2.36975e-06 [rewriter_after_opt_a]: 0.0001433 [convert_after_rewriter]: 9.79006e-06 [order_py_execute_after_rewriter]: 1.20201e-05 [opt_b]: 0.00024539, [1] [Cycle 1]: 0.00023963, [7] [b_1]: 0.0001642 [b_2]: 9.67039e-06 [updatestate_depend_eliminate]: 5.28013e-06 [updatestate_assign_eliminate]: 4.82006e-06 [updatestate_loads_eliminate]: 5.20004e-06 [renormalize]: 2.19792e-07 [cse]: 1.96402e-05 [optimize_parallel_all_gather_comm]: 8.83033e-06 [overlap_param_gather]: 1.20001e-06 [cconv]: 2.29902e-05 [loop_unroll]: 0.00064146 [opt_after_cconv]: 0.00013647, [1] [Cycle 1]: 0.00013009, [7] [c_1]: 5.61401e-05 [parameter_eliminate]: 2.44007e-06 [updatestate_depend_eliminate]: 8.02008e-06 [updatestate_assign_eliminate]: 4.84008e-06 [updatestate_loads_eliminate]: 5.03007e-06 [cse]: 2.148e-05 [renormalize]: 3.90224e-07 [remove_dup_value]: 1.375e-05 [tuple_transform]: 6.96001e-05, [1] [Cycle 1]: 6.52e-05, [2] [d_1]: 5.581e-05 [renormalize]: 2.99886e-07 [partial_unused_args_eliminate]: 1.89012e-06 [add_cache_embedding]: 1.44201e-05 [add_recomputation]: 6.141e-05 [cse_after_recomputation]: 2.65301e-05, [1] [Cycle 1]: 2.16896e-05, [1] [cse]: 1.67997e-05 [environ_conv]: 7.41007e-06 [swap_dp_allreduce_reducescatter]: 7.09016e-06 [bias_add_comm_swap]: 2.14996e-06 [label_micro_interleaved_index]: 2.19978e-06 [label_fine_grained_interleaved_index]: 1.91992e-06 [merge_cast_opt]: 1.22981e-06 [slice_recompute_activation]: 1.87987e-06 [micro_interleaved_order_control]: 1.88034e-06 [assign_add_opt]: 7.86036e-06 [ForceFp32Comm]: 8.00006e-07 [remove_cast_before_assign_add]: 1.05985e-06 [full_micro_interleaved_order_control]: 2.2701e-06 [reorder_send_recv_between_fp_bp]: 2.19001e-06 [comm_op_add_attrs]: 1.09011e-06 [add_comm_op_reuse_tag]: 9.89996e-07 [interleave_split_concat_branches]: 8.2003e-07 [interleave_parallel_branches]: 6.99889e-07 [overlap_opt_shard_in_pipeline]: 1.39e-06 [overlap_opt_shard_grad_in_pipeline]: 2.29012e-06 [control_data_broadcast_order]: 1.1404e-06 [grouped_pairwise_exchange_alltoall]: 1.36998e-06 [offloading_packed_experts]: 1.29966e-06 [overlap_recompute_and_grad_model_parallel]: 1.8999e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.0198e-06 [overlap_recompute_allgather_and_fa_grad]: 1.34995e-06 [overlap_grad_ring_attention]: 1.60001e-06 [overlap_grad_flash_sp]: 1.54101e-05 [begin_end_overlap_inline]: 8.00006e-07 [split_matmul_comm_elemetwise]: 1.72015e-06 [split_layernorm_comm]: 1.51992e-06 [handle_group_info]: 9.09902e-07 [symbol_engine_optimizer]: 8.39401e-05, [1] [Cycle 1]: 7.95797e-05, [6] [build]: 3.5502e-06 [elim_shapecalc]: 1.22199e-05 [elim_not_effective]: 1.573e-05 [opt_reshape]: 8.84011e-06 [fold_const_symbol]: 1.373e-05 [renormalize]: 2.5006e-07 [pipeline_parallel_scheduler]: 1.34995e-06 [auto_monad_reorder]: 2.84696e-05 [get_jit_bprop_graph]: 4.00003e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00052482 [distribtued_split]: 4.14499e-05 [validate]: 3.597e-05 [task_emit]: 0.0701323 [execute]: 1.07503e-05 Sums bootstrap : 0.000317s : 0.41% type_inference : 0.002672s : 3.45% auto_monad : 0.000129s : 0.17% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000553s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000222s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000458s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000050s : 0.07% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000143s : 0.19% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000012s : 0.02% optimize.opt_b.b_1 : 0.000164s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000641s : 0.83% optimize.opt_after_cconv.c_1 : 0.000056s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000028s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000525s : 0.68% distribtued_split : 0.000041s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.070132s : 90.64% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000129 63 4.85% : 0.000006s : 2: substitution.depend_value_elim 1.80% : 0.000002s : 5: substitution.elim_not_effective 1.90% : 0.000002s : 5: substitution.fold_const_symbol 5.35% : 0.000007s : 6: substitution.graph_param_transform 50.51% : 0.000065s : 1: substitution.inline 3.94% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.46% : 0.000004s : 6: substitution.load_eliminater 2.51% : 0.000003s : 2: substitution.reduce_all_const_elim 5.70% : 0.000007s : 10: substitution.remove_not_recompute_node 2.68% : 0.000003s : 2: substitution.replace_old_param 9.01% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.28% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002643 2 88.86% : 0.002349s : 1: type_inference.infer 11.14% : 0.000294s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000231 1420 0.77% : 0.000002s : 13: predicate.accumulaten_eliminater 1.15% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.28% : 0.000005s : 25: predicate.arithmetic_simplify 0.87% : 0.000002s : 13: predicate.cast_eliminate 0.83% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.43% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.88% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.55% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.17% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 1.87% : 0.000004s : 31: predicate.environ_get_eliminate 1.18% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.58% : 0.000013s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.10% : 0.000003s : 12: predicate.less_batch_normalization 1.65% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000006s : 38: predicate.load_eliminater 1.50% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.87% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.78% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.72% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.10% : 0.000003s : 13: predicate.reduce_eliminate 0.61% : 0.000001s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.43% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.89% : 0.000002s : 13: predicate.reshape_eliminate 0.83% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.30% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.43% : 0.000006s : 38: predicate.stopgrad_eliminater 0.47% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.61% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.40% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.84% : 0.000002s : 13: predicate.transpose_eliminate 1.86% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.71% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.53% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.70% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.57% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.77% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.33% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.55% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.48% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000172 4 9.41% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.59% : 0.000156s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091086 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.16% : 0.000143s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000341s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.59% : 0.000539s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.72% : 0.000652s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.04% : 0.000032s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001103s : 80: opt.transform.opt_a 0.06% : 0.000054s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.23% : 0.005678s : 1: opt_a 0.15% : 0.000140s : 1: opt_after_cconv 0.27% : 0.000248s : 1: opt_b 8.27% : 0.007532s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.02% : 0.000016s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.28% : 0.000256s : 1: renormalize.infer 0.22% : 0.000196s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000149s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000087s : 1: symbol_engine_optimizer 77.03% : 0.070161s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.95% : 0.002691s : 1: type_inference 0.08% : 0.000070s : 1: validate TotalTime = 0.0832551, [21] [bootstrap]: 0.00033719 [type_inference]: 0.00266585 [auto_monad]: 0.00013937 [graph_reusing]: 2.37022e-06 [inline]: 1.13994e-06 [parallel-infer-symbol]: 1.64006e-06 [pre_auto_parallel]: 2.76701e-05 [insert-virtual-dataset]: 2.68966e-06 [parallel-infer-symbol-second]: 3.80445e-07 [dataset_repeat_opt]: 6.89644e-07 [pipeline_split]: 9.20147e-07 [optimize]: 0.0073878, [52] [py_interpret_to_execute]: 1.491e-05 [rewriter_before_opt_a]: 3.67798e-05 [opt_a]: 0.00562581, [2] [Cycle 1]: 0.00160536, [43] [expand_dump_flag]: 4.28967e-06 [switch_simplify]: 3.10601e-05 [loop_unroll]: 1.32998e-05 [a_1]: 0.00035245 [recompute_prepare]: 8.58027e-06 [updatestate_depend_eliminate]: 8.74e-06 [updatestate_assign_eliminate]: 6.54999e-06 [updatestate_loads_eliminate]: 7.8897e-06 [parameter_eliminate]: 3.64985e-06 [a_2]: 0.00011984 [accelerated_algorithm]: 8.80007e-06 [shard]: 2.52016e-06 [meta_shard_fg_expand]: 3.91016e-06 [shard_inline]: 8.57981e-06 [auto_parallel]: 1.22301e-05 [parallel]: 9.2499e-06 [flash_sp]: 1.20401e-05 [merge_comm]: 9.23034e-06 [allreduce_fusion]: 5.66989e-06 [matmul_add_comm_reduction]: 1.12196e-05 [allreduce_slice_to_reducescatter]: 4.30271e-07 [virtual_shard_identity]: 9.81009e-06 [virtual_dataset]: 8.15e-06 [get_grad_eliminate_]: 7.62986e-06 [virtual_output]: 8.08993e-06 [merge_forward]: 5.79981e-06 [cell_reuse_recompute_pass]: 1.85985e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.80001e-05 [before_grad]: 1.434e-05 [inplace_validation]: 5.0799e-06 [meta_fg_expand]: 5.86035e-06 [inplace_validation_after_expand]: 6.50994e-06 [flash_sp_send_recv_attached]: 4.75021e-06 [receive_attached]: 2.88012e-06 [after_resolve]: 1.16597e-05 [a_after_grad]: 1.274e-05 [special_op_eliminate]: 8.25012e-06 [renormalize]: 0.00046461 [add_forward_monad_depend]: 3.93996e-06 [auto_monad_grad]: 2.25985e-06 [auto_monad_eliminator]: 3.559e-05 [cse]: 3.616e-05 [a_3]: 5.85699e-05 [Cycle 2]: 0.00081157, [43] [expand_dump_flag]: 1.16974e-06 [switch_simplify]: 9.73977e-06 [loop_unroll]: 7.68015e-06 [a_1]: 0.00020658 [recompute_prepare]: 7.41985e-06 [updatestate_depend_eliminate]: 5.55022e-06 [updatestate_assign_eliminate]: 5.0501e-06 [updatestate_loads_eliminate]: 5.11995e-06 [parameter_eliminate]: 1.22981e-06 [a_2]: 0.00010853 [accelerated_algorithm]: 8.78982e-06 [shard]: 1.28988e-06 [meta_shard_fg_expand]: 2.73017e-06 [shard_inline]: 8.02986e-06 [auto_parallel]: 1.08997e-05 [parallel]: 3.81004e-06 [flash_sp]: 3.60003e-06 [merge_comm]: 6.28969e-06 [allreduce_fusion]: 5.11995e-06 [matmul_add_comm_reduction]: 8.15978e-06 [allreduce_slice_to_reducescatter]: 2.89641e-07 [virtual_shard_identity]: 9.12976e-06 [virtual_dataset]: 7.60984e-06 [get_grad_eliminate_]: 7.8599e-06 [virtual_output]: 7.89994e-06 [merge_forward]: 5.03007e-06 [cell_reuse_recompute_pass]: 2.1602e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.57403e-05 [before_grad]: 1.32299e-05 [inplace_validation]: 4.42984e-06 [meta_fg_expand]: 5.13997e-06 [inplace_validation_after_expand]: 5.65033e-06 [flash_sp_send_recv_attached]: 1.11014e-06 [receive_attached]: 9.89996e-07 [after_resolve]: 1.053e-05 [a_after_grad]: 1.27503e-05 [special_op_eliminate]: 7.68993e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.70204e-07 [auto_monad_grad]: 1.36998e-06 [auto_monad_eliminator]: 2.003e-05 [cse]: 2.03e-05 [a_3]: 5.03496e-05 [py_interpret_to_execute_after_opt_a]: 9.43011e-06 [slice_cell_reuse_recomputed_activation]: 2.45962e-06 [rewriter_after_opt_a]: 0.00015235 [convert_after_rewriter]: 1.00802e-05 [order_py_execute_after_rewriter]: 7.03009e-06 [opt_b]: 0.00025198, [1] [Cycle 1]: 0.0002457, [7] [b_1]: 0.00016764 [b_2]: 9.98005e-06 [updatestate_depend_eliminate]: 5.47012e-06 [updatestate_assign_eliminate]: 4.75021e-06 [updatestate_loads_eliminate]: 5.3104e-06 [renormalize]: 2.70084e-07 [cse]: 1.92299e-05 [optimize_parallel_all_gather_comm]: 9.07015e-06 [overlap_param_gather]: 1.60001e-06 [cconv]: 2.55401e-05 [loop_unroll]: 0.00049475 [opt_after_cconv]: 0.00015206, [1] [Cycle 1]: 0.00014533, [7] [c_1]: 6.76098e-05 [parameter_eliminate]: 2.77022e-06 [updatestate_depend_eliminate]: 8.54023e-06 [updatestate_assign_eliminate]: 4.63985e-06 [updatestate_loads_eliminate]: 5.3402e-06 [cse]: 2.19303e-05 [renormalize]: 4.69852e-07 [remove_dup_value]: 1.51098e-05 [tuple_transform]: 7.272e-05, [1] [Cycle 1]: 6.72196e-05, [2] [d_1]: 5.72302e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 2.06009e-06 [add_cache_embedding]: 1.493e-05 [add_recomputation]: 6.37104e-05 [cse_after_recomputation]: 2.81199e-05, [1] [Cycle 1]: 2.25501e-05, [1] [cse]: 1.67899e-05 [environ_conv]: 7.49016e-06 [swap_dp_allreduce_reducescatter]: 8.24034e-06 [bias_add_comm_swap]: 2.54018e-06 [label_micro_interleaved_index]: 1.91014e-06 [label_fine_grained_interleaved_index]: 2.37022e-06 [merge_cast_opt]: 1.43982e-06 [slice_recompute_activation]: 2.23005e-06 [micro_interleaved_order_control]: 1.93994e-06 [assign_add_opt]: 7.08038e-06 [ForceFp32Comm]: 8.49832e-07 [remove_cast_before_assign_add]: 8.69855e-07 [full_micro_interleaved_order_control]: 2.70968e-06 [reorder_send_recv_between_fp_bp]: 2.37999e-06 [comm_op_add_attrs]: 1.11992e-06 [add_comm_op_reuse_tag]: 1.13994e-06 [interleave_split_concat_branches]: 9.00123e-07 [interleave_parallel_branches]: 1.00024e-06 [overlap_opt_shard_in_pipeline]: 1.3602e-06 [overlap_opt_shard_grad_in_pipeline]: 2.46009e-06 [control_data_broadcast_order]: 1.15018e-06 [grouped_pairwise_exchange_alltoall]: 1.41002e-06 [offloading_packed_experts]: 1.11992e-06 [overlap_recompute_and_grad_model_parallel]: 2.64961e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.50298e-07 [overlap_recompute_allgather_and_fa_grad]: 1.18976e-06 [overlap_grad_ring_attention]: 2.33017e-06 [overlap_grad_flash_sp]: 1.56802e-05 [begin_end_overlap_inline]: 7.70204e-07 [split_matmul_comm_elemetwise]: 1.8999e-06 [split_layernorm_comm]: 2.02982e-06 [handle_group_info]: 1.03004e-06 [symbol_engine_optimizer]: 8.92701e-05, [1] [Cycle 1]: 8.39899e-05, [6] [build]: 4.35999e-06 [elim_shapecalc]: 1.25598e-05 [elim_not_effective]: 1.685e-05 [opt_reshape]: 8.95979e-06 [fold_const_symbol]: 1.434e-05 [renormalize]: 2.5006e-07 [pipeline_parallel_scheduler]: 1.67964e-06 [auto_monad_reorder]: 3.06498e-05 [get_jit_bprop_graph]: 5.09899e-07 [rewriter_after_jit_bprop_graph]: 4.30271e-07 [eliminate_special_op_node]: 0.00056188 [distribtued_split]: 4.10001e-05 [validate]: 3.53702e-05 [task_emit]: 0.0717442 [execute]: 1.23298e-05 Sums bootstrap : 0.000337s : 0.43% type_inference : 0.002666s : 3.37% auto_monad : 0.000139s : 0.18% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000028s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000041s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000559s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000228s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000013s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000016s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000028s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000465s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000056s : 0.07% optimize.opt_a.cse : 0.000056s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000152s : 0.19% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000168s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000026s : 0.03% optimize.loop_unroll : 0.000495s : 0.63% optimize.opt_after_cconv.c_1 : 0.000068s : 0.09% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000015s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000003s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000562s : 0.71% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.04% task_emit : 0.071744s : 90.81% execute : 0.000012s : 0.02% Time group info: ------[substitution.] 0.000139 63 4.78% : 0.000007s : 2: substitution.depend_value_elim 2.22% : 0.000003s : 5: substitution.elim_not_effective 1.90% : 0.000003s : 5: substitution.fold_const_symbol 5.66% : 0.000008s : 6: substitution.graph_param_transform 50.60% : 0.000070s : 1: substitution.inline 3.88% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.44% : 0.000005s : 6: substitution.load_eliminater 2.68% : 0.000004s : 2: substitution.reduce_all_const_elim 5.53% : 0.000008s : 10: substitution.remove_not_recompute_node 2.60% : 0.000004s : 2: substitution.replace_old_param 9.15% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 7.57% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002634 2 88.91% : 0.002341s : 1: type_inference.infer 11.09% : 0.000292s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000069 1 100.00% : 0.000069s : 1: match.inline ------[predicate.] 0.000233 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.29% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.68% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.24% : 0.000005s : 25: predicate.arithmetic_simplify 0.78% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.42% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.40% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.80% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.92% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.85% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.18% : 0.000003s : 19: predicate.environ_get_depend_swap 1.88% : 0.000004s : 31: predicate.environ_get_eliminate 1.08% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.24% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.84% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.74% : 0.000013s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.07% : 0.000002s : 12: predicate.less_batch_normalization 1.71% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.34% : 0.000005s : 38: predicate.load_eliminater 1.52% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.81% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.81% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.81% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.21% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.88% : 0.000002s : 12: predicate.reduce_all_const_elim 1.10% : 0.000003s : 13: predicate.reduce_eliminate 0.59% : 0.000001s : 12: predicate.remove_not_recompute_node 1.12% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.91% : 0.000002s : 13: predicate.reshape_eliminate 0.83% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.58% : 0.000001s : 6: predicate.row_tensor_eliminate 1.06% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.39% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 1.17% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.26% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.63% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.25% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.82% : 0.000002s : 13: predicate.transpose_eliminate 1.80% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.72% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.63% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.76% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.53% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.41% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.34% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.78% : 0.000009s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.74% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.94% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000169 4 10.86% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.14% : 0.000150s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092544 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000069s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.17% : 0.000153s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.39% : 0.000363s : 1: bootstrap 0.03% : 0.000030s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.62% : 0.000577s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000505s : 1: loop_unroll 0.01% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001131s : 80: opt.transform.opt_a 0.07% : 0.000066s : 1: opt.transform.opt_after_cconv 0.17% : 0.000158s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 6.08% : 0.005630s : 1: opt_a 0.17% : 0.000156s : 1: opt_after_cconv 0.28% : 0.000256s : 1: opt_b 7.99% : 0.007396s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000034s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.27% : 0.000251s : 1: renormalize.infer 0.23% : 0.000208s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000158s : 1: rewriter_after_opt_a 0.04% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000092s : 1: symbol_engine_optimizer 77.56% : 0.071778s : 1: task_emit 0.08% : 0.000076s : 1: tuple_transform 2.90% : 0.002684s : 1: type_inference 0.08% : 0.000070s : 1: validate TotalTime = 0.0851448, [21] [bootstrap]: 0.00037924 [type_inference]: 0.00306015 [auto_monad]: 0.00014516 [graph_reusing]: 2.89967e-06 [inline]: 1.74018e-06 [parallel-infer-symbol]: 2.25985e-06 [pre_auto_parallel]: 2.898e-05 [insert-virtual-dataset]: 2.94019e-06 [parallel-infer-symbol-second]: 4.4005e-07 [dataset_repeat_opt]: 1.29966e-06 [pipeline_split]: 1.70991e-06 [optimize]: 0.00849775, [52] [py_interpret_to_execute]: 1.81901e-05 [rewriter_before_opt_a]: 4.55296e-05 [opt_a]: 0.00659372, [2] [Cycle 1]: 0.00186077, [43] [expand_dump_flag]: 3.93996e-06 [switch_simplify]: 3.42703e-05 [loop_unroll]: 1.62199e-05 [a_1]: 0.00041705 [recompute_prepare]: 1.08099e-05 [updatestate_depend_eliminate]: 9.11998e-06 [updatestate_assign_eliminate]: 6.36978e-06 [updatestate_loads_eliminate]: 8.17981e-06 [parameter_eliminate]: 3.49991e-06 [a_2]: 0.00014596 [accelerated_algorithm]: 1.093e-05 [shard]: 2.2701e-06 [meta_shard_fg_expand]: 4.96022e-06 [shard_inline]: 1.05998e-05 [auto_parallel]: 1.34301e-05 [parallel]: 9.36957e-06 [flash_sp]: 1.26502e-05 [merge_comm]: 8.84989e-06 [allreduce_fusion]: 6.19981e-06 [matmul_add_comm_reduction]: 1.133e-05 [allreduce_slice_to_reducescatter]: 4.89876e-07 [virtual_shard_identity]: 1.17403e-05 [virtual_dataset]: 1.03996e-05 [get_grad_eliminate_]: 9.92976e-06 [virtual_output]: 1.02301e-05 [merge_forward]: 6.26035e-06 [cell_reuse_recompute_pass]: 2.04984e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.15401e-05 [before_grad]: 1.81799e-05 [inplace_validation]: 5.82961e-06 [meta_fg_expand]: 6.67991e-06 [inplace_validation_after_expand]: 7.83009e-06 [flash_sp_send_recv_attached]: 6.00005e-06 [receive_attached]: 2.84007e-06 [after_resolve]: 1.375e-05 [a_after_grad]: 1.59899e-05 [special_op_eliminate]: 9.30019e-06 [renormalize]: 0.00056658 [add_forward_monad_depend]: 3.55998e-06 [auto_monad_grad]: 1.95997e-06 [auto_monad_eliminator]: 3.578e-05 [cse]: 3.791e-05 [a_3]: 7.02604e-05 [Cycle 2]: 0.00093904, [43] [expand_dump_flag]: 1.24006e-06 [switch_simplify]: 1.11503e-05 [loop_unroll]: 9.2797e-06 [a_1]: 0.00026245 [recompute_prepare]: 1.00001e-05 [updatestate_depend_eliminate]: 6.78981e-06 [updatestate_assign_eliminate]: 5.5097e-06 [updatestate_loads_eliminate]: 5.79981e-06 [parameter_eliminate]: 1.60001e-06 [a_2]: 0.0001293 [accelerated_algorithm]: 1.01998e-05 [shard]: 1.22981e-06 [meta_shard_fg_expand]: 2.93972e-06 [shard_inline]: 9.45991e-06 [auto_parallel]: 1.19503e-05 [parallel]: 6.4699e-06 [flash_sp]: 3.6303e-06 [merge_comm]: 7.16979e-06 [allreduce_fusion]: 5.56977e-06 [matmul_add_comm_reduction]: 8.36002e-06 [allreduce_slice_to_reducescatter]: 3.39933e-07 [virtual_shard_identity]: 1.131e-05 [virtual_dataset]: 9.5102e-06 [get_grad_eliminate_]: 9.1698e-06 [virtual_output]: 9.0301e-06 [merge_forward]: 5.13997e-06 [cell_reuse_recompute_pass]: 2.13971e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.99596e-05 [before_grad]: 1.63298e-05 [inplace_validation]: 4.86011e-06 [meta_fg_expand]: 5.85988e-06 [inplace_validation_after_expand]: 5.9898e-06 [flash_sp_send_recv_attached]: 9.49949e-07 [receive_attached]: 8.49832e-07 [after_resolve]: 1.23098e-05 [a_after_grad]: 1.49501e-05 [special_op_eliminate]: 9.11998e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 9.39704e-07 [auto_monad_grad]: 1.26008e-06 [auto_monad_eliminator]: 2.01804e-05 [cse]: 2.23201e-05 [a_3]: 5.87101e-05 [py_interpret_to_execute_after_opt_a]: 9.96003e-06 [slice_cell_reuse_recomputed_activation]: 2.48011e-06 [rewriter_after_opt_a]: 0.00015155 [convert_after_rewriter]: 9.60985e-06 [order_py_execute_after_rewriter]: 6.52019e-06 [opt_b]: 0.00028442, [1] [Cycle 1]: 0.00027811, [7] [b_1]: 0.00019635 [b_2]: 1.17002e-05 [updatestate_depend_eliminate]: 5.49015e-06 [updatestate_assign_eliminate]: 4.77955e-06 [updatestate_loads_eliminate]: 5.64987e-06 [renormalize]: 3.30154e-07 [cse]: 2.09301e-05 [optimize_parallel_all_gather_comm]: 9.39006e-06 [overlap_param_gather]: 1.60001e-06 [cconv]: 2.59303e-05 [loop_unroll]: 0.00053443 [opt_after_cconv]: 0.00015394, [1] [Cycle 1]: 0.00014727, [7] [c_1]: 6.49099e-05 [parameter_eliminate]: 2.54018e-06 [updatestate_depend_eliminate]: 9.07993e-06 [updatestate_assign_eliminate]: 5.22984e-06 [updatestate_loads_eliminate]: 5.93998e-06 [cse]: 2.36901e-05 [renormalize]: 4.4005e-07 [remove_dup_value]: 1.52602e-05 [tuple_transform]: 8.424e-05, [1] [Cycle 1]: 7.94302e-05, [2] [d_1]: 6.851e-05 [renormalize]: 2.79862e-07 [partial_unused_args_eliminate]: 2.16998e-06 [add_cache_embedding]: 1.552e-05 [add_recomputation]: 7.26702e-05 [cse_after_recomputation]: 2.88198e-05, [1] [Cycle 1]: 2.384e-05, [1] [cse]: 1.84001e-05 [environ_conv]: 7.93999e-06 [swap_dp_allreduce_reducescatter]: 7.68993e-06 [bias_add_comm_swap]: 2.69013e-06 [label_micro_interleaved_index]: 2.52994e-06 [label_fine_grained_interleaved_index]: 2.3297e-06 [merge_cast_opt]: 1.36998e-06 [slice_recompute_activation]: 2.23005e-06 [micro_interleaved_order_control]: 1.91992e-06 [assign_add_opt]: 8.38004e-06 [ForceFp32Comm]: 1.30013e-06 [remove_cast_before_assign_add]: 1.18976e-06 [full_micro_interleaved_order_control]: 2.68035e-06 [reorder_send_recv_between_fp_bp]: 2.33995e-06 [comm_op_add_attrs]: 1.07009e-06 [add_comm_op_reuse_tag]: 1.20001e-06 [interleave_split_concat_branches]: 8.99658e-07 [interleave_parallel_branches]: 1.07009e-06 [overlap_opt_shard_in_pipeline]: 5.25033e-06 [overlap_opt_shard_grad_in_pipeline]: 2.36975e-06 [control_data_broadcast_order]: 1.30013e-06 [grouped_pairwise_exchange_alltoall]: 1.39978e-06 [offloading_packed_experts]: 1.17999e-06 [overlap_recompute_and_grad_model_parallel]: 2.14996e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.09785e-07 [overlap_recompute_allgather_and_fa_grad]: 1.86963e-06 [overlap_grad_ring_attention]: 2.21981e-06 [overlap_grad_flash_sp]: 1.73096e-05 [begin_end_overlap_inline]: 8.00006e-07 [split_matmul_comm_elemetwise]: 2.07964e-06 [split_layernorm_comm]: 1.90968e-06 [handle_group_info]: 1.16974e-06 [symbol_engine_optimizer]: 9.99998e-05, [1] [Cycle 1]: 9.477e-05, [6] [build]: 4.35999e-06 [elim_shapecalc]: 1.43102e-05 [elim_not_effective]: 1.95e-05 [opt_reshape]: 1.08504e-05 [fold_const_symbol]: 1.729e-05 [renormalize]: 3.19909e-07 [pipeline_parallel_scheduler]: 1.55997e-06 [auto_monad_reorder]: 3.31597e-05 [get_jit_bprop_graph]: 7.00355e-07 [rewriter_after_jit_bprop_graph]: 4.29805e-07 [eliminate_special_op_node]: 0.00052713 [distribtued_split]: 5.19198e-05 [validate]: 4.00101e-05 [task_emit]: 0.0720837 [execute]: 1.29701e-05 Sums bootstrap : 0.000379s : 0.47% type_inference : 0.003060s : 3.81% auto_monad : 0.000145s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000029s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000018s : 0.02% optimize.rewriter_before_opt_a : 0.000046s : 0.06% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000045s : 0.06% optimize.opt_a.loop_unroll : 0.000025s : 0.03% optimize.opt_a.a_1 : 0.000679s : 0.85% optimize.opt_a.recompute_prepare : 0.000021s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000275s : 0.34% optimize.opt_a.accelerated_algorithm : 0.000021s : 0.03% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000008s : 0.01% optimize.opt_a.shard_inline : 0.000020s : 0.02% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000016s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000016s : 0.02% optimize.opt_a.allreduce_fusion : 0.000012s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000023s : 0.03% optimize.opt_a.virtual_dataset : 0.000020s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000019s : 0.02% optimize.opt_a.virtual_output : 0.000019s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000041s : 0.05% optimize.opt_a.before_grad : 0.000035s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000013s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000014s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000026s : 0.03% optimize.opt_a.a_after_grad : 0.000031s : 0.04% optimize.opt_a.special_op_eliminate : 0.000018s : 0.02% optimize.opt_a.renormalize : 0.000567s : 0.71% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000056s : 0.07% optimize.opt_a.cse : 0.000060s : 0.08% optimize.opt_a.a_3 : 0.000129s : 0.16% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000152s : 0.19% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000196s : 0.24% optimize.opt_b.b_2 : 0.000012s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000021s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000026s : 0.03% optimize.loop_unroll : 0.000534s : 0.67% optimize.opt_after_cconv.c_1 : 0.000065s : 0.08% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000024s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000015s : 0.02% optimize.tuple_transform.d_1 : 0.000069s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000016s : 0.02% optimize.add_recomputation : 0.000073s : 0.09% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000003s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000005s : 0.01% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000002s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000017s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000020s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000033s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000527s : 0.66% distribtued_split : 0.000052s : 0.06% validate : 0.000040s : 0.05% task_emit : 0.072084s : 89.80% execute : 0.000013s : 0.02% Time group info: ------[substitution.] 0.000157 63 5.33% : 0.000008s : 2: substitution.depend_value_elim 2.55% : 0.000004s : 5: substitution.elim_not_effective 2.04% : 0.000003s : 5: substitution.fold_const_symbol 5.83% : 0.000009s : 6: substitution.graph_param_transform 47.88% : 0.000075s : 1: substitution.inline 4.82% : 0.000008s : 10: substitution.j_node_and_user_rematch 3.23% : 0.000005s : 6: substitution.load_eliminater 2.89% : 0.000005s : 2: substitution.reduce_all_const_elim 6.83% : 0.000011s : 10: substitution.remove_not_recompute_node 2.56% : 0.000004s : 2: substitution.replace_old_param 8.13% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 7.89% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.003028 2 89.16% : 0.002699s : 1: type_inference.infer 10.84% : 0.000328s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000074 1 100.00% : 0.000074s : 1: match.inline ------[predicate.] 0.000273 1420 0.75% : 0.000002s : 13: predicate.accumulaten_eliminater 1.23% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.82% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.21% : 0.000006s : 25: predicate.arithmetic_simplify 0.79% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.49% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.40% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.92% : 0.000003s : 13: predicate.dict_get_item_eliminator 0.81% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.51% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.30% : 0.000004s : 19: predicate.environ_add_const_eliminate 1.05% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000003s : 19: predicate.environ_get_depend_swap 1.86% : 0.000005s : 31: predicate.environ_get_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.36% : 0.000004s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.73% : 0.000002s : 12: predicate.incorporate_call_switch 6.02% : 0.000016s : 63: predicate.inline 1.16% : 0.000003s : 12: predicate.inline_without_move 0.43% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.05% : 0.000003s : 12: predicate.less_batch_normalization 1.74% : 0.000005s : 25: predicate.list_to_tuple_eliminator_ 2.37% : 0.000006s : 38: predicate.load_eliminater 1.28% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.80% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.75% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.70% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.52% : 0.000001s : 6: predicate.parallel_virtual_node 1.08% : 0.000003s : 14: predicate.partial_defer_inline 1.30% : 0.000004s : 19: predicate.partial_eliminate 0.89% : 0.000002s : 13: predicate.print_const_string_wrapper 0.82% : 0.000002s : 12: predicate.reduce_all_const_elim 1.06% : 0.000003s : 13: predicate.reduce_eliminate 0.53% : 0.000001s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000003s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.95% : 0.000003s : 12: predicate.shard_identity_eliminate 1.40% : 0.000004s : 18: predicate.special_op_eliminate 1.08% : 0.000003s : 12: predicate.specialize_transform 0.99% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.38% : 0.000006s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000003s : 14: predicate.switch_defer_inline 1.68% : 0.000005s : 26: predicate.switch_layer_defer_inline 4.10% : 0.000011s : 43: predicate.switch_simplify 0.88% : 0.000002s : 13: predicate.tile_eliminate 0.84% : 0.000002s : 13: predicate.transpose_eliminate 1.78% : 0.000005s : 25: predicate.tuple_list_convert_item_index_to_positive 1.68% : 0.000005s : 25: predicate.tuple_list_get_item_const_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.69% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.63% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.56% : 0.000007s : 37: predicate.tuple_list_set_item_eliminator 1.71% : 0.000005s : 25: predicate.tuple_to_list_eliminator_ 2.32% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.38% : 0.000009s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.85% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.88% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000188 4 9.19% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.81% : 0.000171s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.095919 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000078s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.17% : 0.000158s : 1: auto_monad 0.04% : 0.000040s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.42% : 0.000406s : 1: bootstrap 0.03% : 0.000030s : 1: cconv 0.00% : 0.000005s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.01% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000032s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000061s : 1: distribtued_split 0.56% : 0.000541s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000010s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000544s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000018s : 1: opt.transform.loop_unroll_optimizer 1.42% : 0.001366s : 80: opt.transform.opt_a 0.07% : 0.000063s : 1: opt.transform.opt_after_cconv 0.19% : 0.000184s : 27: opt.transform.opt_b 0.07% : 0.000067s : 1: opt.transform.opt_trans_graph 0.04% : 0.000039s : 3: opt.transform.special_op_eliminate 0.06% : 0.000057s : 4: opt.transform.symbol_engine_opt 6.88% : 0.006598s : 1: opt_a 0.16% : 0.000158s : 1: opt_after_cconv 0.30% : 0.000288s : 1: opt_b 8.87% : 0.008506s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000009s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.01% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000006s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000035s : 1: pre_auto_parallel 0.02% : 0.000023s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000020s : 1: remove_dup_value 0.35% : 0.000333s : 1: renormalize.infer 0.24% : 0.000227s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000158s : 1: rewriter_after_opt_a 0.05% : 0.000050s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000006s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000103s : 1: symbol_engine_optimizer 75.19% : 0.072117s : 1: task_emit 0.09% : 0.000088s : 1: tuple_transform 3.21% : 0.003079s : 1: type_inference 0.08% : 0.000079s : 1: validate TotalTime = 0.0889735, [21] [bootstrap]: 0.00031049 [type_inference]: 0.0026439 [auto_monad]: 0.00013855 [graph_reusing]: 2.61003e-06 [inline]: 1.4999e-06 [parallel-infer-symbol]: 2.54996e-06 [pre_auto_parallel]: 2.86801e-05 [insert-virtual-dataset]: 2.99001e-06 [parallel-infer-symbol-second]: 3.7998e-07 [dataset_repeat_opt]: 1.11992e-06 [pipeline_split]: 1.64984e-06 [optimize]: 0.00755979, [52] [py_interpret_to_execute]: 1.52704e-05 [rewriter_before_opt_a]: 3.66499e-05 [opt_a]: 0.00571581, [2] [Cycle 1]: 0.00149576, [43] [expand_dump_flag]: 2.14996e-06 [switch_simplify]: 2.63499e-05 [loop_unroll]: 1.31601e-05 [a_1]: 0.00033733 [recompute_prepare]: 8.91974e-06 [updatestate_depend_eliminate]: 8.43965e-06 [updatestate_assign_eliminate]: 5.8203e-06 [updatestate_loads_eliminate]: 6.29015e-06 [parameter_eliminate]: 2.44984e-06 [a_2]: 0.00011509 [accelerated_algorithm]: 8.92999e-06 [shard]: 1.4999e-06 [meta_shard_fg_expand]: 2.69013e-06 [shard_inline]: 8.31997e-06 [auto_parallel]: 1.20299e-05 [parallel]: 4.6799e-06 [flash_sp]: 6.19004e-06 [merge_comm]: 6.94999e-06 [allreduce_fusion]: 4.82006e-06 [matmul_add_comm_reduction]: 8.41031e-06 [allreduce_slice_to_reducescatter]: 2.90107e-07 [virtual_shard_identity]: 9.89018e-06 [virtual_dataset]: 8.18027e-06 [get_grad_eliminate_]: 8.10018e-06 [virtual_output]: 8.27992e-06 [merge_forward]: 4.51971e-06 [cell_reuse_recompute_pass]: 1.87987e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.805e-05 [before_grad]: 1.369e-05 [inplace_validation]: 4.27989e-06 [meta_fg_expand]: 4.93973e-06 [inplace_validation_after_expand]: 5.0501e-06 [flash_sp_send_recv_attached]: 2.61981e-06 [receive_attached]: 1.47987e-06 [after_resolve]: 1.10599e-05 [a_after_grad]: 1.30399e-05 [special_op_eliminate]: 8.71997e-06 [renormalize]: 0.00045 [add_forward_monad_depend]: 3.08966e-06 [auto_monad_grad]: 1.30013e-06 [auto_monad_eliminator]: 2.306e-05 [cse]: 2.39103e-05 [a_3]: 5.882e-05 [Cycle 2]: 0.00078858, [43] [expand_dump_flag]: 1.18976e-06 [switch_simplify]: 9.39006e-06 [loop_unroll]: 7.62008e-06 [a_1]: 0.00020165 [recompute_prepare]: 7.28015e-06 [updatestate_depend_eliminate]: 5.89015e-06 [updatestate_assign_eliminate]: 5.51995e-06 [updatestate_loads_eliminate]: 5.41005e-06 [parameter_eliminate]: 1.15996e-06 [a_2]: 0.00010615 [accelerated_algorithm]: 8.16025e-06 [shard]: 1.16974e-06 [meta_shard_fg_expand]: 2.69013e-06 [shard_inline]: 7.87014e-06 [auto_parallel]: 1.09603e-05 [parallel]: 3.74019e-06 [flash_sp]: 2.37022e-06 [merge_comm]: 5.79981e-06 [allreduce_fusion]: 4.92996e-06 [matmul_add_comm_reduction]: 7.73976e-06 [allreduce_slice_to_reducescatter]: 5.49946e-07 [virtual_shard_identity]: 8.57003e-06 [virtual_dataset]: 7.66991e-06 [get_grad_eliminate_]: 7.43009e-06 [virtual_output]: 7.06036e-06 [merge_forward]: 4.63007e-06 [cell_reuse_recompute_pass]: 2.00002e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.52499e-05 [before_grad]: 1.26399e-05 [inplace_validation]: 4.65009e-06 [meta_fg_expand]: 4.73997e-06 [inplace_validation_after_expand]: 5.17024e-06 [flash_sp_send_recv_attached]: 9.69972e-07 [receive_attached]: 7.89762e-07 [after_resolve]: 1.01002e-05 [a_after_grad]: 1.194e-05 [special_op_eliminate]: 7.50972e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.49832e-07 [auto_monad_grad]: 1.17021e-06 [auto_monad_eliminator]: 1.91401e-05 [cse]: 2.066e-05 [a_3]: 4.90099e-05 [py_interpret_to_execute_after_opt_a]: 9.18005e-06 [slice_cell_reuse_recomputed_activation]: 1.6503e-06 [rewriter_after_opt_a]: 0.0001328 [convert_after_rewriter]: 8.86014e-06 [order_py_execute_after_rewriter]: 5.13997e-06 [opt_b]: 0.00024942, [1] [Cycle 1]: 0.00024284, [7] [b_1]: 0.00016466 [b_2]: 1.03498e-05 [updatestate_depend_eliminate]: 5.51995e-06 [updatestate_assign_eliminate]: 4.78001e-06 [updatestate_loads_eliminate]: 5.24987e-06 [renormalize]: 4.10248e-07 [cse]: 1.97403e-05 [optimize_parallel_all_gather_comm]: 7.68015e-06 [overlap_param_gather]: 1.11992e-06 [cconv]: 1.61296e-05 [loop_unroll]: 0.00067231 [opt_after_cconv]: 0.0001416, [1] [Cycle 1]: 0.00013436, [7] [c_1]: 5.45797e-05 [parameter_eliminate]: 2.82004e-06 [updatestate_depend_eliminate]: 9.14e-06 [updatestate_assign_eliminate]: 5.30016e-06 [updatestate_loads_eliminate]: 5.27967e-06 [cse]: 2.37799e-05 [renormalize]: 5.49946e-07 [remove_dup_value]: 8.61008e-06 [tuple_transform]: 7.032e-05, [1] [Cycle 1]: 6.55102e-05, [2] [d_1]: 5.56004e-05 [renormalize]: 1.99769e-07 [partial_unused_args_eliminate]: 1.45985e-06 [add_cache_embedding]: 1.11498e-05 [add_recomputation]: 5.326e-05 [cse_after_recomputation]: 2.79802e-05, [1] [Cycle 1]: 2.23601e-05, [1] [cse]: 1.69501e-05 [environ_conv]: 6.76e-06 [swap_dp_allreduce_reducescatter]: 6.80005e-06 [bias_add_comm_swap]: 1.4198e-06 [label_micro_interleaved_index]: 1.09989e-06 [label_fine_grained_interleaved_index]: 8.60076e-07 [merge_cast_opt]: 5.89993e-07 [slice_recompute_activation]: 8.49832e-07 [micro_interleaved_order_control]: 9.89996e-07 [assign_add_opt]: 6.57979e-06 [ForceFp32Comm]: 8.29808e-07 [remove_cast_before_assign_add]: 5.20144e-07 [full_micro_interleaved_order_control]: 8.801e-07 [reorder_send_recv_between_fp_bp]: 7.59959e-07 [comm_op_add_attrs]: 5.29923e-07 [add_comm_op_reuse_tag]: 5.09899e-07 [interleave_split_concat_branches]: 6.80331e-07 [interleave_parallel_branches]: 4.50294e-07 [overlap_opt_shard_in_pipeline]: 9.89996e-07 [overlap_opt_shard_grad_in_pipeline]: 8.49832e-07 [control_data_broadcast_order]: 6.10016e-07 [grouped_pairwise_exchange_alltoall]: 5.60191e-07 [offloading_packed_experts]: 5.49946e-07 [overlap_recompute_and_grad_model_parallel]: 1.05985e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.10248e-07 [overlap_recompute_allgather_and_fa_grad]: 4.50294e-07 [overlap_grad_ring_attention]: 8.00006e-07 [overlap_grad_flash_sp]: 1.194e-05 [begin_end_overlap_inline]: 3.90224e-07 [split_matmul_comm_elemetwise]: 1.15996e-06 [split_layernorm_comm]: 1.01002e-06 [handle_group_info]: 4.00003e-07 [symbol_engine_optimizer]: 8.80999e-05, [1] [Cycle 1]: 8.28202e-05, [6] [build]: 3.93996e-06 [elim_shapecalc]: 1.20499e-05 [elim_not_effective]: 1.60299e-05 [opt_reshape]: 9.52976e-06 [fold_const_symbol]: 1.352e-05 [renormalize]: 2.20258e-07 [pipeline_parallel_scheduler]: 9.30391e-07 [auto_monad_reorder]: 2.13003e-05 [get_jit_bprop_graph]: 2.70084e-07 [rewriter_after_jit_bprop_graph]: 2.59839e-07 [eliminate_special_op_node]: 0.00051933 [distribtued_split]: 3.46801e-05 [validate]: 3.28901e-05 [task_emit]: 0.0774162 [execute]: 8.95001e-06 Sums bootstrap : 0.000310s : 0.37% type_inference : 0.002644s : 3.13% auto_monad : 0.000139s : 0.16% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000029s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000036s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.02% optimize.opt_a.a_1 : 0.000539s : 0.64% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000221s : 0.26% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000009s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000450s : 0.53% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000042s : 0.05% optimize.opt_a.cse : 0.000045s : 0.05% optimize.opt_a.a_3 : 0.000108s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000133s : 0.16% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.19% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000672s : 0.80% optimize.opt_after_cconv.c_1 : 0.000055s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000024s : 0.03% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000053s : 0.06% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000000s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.01% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000000s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000021s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000519s : 0.61% distribtued_split : 0.000035s : 0.04% validate : 0.000033s : 0.04% task_emit : 0.077416s : 91.59% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000118 63 4.49% : 0.000005s : 2: substitution.depend_value_elim 2.34% : 0.000003s : 5: substitution.elim_not_effective 1.68% : 0.000002s : 5: substitution.fold_const_symbol 5.07% : 0.000006s : 6: substitution.graph_param_transform 49.26% : 0.000058s : 1: substitution.inline 4.42% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.27% : 0.000004s : 6: substitution.load_eliminater 2.18% : 0.000003s : 2: substitution.reduce_all_const_elim 6.93% : 0.000008s : 10: substitution.remove_not_recompute_node 2.64% : 0.000003s : 2: substitution.replace_old_param 9.62% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.11% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002615 2 88.79% : 0.002322s : 1: type_inference.infer 11.21% : 0.000293s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000057 1 100.00% : 0.000057s : 1: match.inline ------[predicate.] 0.000232 1420 0.87% : 0.000002s : 13: predicate.accumulaten_eliminater 1.12% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.18% : 0.000005s : 25: predicate.arithmetic_simplify 0.81% : 0.000002s : 13: predicate.cast_eliminate 0.75% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.49% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.31% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.77% : 0.000002s : 12: predicate.depend_value_elim 0.80% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.90% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.64% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.17% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.94% : 0.000005s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.24% : 0.000003s : 14: predicate.float_depend_g_call 0.77% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.76% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.66% : 0.000013s : 63: predicate.inline 1.00% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.33% : 0.000003s : 12: predicate.less_batch_normalization 1.72% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.41% : 0.000006s : 38: predicate.load_eliminater 1.39% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.12% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.87% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.79% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.52% : 0.000001s : 6: predicate.parallel_virtual_node 1.23% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.86% : 0.000002s : 13: predicate.print_const_string_wrapper 0.92% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000003s : 13: predicate.reduce_eliminate 0.54% : 0.000001s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.50% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.60% : 0.000001s : 6: predicate.row_tensor_eliminate 1.11% : 0.000003s : 12: predicate.same_eliminate 0.52% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.46% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 1.20% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.90% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.26% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.85% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.18% : 0.000010s : 43: predicate.switch_simplify 0.82% : 0.000002s : 13: predicate.tile_eliminate 0.83% : 0.000002s : 13: predicate.transpose_eliminate 1.72% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.80% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.84% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.52% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.27% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.57% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.56% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.88% : 0.000002s : 12: predicate.virtual_output_eliminate 0.47% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000162 4 5.83% : 0.000009s : 1: func_graph_cloner_run.FuncGraphClonerGraph 94.17% : 0.000152s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.098364 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.06% : 0.000058s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.15% : 0.000152s : 1: auto_monad 0.03% : 0.000028s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.34% : 0.000333s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.04% : 0.000043s : 1: distribtued_split 0.54% : 0.000534s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.69% : 0.000683s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.11% : 0.001095s : 80: opt.transform.opt_a 0.05% : 0.000053s : 1: opt.transform.opt_after_cconv 0.16% : 0.000155s : 27: opt.transform.opt_b 0.05% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000034s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 5.81% : 0.005720s : 1: opt_a 0.15% : 0.000146s : 1: opt_after_cconv 0.26% : 0.000253s : 1: opt_b 7.69% : 0.007568s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000035s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.01% : 0.000013s : 1: remove_dup_value 0.24% : 0.000236s : 1: renormalize.infer 0.21% : 0.000209s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000139s : 1: rewriter_after_opt_a 0.04% : 0.000041s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000091s : 1: symbol_engine_optimizer 78.73% : 0.077447s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.71% : 0.002663s : 1: type_inference 0.07% : 0.000067s : 1: validate TotalTime = 0.078249, [21] [bootstrap]: 0.00028384 [type_inference]: 0.00217444 [auto_monad]: 9.80604e-05 [graph_reusing]: 1.79e-06 [inline]: 1.07009e-06 [parallel-infer-symbol]: 1.30991e-06 [pre_auto_parallel]: 2.142e-05 [insert-virtual-dataset]: 1.83005e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 8.59611e-07 [pipeline_split]: 1.23028e-06 [optimize]: 0.00681964, [52] [py_interpret_to_execute]: 1.33403e-05 [rewriter_before_opt_a]: 2.963e-05 [opt_a]: 0.00522728, [2] [Cycle 1]: 0.00141262, [43] [expand_dump_flag]: 2.90992e-06 [switch_simplify]: 2.44202e-05 [loop_unroll]: 1.31801e-05 [a_1]: 0.00032306 [recompute_prepare]: 8.61986e-06 [updatestate_depend_eliminate]: 7.36024e-06 [updatestate_assign_eliminate]: 4.95976e-06 [updatestate_loads_eliminate]: 6.31995e-06 [parameter_eliminate]: 2.02004e-06 [a_2]: 0.00011399 [accelerated_algorithm]: 8.55001e-06 [shard]: 1.45985e-06 [meta_shard_fg_expand]: 3.66988e-06 [shard_inline]: 8.07969e-06 [auto_parallel]: 1.091e-05 [parallel]: 5.91995e-06 [flash_sp]: 7.78027e-06 [merge_comm]: 7.2401e-06 [allreduce_fusion]: 5.18002e-06 [matmul_add_comm_reduction]: 9.20007e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 9.76026e-06 [virtual_dataset]: 8.44989e-06 [get_grad_eliminate_]: 7.56979e-06 [virtual_output]: 7.47992e-06 [merge_forward]: 4.96022e-06 [cell_reuse_recompute_pass]: 1.45985e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.628e-05 [before_grad]: 1.36201e-05 [inplace_validation]: 4.92996e-06 [meta_fg_expand]: 5.32996e-06 [inplace_validation_after_expand]: 5.32996e-06 [flash_sp_send_recv_attached]: 3.61959e-06 [receive_attached]: 1.74018e-06 [after_resolve]: 1.057e-05 [a_after_grad]: 1.25701e-05 [special_op_eliminate]: 7.7798e-06 [renormalize]: 0.00039175 [add_forward_monad_depend]: 2.33017e-06 [auto_monad_grad]: 1.41002e-06 [auto_monad_eliminator]: 2.40402e-05 [cse]: 2.563e-05 [a_3]: 5.781e-05 [Cycle 2]: 0.00080842, [43] [expand_dump_flag]: 9.4017e-07 [switch_simplify]: 9.46037e-06 [loop_unroll]: 7.95024e-06 [a_1]: 0.0002376 [recompute_prepare]: 7.6401e-06 [updatestate_depend_eliminate]: 5.62007e-06 [updatestate_assign_eliminate]: 4.73997e-06 [updatestate_loads_eliminate]: 4.82006e-06 [parameter_eliminate]: 9.69972e-07 [a_2]: 0.00010446 [accelerated_algorithm]: 8.48994e-06 [shard]: 1.15018e-06 [meta_shard_fg_expand]: 2.37022e-06 [shard_inline]: 7.54977e-06 [auto_parallel]: 1.03703e-05 [parallel]: 3.09013e-06 [flash_sp]: 2.71015e-06 [merge_comm]: 5.79981e-06 [allreduce_fusion]: 5.34998e-06 [matmul_add_comm_reduction]: 7.3798e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 8.44011e-06 [virtual_dataset]: 7.66013e-06 [get_grad_eliminate_]: 7.45989e-06 [virtual_output]: 7.22008e-06 [merge_forward]: 4.42984e-06 [cell_reuse_recompute_pass]: 2.02982e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.45002e-05 [before_grad]: 1.23898e-05 [inplace_validation]: 4.28967e-06 [meta_fg_expand]: 4.64031e-06 [inplace_validation_after_expand]: 4.99003e-06 [flash_sp_send_recv_attached]: 8.2003e-07 [receive_attached]: 6.50063e-07 [after_resolve]: 9.16002e-06 [a_after_grad]: 1.21403e-05 [special_op_eliminate]: 7.44965e-06 [renormalize]: 8.98726e-08 [add_forward_monad_depend]: 7.29691e-07 [auto_monad_grad]: 9.39704e-07 [auto_monad_eliminator]: 1.61296e-05 [cse]: 1.88602e-05 [a_3]: 4.89801e-05 [py_interpret_to_execute_after_opt_a]: 8.80985e-06 [slice_cell_reuse_recomputed_activation]: 1.78022e-06 [rewriter_after_opt_a]: 0.00012593 [convert_after_rewriter]: 9.92976e-06 [order_py_execute_after_rewriter]: 5.38956e-06 [opt_b]: 0.00024134, [1] [Cycle 1]: 0.00023627, [7] [b_1]: 0.000161 [b_2]: 1.00201e-05 [updatestate_depend_eliminate]: 5.27035e-06 [updatestate_assign_eliminate]: 4.38001e-06 [updatestate_loads_eliminate]: 4.86989e-06 [renormalize]: 2.59839e-07 [cse]: 1.864e-05 [optimize_parallel_all_gather_comm]: 8.1202e-06 [overlap_param_gather]: 9.4017e-07 [cconv]: 1.62199e-05 [loop_unroll]: 0.00048069 [opt_after_cconv]: 0.00012846, [1] [Cycle 1]: 0.00012249, [7] [c_1]: 5.11101e-05 [parameter_eliminate]: 2.10991e-06 [updatestate_depend_eliminate]: 7.03987e-06 [updatestate_assign_eliminate]: 4.99003e-06 [updatestate_loads_eliminate]: 4.76977e-06 [cse]: 2.06004e-05 [renormalize]: 3.50177e-07 [remove_dup_value]: 1.03097e-05 [tuple_transform]: 6.78902e-05, [1] [Cycle 1]: 6.36298e-05, [2] [d_1]: 5.45699e-05 [renormalize]: 1.60187e-07 [partial_unused_args_eliminate]: 1.27964e-06 [add_cache_embedding]: 1.16699e-05 [add_recomputation]: 5.22099e-05 [cse_after_recomputation]: 2.61199e-05, [1] [Cycle 1]: 2.16197e-05, [1] [cse]: 1.647e-05 [environ_conv]: 6.78981e-06 [swap_dp_allreduce_reducescatter]: 7.43009e-06 [bias_add_comm_swap]: 2.00002e-06 [label_micro_interleaved_index]: 1.20001e-06 [label_fine_grained_interleaved_index]: 1.62981e-06 [merge_cast_opt]: 1.04029e-06 [slice_recompute_activation]: 1.2801e-06 [micro_interleaved_order_control]: 1.23028e-06 [assign_add_opt]: 6.50017e-06 [ForceFp32Comm]: 5.60191e-07 [remove_cast_before_assign_add]: 5.99772e-07 [full_micro_interleaved_order_control]: 1.46963e-06 [reorder_send_recv_between_fp_bp]: 1.08033e-06 [comm_op_add_attrs]: 5.69969e-07 [add_comm_op_reuse_tag]: 5.69969e-07 [interleave_split_concat_branches]: 5.0012e-07 [interleave_parallel_branches]: 5.49946e-07 [overlap_opt_shard_in_pipeline]: 9.09902e-07 [overlap_opt_shard_grad_in_pipeline]: 1.15996e-06 [control_data_broadcast_order]: 9.80217e-07 [grouped_pairwise_exchange_alltoall]: 6.50063e-07 [offloading_packed_experts]: 7.10133e-07 [overlap_recompute_and_grad_model_parallel]: 9.99775e-07 [overlap_grad_matmul_and_grad_allreduce]: 4.80097e-07 [overlap_recompute_allgather_and_fa_grad]: 5.89993e-07 [overlap_grad_ring_attention]: 1.27032e-06 [overlap_grad_flash_sp]: 1.173e-05 [begin_end_overlap_inline]: 4.4005e-07 [split_matmul_comm_elemetwise]: 1.11992e-06 [split_layernorm_comm]: 1.11992e-06 [handle_group_info]: 5.49946e-07 [symbol_engine_optimizer]: 8.34097e-05, [1] [Cycle 1]: 7.952e-05, [6] [build]: 3.89991e-06 [elim_shapecalc]: 1.213e-05 [elim_not_effective]: 1.60001e-05 [opt_reshape]: 8.80007e-06 [fold_const_symbol]: 1.32197e-05 [renormalize]: 1.99769e-07 [pipeline_parallel_scheduler]: 9.80217e-07 [auto_monad_reorder]: 2.15401e-05 [get_jit_bprop_graph]: 3.00352e-07 [rewriter_after_jit_bprop_graph]: 3.49712e-07 [eliminate_special_op_node]: 0.0004933 [distribtued_split]: 3.36799e-05 [validate]: 2.885e-05 [task_emit]: 0.0680402 [execute]: 7.89994e-06 Sums bootstrap : 0.000284s : 0.38% type_inference : 0.002174s : 2.93% auto_monad : 0.000098s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000034s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000561s : 0.75% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000218s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000392s : 0.53% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000126s : 0.17% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000481s : 0.65% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000052s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000022s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000493s : 0.66% distribtued_split : 0.000034s : 0.05% validate : 0.000029s : 0.04% task_emit : 0.068040s : 91.57% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000110 63 3.76% : 0.000004s : 2: substitution.depend_value_elim 2.61% : 0.000003s : 5: substitution.elim_not_effective 2.03% : 0.000002s : 5: substitution.fold_const_symbol 5.92% : 0.000006s : 6: substitution.graph_param_transform 48.69% : 0.000053s : 1: substitution.inline 4.63% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.33% : 0.000004s : 6: substitution.load_eliminater 2.41% : 0.000003s : 2: substitution.reduce_all_const_elim 6.63% : 0.000007s : 10: substitution.remove_not_recompute_node 2.21% : 0.000002s : 2: substitution.replace_old_param 9.30% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.47% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002151 2 89.93% : 0.001935s : 1: type_inference.infer 10.07% : 0.000217s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000052 1 100.00% : 0.000052s : 1: match.inline ------[predicate.] 0.000262 1420 0.67% : 0.000002s : 13: predicate.accumulaten_eliminater 1.00% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.63% : 0.000002s : 12: predicate.addn_check_dump 0.66% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 1.84% : 0.000005s : 25: predicate.arithmetic_simplify 0.76% : 0.000002s : 13: predicate.cast_eliminate 0.76% : 0.000002s : 12: predicate.check_bprop_eliminate 0.64% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000001s : 6: predicate.const_output_eliminate 0.42% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.13% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.75% : 0.000002s : 12: predicate.depend_value_elim 0.73% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.84% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.79% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.22% : 0.000001s : 6: predicate.elim_not_effective 0.50% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.03% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.03% : 0.000003s : 19: predicate.environ_get_add_eliminate 0.97% : 0.000003s : 19: predicate.environ_get_depend_swap 1.68% : 0.000004s : 31: predicate.environ_get_eliminate 0.96% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.74% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.17% : 0.000003s : 14: predicate.float_depend_g_call 0.63% : 0.000002s : 12: predicate.float_environ_get_switch 0.91% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.20% : 0.000001s : 6: predicate.fold_const_symbol 0.72% : 0.000002s : 12: predicate.get_grad_eliminate 0.26% : 0.000001s : 6: predicate.graph_param_transform 0.68% : 0.000002s : 12: predicate.incorporate_call 0.61% : 0.000002s : 12: predicate.incorporate_call_switch 4.88% : 0.000013s : 63: predicate.inline 0.93% : 0.000002s : 12: predicate.inline_without_move 0.35% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.92% : 0.000002s : 12: predicate.less_batch_normalization 1.49% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.08% : 0.000005s : 38: predicate.load_eliminater 1.10% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.12% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.57% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.63% : 0.000002s : 12: predicate.merge_addn 0.63% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.70% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.58% : 0.000002s : 6: predicate.mutable_eliminate 0.38% : 0.000001s : 6: predicate.opt_reshape 0.44% : 0.000001s : 6: predicate.parallel_virtual_node 0.96% : 0.000003s : 14: predicate.partial_defer_inline 1.12% : 0.000003s : 19: predicate.partial_eliminate 0.73% : 0.000002s : 13: predicate.print_const_string_wrapper 0.73% : 0.000002s : 12: predicate.reduce_all_const_elim 0.98% : 0.000003s : 13: predicate.reduce_eliminate 0.50% : 0.000001s : 12: predicate.remove_not_recompute_node 13.86% : 0.000036s : 25: predicate.replace_applicator 0.41% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.71% : 0.000002s : 13: predicate.reshape_eliminate 0.70% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.46% : 0.000001s : 6: predicate.row_tensor_eliminate 0.92% : 0.000002s : 12: predicate.same_eliminate 0.41% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.76% : 0.000002s : 12: predicate.shard_identity_eliminate 1.20% : 0.000003s : 18: predicate.special_op_eliminate 0.90% : 0.000002s : 12: predicate.specialize_transform 0.95% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.85% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.03% : 0.000005s : 38: predicate.stopgrad_eliminater 0.36% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.78% : 0.000002s : 14: predicate.switch_defer_inline 1.49% : 0.000004s : 26: predicate.switch_layer_defer_inline 3.59% : 0.000009s : 43: predicate.switch_simplify 0.69% : 0.000002s : 13: predicate.tile_eliminate 0.72% : 0.000002s : 13: predicate.transpose_eliminate 1.47% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.57% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.43% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.44% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.31% : 0.000003s : 25: predicate.tuple_list_get_set_item_eliminator 2.27% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.47% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.06% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.20% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.47% : 0.000001s : 6: predicate.value_based_eliminate 0.70% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.68% : 0.000002s : 12: predicate.virtual_output_eliminate 0.48% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000123 4 9.03% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.97% : 0.000112s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.086847 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000057s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.13% : 0.000109s : 1: auto_monad 0.03% : 0.000028s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000304s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.58% : 0.000506s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000015s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.56% : 0.000490s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.27% : 0.001107s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.02% : 0.005231s : 1: opt_a 0.15% : 0.000132s : 1: opt_after_cconv 0.28% : 0.000244s : 1: opt_b 7.86% : 0.006827s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.24% : 0.000211s : 1: renormalize.infer 0.20% : 0.000176s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000131s : 1: rewriter_after_opt_a 0.04% : 0.000034s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000086s : 1: symbol_engine_optimizer 78.37% : 0.068064s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.52% : 0.002190s : 1: type_inference 0.07% : 0.000060s : 1: validate TotalTime = 0.0802662, [21] [bootstrap]: 0.00030948 [type_inference]: 0.00241997 [auto_monad]: 0.000127 [graph_reusing]: 1.97021e-06 [inline]: 1.37975e-06 [parallel-infer-symbol]: 1.81003e-06 [pre_auto_parallel]: 2.51401e-05 [insert-virtual-dataset]: 3.49991e-06 [parallel-infer-symbol-second]: 3.1013e-07 [dataset_repeat_opt]: 1.45007e-06 [pipeline_split]: 1.41002e-06 [optimize]: 0.0071217, [52] [py_interpret_to_execute]: 1.46301e-05 [rewriter_before_opt_a]: 3.44403e-05 [opt_a]: 0.00549736, [2] [Cycle 1]: 0.0015387, [43] [expand_dump_flag]: 2.7502e-06 [switch_simplify]: 2.879e-05 [loop_unroll]: 1.34204e-05 [a_1]: 0.00034518 [recompute_prepare]: 9.44035e-06 [updatestate_depend_eliminate]: 7.79005e-06 [updatestate_assign_eliminate]: 5.16977e-06 [updatestate_loads_eliminate]: 5.87013e-06 [parameter_eliminate]: 3.43006e-06 [a_2]: 0.00011599 [accelerated_algorithm]: 8.33999e-06 [shard]: 2.04984e-06 [meta_shard_fg_expand]: 3.30992e-06 [shard_inline]: 1.28103e-05 [auto_parallel]: 1.171e-05 [parallel]: 4.5998e-06 [flash_sp]: 9.19029e-06 [merge_comm]: 7.54977e-06 [allreduce_fusion]: 5.41983e-06 [matmul_add_comm_reduction]: 1.04401e-05 [allreduce_slice_to_reducescatter]: 5.09899e-07 [virtual_shard_identity]: 1.03596e-05 [virtual_dataset]: 8.16025e-06 [get_grad_eliminate_]: 9.66014e-06 [virtual_output]: 7.70018e-06 [merge_forward]: 5.45988e-06 [cell_reuse_recompute_pass]: 1.73971e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.68798e-05 [before_grad]: 1.36802e-05 [inplace_validation]: 4.57978e-06 [meta_fg_expand]: 4.93973e-06 [inplace_validation_after_expand]: 5.47012e-06 [flash_sp_send_recv_attached]: 3.72995e-06 [receive_attached]: 2.38977e-06 [after_resolve]: 1.13798e-05 [a_after_grad]: 1.24597e-05 [special_op_eliminate]: 8.84011e-06 [renormalize]: 0.00044495 [add_forward_monad_depend]: 3.89991e-06 [auto_monad_grad]: 1.88965e-06 [auto_monad_eliminator]: 3.15802e-05 [cse]: 3.14801e-05 [a_3]: 5.90398e-05 [Cycle 2]: 0.00077549, [43] [expand_dump_flag]: 1.09011e-06 [switch_simplify]: 9.06968e-06 [loop_unroll]: 7.67969e-06 [a_1]: 0.00020203 [recompute_prepare]: 7.31042e-06 [updatestate_depend_eliminate]: 5.92973e-06 [updatestate_assign_eliminate]: 4.81959e-06 [updatestate_loads_eliminate]: 5.26989e-06 [parameter_eliminate]: 1.23959e-06 [a_2]: 0.00010507 [accelerated_algorithm]: 7.96001e-06 [shard]: 1.20979e-06 [meta_shard_fg_expand]: 2.46009e-06 [shard_inline]: 7.7202e-06 [auto_parallel]: 1.09798e-05 [parallel]: 3.6899e-06 [flash_sp]: 2.81027e-06 [merge_comm]: 5.83986e-06 [allreduce_fusion]: 5.47012e-06 [matmul_add_comm_reduction]: 8.10018e-06 [allreduce_slice_to_reducescatter]: 2.59839e-07 [virtual_shard_identity]: 8.88016e-06 [virtual_dataset]: 7.56001e-06 [get_grad_eliminate_]: 7.35e-06 [virtual_output]: 6.83032e-06 [merge_forward]: 4.35021e-06 [cell_reuse_recompute_pass]: 1.95997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.52099e-05 [before_grad]: 1.23698e-05 [inplace_validation]: 4.11971e-06 [meta_fg_expand]: 4.52995e-06 [inplace_validation_after_expand]: 4.98025e-06 [flash_sp_send_recv_attached]: 9.09902e-07 [receive_attached]: 8.99658e-07 [after_resolve]: 9.79006e-06 [a_after_grad]: 1.19801e-05 [special_op_eliminate]: 7.2401e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.90344e-07 [auto_monad_grad]: 1.11014e-06 [auto_monad_eliminator]: 1.88099e-05 [cse]: 1.97804e-05 [a_3]: 4.82998e-05 [py_interpret_to_execute_after_opt_a]: 8.40984e-06 [slice_cell_reuse_recomputed_activation]: 1.17999e-06 [rewriter_after_opt_a]: 0.00013701 [convert_after_rewriter]: 8.97981e-06 [order_py_execute_after_rewriter]: 6.33998e-06 [opt_b]: 0.0002403, [1] [Cycle 1]: 0.00023468, [7] [b_1]: 0.00016199 [b_2]: 9.21031e-06 [updatestate_depend_eliminate]: 5.49015e-06 [updatestate_assign_eliminate]: 4.63985e-06 [updatestate_loads_eliminate]: 5.12972e-06 [renormalize]: 2.80328e-07 [cse]: 1.82497e-05 [optimize_parallel_all_gather_comm]: 8.49972e-06 [overlap_param_gather]: 1.34995e-06 [cconv]: 2.38502e-05 [loop_unroll]: 0.00046017 [opt_after_cconv]: 0.00012892, [1] [Cycle 1]: 0.00012313, [7] [c_1]: 5.067e-05 [parameter_eliminate]: 2.46987e-06 [updatestate_depend_eliminate]: 8.00984e-06 [updatestate_assign_eliminate]: 4.67012e-06 [updatestate_loads_eliminate]: 4.96022e-06 [cse]: 2.104e-05 [renormalize]: 3.59956e-07 [remove_dup_value]: 1.196e-05 [tuple_transform]: 7.02501e-05, [1] [Cycle 1]: 6.58198e-05, [2] [d_1]: 5.57299e-05 [renormalize]: 2.19792e-07 [partial_unused_args_eliminate]: 1.8701e-06 [add_cache_embedding]: 1.20997e-05 [add_recomputation]: 6.01797e-05 [cse_after_recomputation]: 2.64999e-05, [1] [Cycle 1]: 2.163e-05, [1] [cse]: 1.65701e-05 [environ_conv]: 6.44987e-06 [swap_dp_allreduce_reducescatter]: 7.68015e-06 [bias_add_comm_swap]: 1.99024e-06 [label_micro_interleaved_index]: 1.66008e-06 [label_fine_grained_interleaved_index]: 1.66986e-06 [merge_cast_opt]: 1.11014e-06 [slice_recompute_activation]: 1.32993e-06 [micro_interleaved_order_control]: 1.62004e-06 [assign_add_opt]: 6.69993e-06 [ForceFp32Comm]: 7.70204e-07 [remove_cast_before_assign_add]: 1.0198e-06 [full_micro_interleaved_order_control]: 1.74996e-06 [reorder_send_recv_between_fp_bp]: 1.57021e-06 [comm_op_add_attrs]: 9.69972e-07 [add_comm_op_reuse_tag]: 9.19681e-07 [interleave_split_concat_branches]: 8.29808e-07 [interleave_parallel_branches]: 7.90227e-07 [overlap_opt_shard_in_pipeline]: 8.89879e-07 [overlap_opt_shard_grad_in_pipeline]: 2.06986e-06 [control_data_broadcast_order]: 8.59611e-07 [grouped_pairwise_exchange_alltoall]: 1.38022e-06 [offloading_packed_experts]: 8.69855e-07 [overlap_recompute_and_grad_model_parallel]: 1.82027e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.07987e-06 [overlap_recompute_allgather_and_fa_grad]: 1.24006e-06 [overlap_grad_ring_attention]: 1.52038e-06 [overlap_grad_flash_sp]: 1.369e-05 [begin_end_overlap_inline]: 1.03004e-06 [split_matmul_comm_elemetwise]: 2.07033e-06 [split_layernorm_comm]: 1.57021e-06 [handle_group_info]: 9.4017e-07 [symbol_engine_optimizer]: 8.28002e-05, [1] [Cycle 1]: 7.847e-05, [6] [build]: 3.78024e-06 [elim_shapecalc]: 1.15098e-05 [elim_not_effective]: 1.645e-05 [opt_reshape]: 8.80007e-06 [fold_const_symbol]: 1.35098e-05 [renormalize]: 1.8999e-07 [pipeline_parallel_scheduler]: 1.45985e-06 [auto_monad_reorder]: 2.86601e-05 [get_jit_bprop_graph]: 3.70201e-07 [rewriter_after_jit_bprop_graph]: 5.80214e-07 [eliminate_special_op_node]: 0.00047258 [distribtued_split]: 4.05903e-05 [validate]: 3.20603e-05 [task_emit]: 0.0694286 [execute]: 1.10604e-05 Sums bootstrap : 0.000309s : 0.41% type_inference : 0.002420s : 3.18% auto_monad : 0.000127s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000547s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000221s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000021s : 0.03% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000017s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000009s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000445s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.07% optimize.opt_a.cse : 0.000051s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000137s : 0.18% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000460s : 0.60% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000473s : 0.62% distribtued_split : 0.000041s : 0.05% validate : 0.000032s : 0.04% task_emit : 0.069429s : 91.22% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000128 63 3.82% : 0.000005s : 2: substitution.depend_value_elim 2.19% : 0.000003s : 5: substitution.elim_not_effective 1.98% : 0.000003s : 5: substitution.fold_const_symbol 5.46% : 0.000007s : 6: substitution.graph_param_transform 50.90% : 0.000065s : 1: substitution.inline 4.03% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.57% : 0.000005s : 6: substitution.load_eliminater 2.94% : 0.000004s : 2: substitution.reduce_all_const_elim 5.70% : 0.000007s : 10: substitution.remove_not_recompute_node 2.48% : 0.000003s : 2: substitution.replace_old_param 8.65% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.27% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002390 2 88.65% : 0.002119s : 1: type_inference.infer 11.35% : 0.000271s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000224 1420 0.86% : 0.000002s : 13: predicate.accumulaten_eliminater 1.00% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.82% : 0.000002s : 13: predicate.addn_zero_filter 0.80% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.29% : 0.000005s : 25: predicate.arithmetic_simplify 0.79% : 0.000002s : 13: predicate.cast_eliminate 0.80% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.25% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.26% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.98% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.54% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000004s : 31: predicate.environ_get_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.36% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.72% : 0.000002s : 12: predicate.incorporate_call_switch 5.61% : 0.000013s : 63: predicate.inline 1.00% : 0.000002s : 12: predicate.inline_without_move 0.42% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.06% : 0.000002s : 12: predicate.less_batch_normalization 1.59% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000005s : 38: predicate.load_eliminater 1.25% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.23% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.77% : 0.000002s : 12: predicate.merge_addn 0.81% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.81% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.79% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.25% : 0.000003s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.88% : 0.000002s : 12: predicate.reduce_all_const_elim 1.05% : 0.000002s : 13: predicate.reduce_eliminate 0.53% : 0.000001s : 12: predicate.remove_not_recompute_node 1.21% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.01% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.03% : 0.000002s : 12: predicate.shard_identity_eliminate 1.37% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 1.00% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.00% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.24% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.65% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.32% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.86% : 0.000002s : 13: predicate.transpose_eliminate 1.86% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.63% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.74% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.80% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.41% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.38% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.59% : 0.000001s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.58% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000155 4 10.89% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.11% : 0.000138s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089211 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000065s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.16% : 0.000139s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000334s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.54% : 0.000486s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000004s : 1: label_micro_interleaved_index 0.53% : 0.000469s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001107s : 80: opt.transform.opt_a 0.06% : 0.000049s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.17% : 0.005501s : 1: opt_a 0.15% : 0.000133s : 1: opt_after_cconv 0.27% : 0.000243s : 1: opt_b 7.99% : 0.007130s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.27% : 0.000243s : 1: renormalize.infer 0.22% : 0.000196s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000142s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000086s : 1: symbol_engine_optimizer 77.86% : 0.069460s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.73% : 0.002437s : 1: type_inference 0.07% : 0.000066s : 1: validate TotalTime = 0.0810533, [21] [bootstrap]: 0.0002902 [type_inference]: 0.00217738 [auto_monad]: 9.913e-05 [graph_reusing]: 1.47987e-06 [inline]: 1.17999e-06 [parallel-infer-symbol]: 9.49949e-07 [pre_auto_parallel]: 2.05603e-05 [insert-virtual-dataset]: 1.8198e-06 [parallel-infer-symbol-second]: 3.7998e-07 [dataset_repeat_opt]: 8.49832e-07 [pipeline_split]: 1.41002e-06 [optimize]: 0.00688194, [52] [py_interpret_to_execute]: 1.26199e-05 [rewriter_before_opt_a]: 3.00799e-05 [opt_a]: 0.00528263, [2] [Cycle 1]: 0.00143144, [43] [expand_dump_flag]: 3.05986e-06 [switch_simplify]: 2.49697e-05 [loop_unroll]: 1.312e-05 [a_1]: 0.00032279 [recompute_prepare]: 8.46013e-06 [updatestate_depend_eliminate]: 7.68015e-06 [updatestate_assign_eliminate]: 5.38956e-06 [updatestate_loads_eliminate]: 5.70994e-06 [parameter_eliminate]: 1.99024e-06 [a_2]: 0.00011244 [accelerated_algorithm]: 8.36002e-06 [shard]: 1.75973e-06 [meta_shard_fg_expand]: 3.60981e-06 [shard_inline]: 8.25012e-06 [auto_parallel]: 1.09402e-05 [parallel]: 6.06012e-06 [flash_sp]: 7.63033e-06 [merge_comm]: 7.16001e-06 [allreduce_fusion]: 5.32996e-06 [matmul_add_comm_reduction]: 9.09017e-06 [allreduce_slice_to_reducescatter]: 3.39933e-07 [virtual_shard_identity]: 9.46969e-06 [virtual_dataset]: 8.15e-06 [get_grad_eliminate_]: 7.41007e-06 [virtual_output]: 7.66013e-06 [merge_forward]: 5.26011e-06 [cell_reuse_recompute_pass]: 1.58977e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.61198e-05 [before_grad]: 1.34297e-05 [inplace_validation]: 4.27011e-06 [meta_fg_expand]: 5.24987e-06 [inplace_validation_after_expand]: 5.34998e-06 [flash_sp_send_recv_attached]: 3.38024e-06 [receive_attached]: 1.94972e-06 [after_resolve]: 1.05202e-05 [a_after_grad]: 1.272e-05 [special_op_eliminate]: 7.84034e-06 [renormalize]: 0.00041054 [add_forward_monad_depend]: 2.65986e-06 [auto_monad_grad]: 1.83005e-06 [auto_monad_eliminator]: 2.45501e-05 [cse]: 2.57399e-05 [a_3]: 5.77201e-05 [Cycle 2]: 0.00084488, [43] [expand_dump_flag]: 9.60194e-07 [switch_simplify]: 8.78982e-06 [loop_unroll]: 9.78028e-06 [a_1]: 0.0002584 [recompute_prepare]: 8.25012e-06 [updatestate_depend_eliminate]: 6.00982e-06 [updatestate_assign_eliminate]: 4.72972e-06 [updatestate_loads_eliminate]: 4.96022e-06 [parameter_eliminate]: 1.11992e-06 [a_2]: 0.00010542 [accelerated_algorithm]: 8.10996e-06 [shard]: 1.22003e-06 [meta_shard_fg_expand]: 2.49036e-06 [shard_inline]: 7.82032e-06 [auto_parallel]: 9.64012e-06 [parallel]: 3.32994e-06 [flash_sp]: 2.73995e-06 [merge_comm]: 5.46034e-06 [allreduce_fusion]: 4.97978e-06 [matmul_add_comm_reduction]: 7.63033e-06 [allreduce_slice_to_reducescatter]: 2.39816e-07 [virtual_shard_identity]: 8.36002e-06 [virtual_dataset]: 7.91997e-06 [get_grad_eliminate_]: 7.97957e-06 [virtual_output]: 7.42031e-06 [merge_forward]: 5.62007e-06 [cell_reuse_recompute_pass]: 2.20025e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.632e-05 [before_grad]: 1.255e-05 [inplace_validation]: 4.42006e-06 [meta_fg_expand]: 4.97978e-06 [inplace_validation_after_expand]: 5.23031e-06 [flash_sp_send_recv_attached]: 8.09785e-07 [receive_attached]: 8.99658e-07 [after_resolve]: 9.56003e-06 [a_after_grad]: 1.20299e-05 [special_op_eliminate]: 7.58981e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 9.60194e-07 [auto_monad_grad]: 9.49949e-07 [auto_monad_eliminator]: 1.767e-05 [cse]: 1.83401e-05 [a_3]: 4.865e-05 [py_interpret_to_execute_after_opt_a]: 8.72975e-06 [slice_cell_reuse_recomputed_activation]: 1.72993e-06 [rewriter_after_opt_a]: 0.00013118 [convert_after_rewriter]: 8.10996e-06 [order_py_execute_after_rewriter]: 5.41983e-06 [opt_b]: 0.0002419, [1] [Cycle 1]: 0.00023695, [7] [b_1]: 0.00016397 [b_2]: 9.70997e-06 [updatestate_depend_eliminate]: 5.35976e-06 [updatestate_assign_eliminate]: 4.54998e-06 [updatestate_loads_eliminate]: 4.82984e-06 [renormalize]: 2.70084e-07 [cse]: 1.72202e-05 [optimize_parallel_all_gather_comm]: 7.56001e-06 [overlap_param_gather]: 8.2003e-07 [cconv]: 1.62898e-05 [loop_unroll]: 0.00047995 [opt_after_cconv]: 0.0001275, [1] [Cycle 1]: 0.00012189, [7] [c_1]: 5.23599e-05 [parameter_eliminate]: 1.74996e-06 [updatestate_depend_eliminate]: 7.07014e-06 [updatestate_assign_eliminate]: 4.42984e-06 [updatestate_loads_eliminate]: 5.08968e-06 [cse]: 1.92299e-05 [renormalize]: 3.19909e-07 [remove_dup_value]: 1.154e-05 [tuple_transform]: 6.80503e-05, [1] [Cycle 1]: 6.37104e-05, [2] [d_1]: 5.43301e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 2.06009e-06 [add_cache_embedding]: 1.14404e-05 [add_recomputation]: 5.51599e-05 [cse_after_recomputation]: 2.47001e-05, [1] [Cycle 1]: 2.04798e-05, [1] [cse]: 1.55498e-05 [environ_conv]: 6.4401e-06 [swap_dp_allreduce_reducescatter]: 7.09016e-06 [bias_add_comm_swap]: 1.91992e-06 [label_micro_interleaved_index]: 1.59023e-06 [label_fine_grained_interleaved_index]: 1.41002e-06 [merge_cast_opt]: 7.19912e-07 [slice_recompute_activation]: 1.16974e-06 [micro_interleaved_order_control]: 1.58977e-06 [assign_add_opt]: 6.8401e-06 [ForceFp32Comm]: 5.89993e-07 [remove_cast_before_assign_add]: 6.49597e-07 [full_micro_interleaved_order_control]: 1.3602e-06 [reorder_send_recv_between_fp_bp]: 1.26986e-06 [comm_op_add_attrs]: 6.50063e-07 [add_comm_op_reuse_tag]: 6.50063e-07 [interleave_split_concat_branches]: 5.40167e-07 [interleave_parallel_branches]: 5.49946e-07 [overlap_opt_shard_in_pipeline]: 6.89644e-07 [overlap_opt_shard_grad_in_pipeline]: 1.12038e-06 [control_data_broadcast_order]: 6.59842e-07 [grouped_pairwise_exchange_alltoall]: 9.89996e-07 [offloading_packed_experts]: 1.03982e-06 [overlap_recompute_and_grad_model_parallel]: 1.75973e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.60191e-07 [overlap_recompute_allgather_and_fa_grad]: 7.19912e-07 [overlap_grad_ring_attention]: 1.32993e-06 [overlap_grad_flash_sp]: 1.13696e-05 [begin_end_overlap_inline]: 5.19678e-07 [split_matmul_comm_elemetwise]: 1.20001e-06 [split_layernorm_comm]: 1.31968e-06 [handle_group_info]: 5.49946e-07 [symbol_engine_optimizer]: 8.27699e-05, [1] [Cycle 1]: 7.868e-05, [6] [build]: 3.24007e-06 [elim_shapecalc]: 1.169e-05 [elim_not_effective]: 1.613e-05 [opt_reshape]: 8.95979e-06 [fold_const_symbol]: 1.32602e-05 [renormalize]: 2.19792e-07 [pipeline_parallel_scheduler]: 9.69972e-07 [auto_monad_reorder]: 2.36603e-05 [get_jit_bprop_graph]: 3.59956e-07 [rewriter_after_jit_bprop_graph]: 2.99886e-07 [eliminate_special_op_node]: 0.00049243 [distribtued_split]: 3.49102e-05 [validate]: 3.01697e-05 [task_emit]: 0.0707597 [execute]: 8.53976e-06 Sums bootstrap : 0.000290s : 0.38% type_inference : 0.002177s : 2.82% auto_monad : 0.000099s : 0.13% graph_reusing : 0.000001s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000034s : 0.04% optimize.opt_a.loop_unroll : 0.000023s : 0.03% optimize.opt_a.a_1 : 0.000581s : 0.75% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000218s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000411s : 0.53% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000042s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000131s : 0.17% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000164s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000017s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000480s : 0.62% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000019s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000055s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000011s : 0.01% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000492s : 0.64% distribtued_split : 0.000035s : 0.05% validate : 0.000030s : 0.04% task_emit : 0.070760s : 91.79% execute : 0.000009s : 0.01% TotalTime = 0.0809264, [21] [bootstrap]: 0.00029546 [type_inference]: 0.00234938 [auto_monad]: 0.00012694 [graph_reusing]: 2.25985e-06 [inline]: 1.20979e-06 [parallel-infer-symbol]: 1.95997e-06 [pre_auto_parallel]: 2.506e-05 [insert-virtual-dataset]: 3.01981e-06 [parallel-infer-symbol-second]: 3.70201e-07 [dataset_repeat_opt]: 7.00355e-07 [pipeline_split]: 1.43982e-06 [optimize]: 0.00713176, [52] [py_interpret_to_execute]: 1.48099e-05 [rewriter_before_opt_a]: 3.72101e-05 [opt_a]: 0.00548437, [2] [Cycle 1]: 0.00154058, [43] [expand_dump_flag]: 3.48967e-06 [switch_simplify]: 2.902e-05 [loop_unroll]: 1.31e-05 [a_1]: 0.00034354 [recompute_prepare]: 8.50996e-06 [updatestate_depend_eliminate]: 8.67015e-06 [updatestate_assign_eliminate]: 6.07036e-06 [updatestate_loads_eliminate]: 7.35e-06 [parameter_eliminate]: 3.53018e-06 [a_2]: 0.00011625 [accelerated_algorithm]: 8.00006e-06 [shard]: 2.18023e-06 [meta_shard_fg_expand]: 3.68012e-06 [shard_inline]: 8.52998e-06 [auto_parallel]: 1.131e-05 [parallel]: 7.3202e-06 [flash_sp]: 1.129e-05 [merge_comm]: 7.72998e-06 [allreduce_fusion]: 5.49015e-06 [matmul_add_comm_reduction]: 1.04099e-05 [allreduce_slice_to_reducescatter]: 3.29688e-07 [virtual_shard_identity]: 8.95001e-06 [virtual_dataset]: 7.93999e-06 [get_grad_eliminate_]: 7.83987e-06 [virtual_output]: 7.49994e-06 [merge_forward]: 5.66011e-06 [cell_reuse_recompute_pass]: 1.68988e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.65598e-05 [before_grad]: 1.37701e-05 [inplace_validation]: 5.4799e-06 [meta_fg_expand]: 5.60004e-06 [inplace_validation_after_expand]: 6.42985e-06 [flash_sp_send_recv_attached]: 4.73997e-06 [receive_attached]: 2.63005e-06 [after_resolve]: 1.10101e-05 [a_after_grad]: 1.24299e-05 [special_op_eliminate]: 8.33999e-06 [renormalize]: 0.00042056 [add_forward_monad_depend]: 3.17022e-06 [auto_monad_grad]: 1.2503e-06 [auto_monad_eliminator]: 2.79699e-05 [cse]: 3.22401e-05 [a_3]: 5.86901e-05 [Cycle 2]: 0.00076484, [43] [expand_dump_flag]: 1.11014e-06 [switch_simplify]: 9.89018e-06 [loop_unroll]: 8.15e-06 [a_1]: 0.00019837 [recompute_prepare]: 7.19028e-06 [updatestate_depend_eliminate]: 5.72018e-06 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 5.22006e-06 [parameter_eliminate]: 1.19023e-06 [a_2]: 0.00010356 [accelerated_algorithm]: 8.21985e-06 [shard]: 1.27964e-06 [meta_shard_fg_expand]: 2.63005e-06 [shard_inline]: 7.94977e-06 [auto_parallel]: 1.133e-05 [parallel]: 3.36021e-06 [flash_sp]: 3.30014e-06 [merge_comm]: 5.88037e-06 [allreduce_fusion]: 4.84008e-06 [matmul_add_comm_reduction]: 7.7202e-06 [allreduce_slice_to_reducescatter]: 3.00352e-07 [virtual_shard_identity]: 8.3698e-06 [virtual_dataset]: 7.58003e-06 [get_grad_eliminate_]: 7.10972e-06 [virtual_output]: 7.01984e-06 [merge_forward]: 4.44008e-06 [cell_reuse_recompute_pass]: 1.80025e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.52099e-05 [before_grad]: 1.251e-05 [inplace_validation]: 4.18955e-06 [meta_fg_expand]: 4.71994e-06 [inplace_validation_after_expand]: 5.09992e-06 [flash_sp_send_recv_attached]: 9.00123e-07 [receive_attached]: 7.19912e-07 [after_resolve]: 9.85991e-06 [a_after_grad]: 1.13202e-05 [special_op_eliminate]: 7.17025e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.09668e-07 [auto_monad_grad]: 1.11992e-06 [auto_monad_eliminator]: 1.809e-05 [cse]: 1.95298e-05 [a_3]: 4.88302e-05 [py_interpret_to_execute_after_opt_a]: 8.73022e-06 [slice_cell_reuse_recomputed_activation]: 2.03028e-06 [rewriter_after_opt_a]: 0.00014896 [convert_after_rewriter]: 8.40006e-06 [order_py_execute_after_rewriter]: 5.01005e-06 [opt_b]: 0.00024459, [1] [Cycle 1]: 0.00023888, [7] [b_1]: 0.00016199 [b_2]: 9.96003e-06 [updatestate_depend_eliminate]: 5.1898e-06 [updatestate_assign_eliminate]: 4.48991e-06 [updatestate_loads_eliminate]: 5.26011e-06 [renormalize]: 3.29688e-07 [cse]: 1.95201e-05 [optimize_parallel_all_gather_comm]: 7.97026e-06 [overlap_param_gather]: 7.19912e-07 [cconv]: 1.97398e-05 [loop_unroll]: 0.00046628 [opt_after_cconv]: 0.00016205, [1] [Cycle 1]: 0.00015597, [7] [c_1]: 8.05603e-05 [parameter_eliminate]: 2.52994e-06 [updatestate_depend_eliminate]: 7.98004e-06 [updatestate_assign_eliminate]: 4.59002e-06 [updatestate_loads_eliminate]: 5.4501e-06 [cse]: 2.21198e-05 [renormalize]: 3.1013e-07 [remove_dup_value]: 9.73977e-06 [tuple_transform]: 6.84899e-05, [1] [Cycle 1]: 6.37597e-05, [2] [d_1]: 5.42901e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.34995e-06 [add_cache_embedding]: 1.07898e-05 [add_recomputation]: 5.05997e-05 [cse_after_recomputation]: 2.68598e-05, [1] [Cycle 1]: 2.21599e-05, [1] [cse]: 1.68397e-05 [environ_conv]: 5.92973e-06 [swap_dp_allreduce_reducescatter]: 6.92997e-06 [bias_add_comm_swap]: 1.3602e-06 [label_micro_interleaved_index]: 9.70438e-07 [label_fine_grained_interleaved_index]: 8.79634e-07 [merge_cast_opt]: 5.69969e-07 [slice_recompute_activation]: 1.15996e-06 [micro_interleaved_order_control]: 8.00006e-07 [assign_add_opt]: 6.50017e-06 [ForceFp32Comm]: 4.79631e-07 [remove_cast_before_assign_add]: 4.20026e-07 [full_micro_interleaved_order_control]: 8.60076e-07 [reorder_send_recv_between_fp_bp]: 7.79983e-07 [comm_op_add_attrs]: 4.50294e-07 [add_comm_op_reuse_tag]: 5.49946e-07 [interleave_split_concat_branches]: 5.49946e-07 [interleave_parallel_branches]: 4.4005e-07 [overlap_opt_shard_in_pipeline]: 2.12993e-06 [overlap_opt_shard_grad_in_pipeline]: 8.59611e-07 [control_data_broadcast_order]: 5.40167e-07 [grouped_pairwise_exchange_alltoall]: 6.10016e-07 [offloading_packed_experts]: 4.99655e-07 [overlap_recompute_and_grad_model_parallel]: 9.29926e-07 [overlap_grad_matmul_and_grad_allreduce]: 3.70201e-07 [overlap_recompute_allgather_and_fa_grad]: 4.49829e-07 [overlap_grad_ring_attention]: 8.60076e-07 [overlap_grad_flash_sp]: 1.14101e-05 [begin_end_overlap_inline]: 3.90224e-07 [split_matmul_comm_elemetwise]: 9.4017e-07 [split_layernorm_comm]: 8.49832e-07 [handle_group_info]: 4.20026e-07 [symbol_engine_optimizer]: 8.51098e-05, [1] [Cycle 1]: 8.07098e-05, [6] [build]: 3.33972e-06 [elim_shapecalc]: 1.18101e-05 [elim_not_effective]: 1.80202e-05 [opt_reshape]: 8.82009e-06 [fold_const_symbol]: 1.331e-05 [renormalize]: 3.1013e-07 [pipeline_parallel_scheduler]: 9.00123e-07 [auto_monad_reorder]: 2.08397e-05 [get_jit_bprop_graph]: 2.90107e-07 [rewriter_after_jit_bprop_graph]: 2.5006e-07 [eliminate_special_op_node]: 0.00048384 [distribtued_split]: 3.66699e-05 [validate]: 3.01502e-05 [task_emit]: 0.0701875 [execute]: 6.76e-06 Sums bootstrap : 0.000295s : 0.38% type_inference : 0.002349s : 3.06% auto_monad : 0.000127s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000542s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000017s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000421s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000046s : 0.06% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000149s : 0.19% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000020s : 0.03% optimize.loop_unroll : 0.000466s : 0.61% optimize.opt_after_cconv.c_1 : 0.000081s : 0.10% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000051s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000000s : 0.00% optimize.remove_cast_before_assign_add : 0.000000s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000000s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000000s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000000s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000011s : 0.01% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000000s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000021s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000484s : 0.63% distribtued_split : 0.000037s : 0.05% validate : 0.000030s : 0.04% task_emit : 0.070187s : 91.44% execute : 0.000007s : 0.01% TotalTime = 0.0811969, [21] [bootstrap]: 0.00031451 [type_inference]: 0.00252268 [auto_monad]: 0.00012632 [graph_reusing]: 2.3297e-06 [inline]: 1.45985e-06 [parallel-infer-symbol]: 1.72993e-06 [pre_auto_parallel]: 2.37701e-05 [insert-virtual-dataset]: 2.92016e-06 [parallel-infer-symbol-second]: 6.89644e-07 [dataset_repeat_opt]: 1.57999e-06 [pipeline_split]: 1.34995e-06 [optimize]: 0.0072752, [52] [py_interpret_to_execute]: 1.39303e-05 [rewriter_before_opt_a]: 0.00010788 [opt_a]: 0.00548831, [2] [Cycle 1]: 0.00156055, [43] [expand_dump_flag]: 3.4296e-06 [switch_simplify]: 3.01297e-05 [loop_unroll]: 1.36402e-05 [a_1]: 0.00033701 [recompute_prepare]: 9.15024e-06 [updatestate_depend_eliminate]: 8.97003e-06 [updatestate_assign_eliminate]: 5.49015e-06 [updatestate_loads_eliminate]: 7.22986e-06 [parameter_eliminate]: 3.22983e-06 [a_2]: 0.00011759 [accelerated_algorithm]: 8.55979e-06 [shard]: 2.35997e-06 [meta_shard_fg_expand]: 3.67966e-06 [shard_inline]: 8.2003e-06 [auto_parallel]: 1.19102e-05 [parallel]: 7.93999e-06 [flash_sp]: 9.74024e-06 [merge_comm]: 7.97026e-06 [allreduce_fusion]: 5.28991e-06 [matmul_add_comm_reduction]: 1.055e-05 [allreduce_slice_to_reducescatter]: 4.30271e-07 [virtual_shard_identity]: 9.11998e-06 [virtual_dataset]: 8.06991e-06 [get_grad_eliminate_]: 7.81985e-06 [virtual_output]: 8.08993e-06 [merge_forward]: 5.63702e-05 [cell_reuse_recompute_pass]: 1.89012e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.68998e-05 [before_grad]: 1.34199e-05 [inplace_validation]: 5.22006e-06 [meta_fg_expand]: 5.39981e-06 [inplace_validation_after_expand]: 6.4997e-06 [flash_sp_send_recv_attached]: 5.39003e-06 [receive_attached]: 2.61981e-06 [after_resolve]: 1.25598e-05 [a_after_grad]: 1.27098e-05 [special_op_eliminate]: 7.87014e-06 [renormalize]: 0.00042368 [add_forward_monad_depend]: 3.25032e-06 [auto_monad_grad]: 1.86032e-06 [auto_monad_eliminator]: 3.20203e-05 [cse]: 3.09199e-05 [a_3]: 5.82701e-05 [Cycle 2]: 0.00077756, [43] [expand_dump_flag]: 1.15018e-06 [switch_simplify]: 9.08971e-06 [loop_unroll]: 7.8897e-06 [a_1]: 0.00020383 [recompute_prepare]: 7.37002e-06 [updatestate_depend_eliminate]: 5.89993e-06 [updatestate_assign_eliminate]: 4.73997e-06 [updatestate_loads_eliminate]: 4.84986e-06 [parameter_eliminate]: 1.23028e-06 [a_2]: 0.00010328 [accelerated_algorithm]: 8.02008e-06 [shard]: 1.35042e-06 [meta_shard_fg_expand]: 2.44007e-06 [shard_inline]: 7.57026e-06 [auto_parallel]: 1.116e-05 [parallel]: 3.65963e-06 [flash_sp]: 3.24007e-06 [merge_comm]: 6.05965e-06 [allreduce_fusion]: 5.26989e-06 [matmul_add_comm_reduction]: 7.93021e-06 [allreduce_slice_to_reducescatter]: 2.60305e-07 [virtual_shard_identity]: 9.06968e-06 [virtual_dataset]: 8.04011e-06 [get_grad_eliminate_]: 7.39004e-06 [virtual_output]: 7.19028e-06 [merge_forward]: 4.73997e-06 [cell_reuse_recompute_pass]: 1.97999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.512e-05 [before_grad]: 1.24401e-05 [inplace_validation]: 4.19002e-06 [meta_fg_expand]: 4.94998e-06 [inplace_validation_after_expand]: 4.90993e-06 [flash_sp_send_recv_attached]: 9.10368e-07 [receive_attached]: 7.49715e-07 [after_resolve]: 9.88971e-06 [a_after_grad]: 1.17901e-05 [special_op_eliminate]: 7.08969e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 8.2003e-07 [auto_monad_grad]: 1.09011e-06 [auto_monad_eliminator]: 1.84099e-05 [cse]: 1.89198e-05 [a_3]: 4.80502e-05 [py_interpret_to_execute_after_opt_a]: 8.85036e-06 [slice_cell_reuse_recomputed_activation]: 2.24961e-06 [rewriter_after_opt_a]: 0.00013968 [convert_after_rewriter]: 1.17603e-05 [order_py_execute_after_rewriter]: 6.3898e-06 [opt_b]: 0.00023733, [1] [Cycle 1]: 0.00023202, [7] [b_1]: 0.00015986 [b_2]: 9.61963e-06 [updatestate_depend_eliminate]: 5.03985e-06 [updatestate_assign_eliminate]: 4.35021e-06 [updatestate_loads_eliminate]: 5.00027e-06 [renormalize]: 2.70084e-07 [cse]: 1.79303e-05 [optimize_parallel_all_gather_comm]: 8.92999e-06 [overlap_param_gather]: 1.95997e-06 [cconv]: 2.27001e-05 [loop_unroll]: 0.00050136 [opt_after_cconv]: 0.00015665, [1] [Cycle 1]: 0.00015064, [7] [c_1]: 7.58297e-05 [parameter_eliminate]: 2.35997e-06 [updatestate_depend_eliminate]: 8.05967e-06 [updatestate_assign_eliminate]: 4.48991e-06 [updatestate_loads_eliminate]: 5.26011e-06 [cse]: 2.11298e-05 [renormalize]: 3.29688e-07 [remove_dup_value]: 1.27601e-05 [tuple_transform]: 7.12899e-05, [1] [Cycle 1]: 6.67102e-05, [2] [d_1]: 5.726e-05 [renormalize]: 2.30037e-07 [partial_unused_args_eliminate]: 2.10991e-06 [add_cache_embedding]: 1.37598e-05 [add_recomputation]: 6.20196e-05 [cse_after_recomputation]: 2.65697e-05, [1] [Cycle 1]: 2.17999e-05, [1] [cse]: 1.708e-05 [environ_conv]: 8.46013e-06 [swap_dp_allreduce_reducescatter]: 7.18003e-06 [bias_add_comm_swap]: 2.45031e-06 [label_micro_interleaved_index]: 2.03028e-06 [label_fine_grained_interleaved_index]: 1.76998e-06 [merge_cast_opt]: 1.29966e-06 [slice_recompute_activation]: 1.70013e-06 [micro_interleaved_order_control]: 2.12993e-06 [assign_add_opt]: 7.06036e-06 [ForceFp32Comm]: 8.2003e-07 [remove_cast_before_assign_add]: 1.03004e-06 [full_micro_interleaved_order_control]: 2.16998e-06 [reorder_send_recv_between_fp_bp]: 1.94972e-06 [comm_op_add_attrs]: 9.00123e-07 [add_comm_op_reuse_tag]: 1.0198e-06 [interleave_split_concat_branches]: 8.40053e-07 [interleave_parallel_branches]: 6.9011e-07 [overlap_opt_shard_in_pipeline]: 1.35973e-06 [overlap_opt_shard_grad_in_pipeline]: 2.30968e-06 [control_data_broadcast_order]: 1.02026e-06 [grouped_pairwise_exchange_alltoall]: 8.50298e-07 [offloading_packed_experts]: 1.55997e-06 [overlap_recompute_and_grad_model_parallel]: 2.21003e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.01002e-06 [overlap_recompute_allgather_and_fa_grad]: 9.49949e-07 [overlap_grad_ring_attention]: 1.91992e-06 [overlap_grad_flash_sp]: 1.42199e-05 [begin_end_overlap_inline]: 7.10133e-07 [split_matmul_comm_elemetwise]: 1.71969e-06 [split_layernorm_comm]: 1.79e-06 [handle_group_info]: 7.49715e-07 [symbol_engine_optimizer]: 8.521e-05, [1] [Cycle 1]: 8.05003e-05, [6] [build]: 3.51993e-06 [elim_shapecalc]: 1.21603e-05 [elim_not_effective]: 1.60099e-05 [opt_reshape]: 8.65012e-06 [fold_const_symbol]: 1.36797e-05 [renormalize]: 2.79862e-07 [pipeline_parallel_scheduler]: 1.24006e-06 [auto_monad_reorder]: 2.921e-05 [get_jit_bprop_graph]: 4.4005e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00055401 [distribtued_split]: 4.18499e-05 [validate]: 3.614e-05 [task_emit]: 0.0700092 [execute]: 1.06702e-05 Sums bootstrap : 0.000315s : 0.41% type_inference : 0.002523s : 3.27% auto_monad : 0.000126s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000108s : 0.14% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000541s : 0.70% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000221s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000061s : 0.08% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000424s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.07% optimize.opt_a.cse : 0.000050s : 0.06% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000140s : 0.18% optimize.convert_after_rewriter : 0.000012s : 0.02% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000501s : 0.65% optimize.opt_after_cconv.c_1 : 0.000076s : 0.10% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000554s : 0.72% distribtued_split : 0.000042s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.070009s : 90.84% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000111 63 4.75% : 0.000005s : 2: substitution.depend_value_elim 2.36% : 0.000003s : 5: substitution.elim_not_effective 1.83% : 0.000002s : 5: substitution.fold_const_symbol 5.64% : 0.000006s : 6: substitution.graph_param_transform 47.47% : 0.000053s : 1: substitution.inline 4.47% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.51% : 0.000004s : 6: substitution.load_eliminater 2.65% : 0.000003s : 2: substitution.reduce_all_const_elim 6.89% : 0.000008s : 10: substitution.remove_not_recompute_node 2.46% : 0.000003s : 2: substitution.replace_old_param 9.47% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.50% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002153 2 89.96% : 0.001937s : 1: type_inference.infer 10.04% : 0.000216s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000052 1 100.00% : 0.000052s : 1: match.inline ------[predicate.] 0.000281 1420 0.63% : 0.000002s : 13: predicate.accumulaten_eliminater 0.92% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.58% : 0.000002s : 12: predicate.addn_check_dump 0.66% : 0.000002s : 13: predicate.addn_zero_filter 0.65% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 1.74% : 0.000005s : 25: predicate.arithmetic_simplify 0.78% : 0.000002s : 13: predicate.cast_eliminate 0.67% : 0.000002s : 12: predicate.check_bprop_eliminate 0.56% : 0.000002s : 12: predicate.compare_switch_simplify 0.18% : 0.000001s : 6: predicate.const_output_eliminate 0.38% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 0.97% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.67% : 0.000002s : 12: predicate.depend_value_elim 0.68% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.74% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.69% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.21% : 0.000001s : 6: predicate.elim_not_effective 0.46% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 20.51% : 0.000058s : 19: predicate.environ_add_const_eliminate 0.90% : 0.000003s : 19: predicate.environ_get_add_eliminate 0.89% : 0.000002s : 19: predicate.environ_get_depend_swap 1.54% : 0.000004s : 31: predicate.environ_get_eliminate 0.97% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.71% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.01% : 0.000003s : 14: predicate.float_depend_g_call 0.64% : 0.000002s : 12: predicate.float_environ_get_switch 0.87% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.18% : 0.000001s : 6: predicate.fold_const_symbol 0.72% : 0.000002s : 12: predicate.get_grad_eliminate 0.25% : 0.000001s : 6: predicate.graph_param_transform 0.62% : 0.000002s : 12: predicate.incorporate_call 0.56% : 0.000002s : 12: predicate.incorporate_call_switch 4.51% : 0.000013s : 63: predicate.inline 0.88% : 0.000002s : 12: predicate.inline_without_move 0.32% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.89% : 0.000002s : 12: predicate.less_batch_normalization 1.37% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 1.95% : 0.000005s : 38: predicate.load_eliminater 1.08% : 0.000003s : 6: predicate.loop_unroll_after_grad 0.95% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.46% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.58% : 0.000002s : 12: predicate.merge_addn 0.60% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.62% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.62% : 0.000002s : 13: predicate.minmaximum_grad 0.59% : 0.000002s : 6: predicate.mutable_eliminate 0.40% : 0.000001s : 6: predicate.opt_reshape 0.41% : 0.000001s : 6: predicate.parallel_virtual_node 0.97% : 0.000003s : 14: predicate.partial_defer_inline 1.02% : 0.000003s : 19: predicate.partial_eliminate 0.70% : 0.000002s : 13: predicate.print_const_string_wrapper 0.75% : 0.000002s : 12: predicate.reduce_all_const_elim 0.83% : 0.000002s : 13: predicate.reduce_eliminate 0.37% : 0.000001s : 12: predicate.remove_not_recompute_node 0.92% : 0.000003s : 25: predicate.replace_applicator 0.36% : 0.000001s : 12: predicate.replace_old_param 0.21% : 0.000001s : 6: predicate.reset_defer_inline 0.68% : 0.000002s : 13: predicate.reshape_eliminate 0.62% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.41% : 0.000001s : 6: predicate.row_tensor_eliminate 0.79% : 0.000002s : 12: predicate.same_eliminate 0.38% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.73% : 0.000002s : 12: predicate.shard_identity_eliminate 1.17% : 0.000003s : 18: predicate.special_op_eliminate 0.73% : 0.000002s : 12: predicate.specialize_transform 0.98% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.79% : 0.000002s : 12: predicate.stack_unstack_eliminate 1.88% : 0.000005s : 38: predicate.stopgrad_eliminater 0.36% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.71% : 0.000002s : 14: predicate.switch_defer_inline 1.35% : 0.000004s : 26: predicate.switch_layer_defer_inline 3.27% : 0.000009s : 43: predicate.switch_simplify 0.66% : 0.000002s : 13: predicate.tile_eliminate 0.64% : 0.000002s : 13: predicate.transpose_eliminate 1.40% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.33% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.26% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.30% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.36% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.08% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.40% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 1.87% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 2.81% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.40% : 0.000001s : 6: predicate.value_based_eliminate 0.71% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.66% : 0.000002s : 12: predicate.virtual_output_eliminate 0.39% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000131 4 8.54% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.46% : 0.000120s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089751 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000060s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.12% : 0.000110s : 1: auto_monad 0.03% : 0.000030s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000313s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000042s : 1: distribtued_split 0.56% : 0.000505s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000490s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.26% : 0.001127s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.03% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 5.89% : 0.005286s : 1: opt_a 0.15% : 0.000131s : 1: opt_after_cconv 0.27% : 0.000245s : 1: opt_b 7.68% : 0.006891s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000026s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.24% : 0.000217s : 1: renormalize.infer 0.21% : 0.000188s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000137s : 1: rewriter_after_opt_a 0.04% : 0.000034s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000086s : 1: symbol_engine_optimizer 78.87% : 0.070786s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.44% : 0.002193s : 1: type_inference 0.07% : 0.000061s : 1: validate Time group info: ------[substitution.] 0.000131 63 4.92% : 0.000006s : 2: substitution.depend_value_elim 1.64% : 0.000002s : 5: substitution.elim_not_effective 1.51% : 0.000002s : 5: substitution.fold_const_symbol 4.44% : 0.000006s : 6: substitution.graph_param_transform 51.48% : 0.000067s : 1: substitution.inline 4.19% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.27% : 0.000004s : 6: substitution.load_eliminater 2.93% : 0.000004s : 2: substitution.reduce_all_const_elim 6.39% : 0.000008s : 10: substitution.remove_not_recompute_node 2.35% : 0.000003s : 2: substitution.replace_old_param 9.52% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.37% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002321 2 88.49% : 0.002053s : 1: type_inference.infer 11.51% : 0.000267s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000066 1 100.00% : 0.000066s : 1: match.inline ------[predicate.] 0.000228 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.07% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.81% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.11% : 0.000005s : 25: predicate.arithmetic_simplify 0.88% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.58% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.78% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.32% : 0.000001s : 6: predicate.elim_not_effective 0.59% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_depend_swap 1.98% : 0.000005s : 31: predicate.environ_get_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.34% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.13% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.26% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.87% : 0.000013s : 63: predicate.inline 0.99% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.05% : 0.000002s : 12: predicate.less_batch_normalization 1.69% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000005s : 38: predicate.load_eliminater 1.27% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.27% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.82% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.80% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.92% : 0.000002s : 6: predicate.mutable_eliminate 0.51% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.27% : 0.000003s : 19: predicate.partial_eliminate 0.82% : 0.000002s : 13: predicate.print_const_string_wrapper 0.79% : 0.000002s : 12: predicate.reduce_all_const_elim 1.00% : 0.000002s : 13: predicate.reduce_eliminate 0.56% : 0.000001s : 12: predicate.remove_not_recompute_node 1.20% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.34% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.09% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.30% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.83% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.31% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.74% : 0.000002s : 13: predicate.transpose_eliminate 1.75% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.73% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.85% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.68% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.33% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.49% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.83% : 0.000002s : 12: predicate.virtual_output_eliminate 0.46% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000151 4 9.18% : 0.000014s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.82% : 0.000137s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089884 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.06% : 0.000055s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.15% : 0.000139s : 1: auto_monad 0.03% : 0.000027s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.35% : 0.000316s : 1: bootstrap 0.03% : 0.000023s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000046s : 1: distribtued_split 0.55% : 0.000497s : 1: eliminate_special_op_node 0.01% : 0.000013s : 1: environ_conv 0.01% : 0.000013s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.53% : 0.000475s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000003s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001090s : 80: opt.transform.opt_a 0.09% : 0.000079s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 6.11% : 0.005489s : 1: opt_a 0.19% : 0.000166s : 1: opt_after_cconv 0.28% : 0.000248s : 1: opt_b 7.94% : 0.007139s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000008s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.01% : 0.000013s : 1: remove_dup_value 0.25% : 0.000225s : 1: renormalize.infer 0.21% : 0.000191s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000155s : 1: rewriter_after_opt_a 0.05% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000088s : 1: symbol_engine_optimizer 78.11% : 0.070209s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.63% : 0.002366s : 1: type_inference 0.07% : 0.000061s : 1: validate TotalTime = 0.0815105, [21] [bootstrap]: 0.00030469 [type_inference]: 0.00244002 [auto_monad]: 0.00012644 [graph_reusing]: 2.70968e-06 [inline]: 1.53016e-06 [parallel-infer-symbol]: 2.33995e-06 [pre_auto_parallel]: 2.599e-05 [insert-virtual-dataset]: 2.8098e-06 [parallel-infer-symbol-second]: 3.89758e-07 [dataset_repeat_opt]: 1.20001e-06 [pipeline_split]: 1.24983e-06 [optimize]: 0.00719154, [52] [py_interpret_to_execute]: 1.53603e-05 [rewriter_before_opt_a]: 3.51602e-05 [opt_a]: 0.00550542, [2] [Cycle 1]: 0.00164499, [43] [expand_dump_flag]: 3.3197e-06 [switch_simplify]: 2.98698e-05 [loop_unroll]: 1.32299e-05 [a_1]: 0.00033993 [recompute_prepare]: 9.17027e-06 [updatestate_depend_eliminate]: 8.71019e-06 [updatestate_assign_eliminate]: 5.64009e-06 [updatestate_loads_eliminate]: 7.32997e-06 [parameter_eliminate]: 3.02028e-06 [a_2]: 0.00011915 [accelerated_algorithm]: 8.17003e-06 [shard]: 2.08011e-06 [meta_shard_fg_expand]: 3.68012e-06 [shard_inline]: 8.5202e-06 [auto_parallel]: 1.18497e-05 [parallel]: 7.07991e-06 [flash_sp]: 1.07302e-05 [merge_comm]: 7.93021e-06 [allreduce_fusion]: 5.0501e-06 [matmul_add_comm_reduction]: 1.04499e-05 [allreduce_slice_to_reducescatter]: 4.99655e-07 [virtual_shard_identity]: 9.13022e-06 [virtual_dataset]: 8.35024e-06 [get_grad_eliminate_]: 7.50972e-06 [virtual_output]: 7.43987e-06 [merge_forward]: 6.76e-06 [cell_reuse_recompute_pass]: 1.83005e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.65603e-05 [before_grad]: 1.35498e-05 [inplace_validation]: 5.77979e-06 [meta_fg_expand]: 5.71972e-06 [inplace_validation_after_expand]: 6.31018e-06 [flash_sp_send_recv_attached]: 5.14043e-06 [receive_attached]: 2.63983e-06 [after_resolve]: 1.08997e-05 [a_after_grad]: 1.30399e-05 [special_op_eliminate]: 7.64988e-06 [renormalize]: 0.00055602 [add_forward_monad_depend]: 3.48967e-06 [auto_monad_grad]: 1.74996e-06 [auto_monad_eliminator]: 3.12398e-05 [cse]: 3.40799e-05 [a_3]: 5.69499e-05 [Cycle 2]: 0.00078225, [43] [expand_dump_flag]: 1.15996e-06 [switch_simplify]: 9.49996e-06 [loop_unroll]: 7.61962e-06 [a_1]: 0.00020218 [recompute_prepare]: 7.20005e-06 [updatestate_depend_eliminate]: 6.16023e-06 [updatestate_assign_eliminate]: 4.77023e-06 [updatestate_loads_eliminate]: 5.38956e-06 [parameter_eliminate]: 1.27964e-06 [a_2]: 0.00010438 [accelerated_algorithm]: 8.31997e-06 [shard]: 1.2503e-06 [meta_shard_fg_expand]: 2.68035e-06 [shard_inline]: 7.56001e-06 [auto_parallel]: 1.16299e-05 [parallel]: 3.60003e-06 [flash_sp]: 3.95998e-06 [merge_comm]: 6.08014e-06 [allreduce_fusion]: 4.99981e-06 [matmul_add_comm_reduction]: 8.22032e-06 [allreduce_slice_to_reducescatter]: 2.90107e-07 [virtual_shard_identity]: 9.07993e-06 [virtual_dataset]: 7.91019e-06 [get_grad_eliminate_]: 7.49994e-06 [virtual_output]: 6.94999e-06 [merge_forward]: 4.57978e-06 [cell_reuse_recompute_pass]: 1.98977e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.51102e-05 [before_grad]: 1.23698e-05 [inplace_validation]: 4.2296e-06 [meta_fg_expand]: 5.03985e-06 [inplace_validation_after_expand]: 4.95976e-06 [flash_sp_send_recv_attached]: 9.4017e-07 [receive_attached]: 7.90227e-07 [after_resolve]: 9.77004e-06 [a_after_grad]: 1.17901e-05 [special_op_eliminate]: 7.26013e-06 [renormalize]: 9.03383e-08 [add_forward_monad_depend]: 8.29808e-07 [auto_monad_grad]: 1.14972e-06 [auto_monad_eliminator]: 1.809e-05 [cse]: 1.87098e-05 [a_3]: 4.84399e-05 [py_interpret_to_execute_after_opt_a]: 9.0301e-06 [slice_cell_reuse_recomputed_activation]: 2.38977e-06 [rewriter_after_opt_a]: 0.00014187 [convert_after_rewriter]: 8.96025e-06 [order_py_execute_after_rewriter]: 6.65011e-06 [opt_b]: 0.00024294, [1] [Cycle 1]: 0.00023691, [7] [b_1]: 0.00016298 [b_2]: 9.60985e-06 [updatestate_depend_eliminate]: 5.27967e-06 [updatestate_assign_eliminate]: 4.68036e-06 [updatestate_loads_eliminate]: 5.09992e-06 [renormalize]: 3.19909e-07 [cse]: 1.83904e-05 [optimize_parallel_all_gather_comm]: 8.52998e-06 [overlap_param_gather]: 1.45985e-06 [cconv]: 2.19299e-05 [loop_unroll]: 0.00049947 [opt_after_cconv]: 0.00013318, [1] [Cycle 1]: 0.00012676, [7] [c_1]: 5.28502e-05 [parameter_eliminate]: 2.41026e-06 [updatestate_depend_eliminate]: 7.91997e-06 [updatestate_assign_eliminate]: 4.63007e-06 [updatestate_loads_eliminate]: 5.43008e-06 [cse]: 2.10102e-05 [renormalize]: 3.20375e-07 [remove_dup_value]: 1.19698e-05 [tuple_transform]: 6.92601e-05, [1] [Cycle 1]: 6.442e-05, [2] [d_1]: 5.52898e-05 [renormalize]: 1.80211e-07 [partial_unused_args_eliminate]: 2.1602e-06 [add_cache_embedding]: 1.38804e-05 [add_recomputation]: 6.06501e-05 [cse_after_recomputation]: 2.54302e-05, [1] [Cycle 1]: 2.05999e-05, [1] [cse]: 1.554e-05 [environ_conv]: 7.55023e-06 [swap_dp_allreduce_reducescatter]: 6.99004e-06 [bias_add_comm_swap]: 2.22027e-06 [label_micro_interleaved_index]: 1.93017e-06 [label_fine_grained_interleaved_index]: 1.97999e-06 [merge_cast_opt]: 1.4999e-06 [slice_recompute_activation]: 2.07964e-06 [micro_interleaved_order_control]: 1.61026e-06 [assign_add_opt]: 7.28015e-06 [ForceFp32Comm]: 8.50298e-07 [remove_cast_before_assign_add]: 7.30157e-07 [full_micro_interleaved_order_control]: 2.21003e-06 [reorder_send_recv_between_fp_bp]: 1.81003e-06 [comm_op_add_attrs]: 1.06031e-06 [add_comm_op_reuse_tag]: 1.30991e-06 [interleave_split_concat_branches]: 7.39936e-07 [interleave_parallel_branches]: 6.79865e-07 [overlap_opt_shard_in_pipeline]: 1.05007e-06 [overlap_opt_shard_grad_in_pipeline]: 2.10991e-06 [control_data_broadcast_order]: 9.29926e-07 [grouped_pairwise_exchange_alltoall]: 9.69972e-07 [offloading_packed_experts]: 1.11992e-06 [overlap_recompute_and_grad_model_parallel]: 2.05962e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.17021e-06 [overlap_recompute_allgather_and_fa_grad]: 8.10251e-07 [overlap_grad_ring_attention]: 1.86032e-06 [overlap_grad_flash_sp]: 1.39098e-05 [begin_end_overlap_inline]: 5.69969e-07 [split_matmul_comm_elemetwise]: 2.06986e-06 [split_layernorm_comm]: 1.43982e-06 [handle_group_info]: 8.79634e-07 [symbol_engine_optimizer]: 8.45599e-05, [1] [Cycle 1]: 8.028e-05, [6] [build]: 3.74997e-06 [elim_shapecalc]: 1.268e-05 [elim_not_effective]: 1.57799e-05 [opt_reshape]: 8.92999e-06 [fold_const_symbol]: 1.371e-05 [renormalize]: 2.10013e-07 [pipeline_parallel_scheduler]: 1.13016e-06 [auto_monad_reorder]: 2.681e-05 [get_jit_bprop_graph]: 4.49829e-07 [rewriter_after_jit_bprop_graph]: 4.30271e-07 [eliminate_special_op_node]: 0.00051359 [distribtued_split]: 4.04902e-05 [validate]: 3.45199e-05 [task_emit]: 0.0705428 [execute]: 1.15801e-05 Sums bootstrap : 0.000305s : 0.39% type_inference : 0.002440s : 3.15% auto_monad : 0.000126s : 0.16% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000542s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000224s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000556s : 0.72% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.06% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000142s : 0.18% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000499s : 0.64% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engi Time group info: ------[substitution.] 0.000127 63 5.12% : 0.000007s : 2: substitution.depend_value_elim 1.75% : 0.000002s : 5: substitution.elim_not_effective 2.05% : 0.000003s : 5: substitution.fold_const_symbol 5.71% : 0.000007s : 6: substitution.graph_param_transform 48.68% : 0.000062s : 1: substitution.inline 4.02% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.65% : 0.000005s : 6: substitution.load_eliminater 2.71% : 0.000003s : 2: substitution.reduce_all_const_elim 6.14% : 0.000008s : 10: substitution.remove_not_recompute_node 3.04% : 0.000004s : 2: substitution.replace_old_param 9.31% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.82% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002494 2 89.25% : 0.002226s : 1: type_inference.infer 10.75% : 0.000268s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000061 1 100.00% : 0.000061s : 1: match.inline ------[predicate.] 0.000229 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.11% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.21% : 0.000005s : 25: predicate.arithmetic_simplify 0.81% : 0.000002s : 13: predicate.cast_eliminate 0.77% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.40% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.89% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 1.02% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.19% : 0.000003s : 19: predicate.environ_get_depend_swap 1.91% : 0.000004s : 31: predicate.environ_get_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.86% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.67% : 0.000013s : 63: predicate.inline 0.94% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.05% : 0.000002s : 12: predicate.less_batch_normalization 1.89% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.41% : 0.000006s : 38: predicate.load_eliminater 1.22% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.87% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicatne_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000027s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000514s : 0.66% distribtued_split : 0.000040s : 0.05% validate : 0.000035s : 0.04% task_emit : 0.070543s : 91.08% execute : 0.000012s : 0.01% e.mini_step_allgather_replace 0.72% : 0.000002s : 13: predicate.minmaximum_grad 0.76% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.83% : 0.000002s : 12: predicate.reduce_all_const_elim 1.20% : 0.000003s : 13: predicate.reduce_eliminate 0.61% : 0.000001s : 12: predicate.remove_not_recompute_node 1.18% : 0.000003s : 25: predicate.replace_applicator 0.62% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.89% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 0.92% : 0.000002s : 12: predicate.same_eliminate 0.52% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.34% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.60% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.46% : 0.000010s : 43: predicate.switch_simplify 0.86% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.82% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.80% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.52% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.76% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.51% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.42% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.49% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.81% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000148 4 10.94% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.06% : 0.000132s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090293 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000067s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.15% : 0.000139s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.37% : 0.000339s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.00005 TotalTime = 0.0814882, [21] [bootstrap]: 0.00032452 [type_inference]: 0.00252485 [auto_monad]: 0.00013772 [graph_reusing]: 2.56998e-06 [inline]: 1.4999e-06 [parallel-infer-symbol]: 2.73017e-06 [pre_auto_parallel]: 2.73702e-05 [insert-virtual-dataset]: 3.34997e-06 [parallel-infer-symbol-second]: 4.29805e-07 [dataset_repeat_opt]: 1.55997e-06 [pipeline_split]: 2.02982e-06 [optimize]: 0.00727073, [52] [py_interpret_to_execute]: 2.33701e-05 [rewriter_before_opt_a]: 3.932e-05 [opt_a]: 0.00552741, [2] [Cycle 1]: 0.00163756, [43] [expand_dump_flag]: 3.83006e-06 [switch_simplify]: 2.97497e-05 [loop_unroll]: 1.32299e-05 [a_1]: 0.00034869 [recompute_prepare]: 9.34023e-06 [updatestate_depend_eliminate]: 9.49996e-06 [updatestate_assign_eliminate]: 5.88037e-06 [updatestate_loads_eliminate]: 7.41985e-06 [parameter_eliminate]: 3.51993e-06 [a_2]: 0.00012247 [accelerated_algorithm]: 8.6301e-06 [shard]: 2.63005e-06 [meta_shard_fg_expand]: 4.71016e-06 [shard_inline]: 8.26037e-06 [auto_parallel]: 1.28797e-05 [parallel]: 8.45967e-06 [flash_sp]: 1.13002e-05 [merge_comm]: 9.13022e-06 [allreduce_fusion]: 5.86035e-06 [matmul_add_comm_reduction]: 1.15801e-05 [allreduce_slice_to_reducescatter]: 4.80097e-07 [virtual_shard_identity]: 9.97027e-06 [virtual_dataset]: 8.41962e-06 [get_grad_eliminate_]: 7.75e-06 [virtual_output]: 7.87014e-06 [merge_forward]: 6.17001e-06 [cell_reuse_recompute_pass]: 2.19001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.75899e-05 [before_grad]: 1.44597e-05 [inplace_validation]: 5.60982e-06 [meta_fg_expand]: 5.66989e-06 [inplace_validation_after_expand]: 6.97002e-06 [flash_sp_send_recv_attached]: 5.60004e-06 [receive_attached]: 3.13995e-06 [after_resolve]: 1.14101e-05 [a_after_grad]: 1.32499e-05 [special_op_eliminate]: 7.95024e-06 [renormalize]: 0.00044907 [add_forward_monad_depend]: 3.70014e-06 [auto_monad_grad]: 2.00002e-06 [auto_monad_eliminator]: 3.47202e-05 [cse]: 3.35597e-05 [a_3]: 5.85597e-05 [Cycle 2]: 0.00080432, [43] [expand_dump_flag]: 1.20001e-06 [switch_simplify]: 8.95979e-06 [loop_unroll]: 7.7202e-06 [a_1]: 0.00020546 [recompute_prepare]: 7.45011e-06 [updatestate_depend_eliminate]: 6.02985e-06 [updatestate_assign_eliminate]: 4.84008e-06 [updatestate_loads_eliminate]: 5.17024e-06 [parameter_eliminate]: 1.57021e-06 [a_2]: 0.00010506 [accelerated_algorithm]: 8.53976e-06 [shard]: 1.11992e-06 [meta_shard_fg_expand]: 2.50991e-06 [shard_inline]: 7.90972e-06 [auto_parallel]: 1.20201e-05 [parallel]: 3.8296e-06 [flash_sp]: 4.04008e-06 [merge_comm]: 6.14021e-06 [allreduce_fusion]: 5.27967e-06 [matmul_add_comm_reduction]: 8.40984e-06 [allreduce_slice_to_reducescatter]: 2.59839e-07 [virtual_shard_identity]: 8.82987e-06 [virtual_dataset]: 7.82032e-06 [get_grad_eliminate_]: 7.45989e-06 [virtual_output]: 7.22986e-06 [merge_forward]: 5.02029e-06 [cell_reuse_recompute_pass]: 2.08989e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.53598e-05 [before_grad]: 1.29598e-05 [inplace_validation]: 5.07012e-06 [meta_fg_expand]: 5.07012e-06 [inplace_validation_after_expand]: 5.47012e-06 [flash_sp_send_recv_attached]: 8.49832e-07 [receive_attached]: 7.00355e-07 [after_resolve]: 1.017e-050s : 1: distribtued_split 0.63% : 0.000568s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000511s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001094s : 80: opt.transform.opt_a 0.08% : 0.000074s : 1: opt.transform.opt_after_cconv 0.17% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.08% : 0.005492s : 1: opt_a 0.18% : 0.000161s : 1: opt_after_cconv 0.27% : 0.000241s : 1: opt_b 8.07% : 0.007284s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000030s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000231s : 1: renormalize.infer 0.21% : 0.000186s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000145s : 1: rewriter_after_opt_a 0.12% : 0.000112s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000088s : 1: symbol_engine_optimizer 77.57% : 0.070037s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.81% : 0.002541s : 1: type_inference 0.08% : 0.000071s : 1: validate [a_after_grad]: 1.19498e-05 [special_op_eliminate]: 7.37002e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 9.59728e-07 [auto_monad_grad]: 1.27964e-06 [auto_monad_eliminator]: 1.88998e-05 [cse]: 2.009e-05 [a_3]: 4.928e-05 [py_interpret_to_execute_after_opt_a]: 9.13022e-06 [slice_cell_reuse_recomputed_activation]: 2.37999e-06 [rewriter_after_opt_a]: 0.0001469 [convert_after_rewriter]: 9.11998e-06 [order_py_execute_after_rewriter]: 6.38003e-06 [opt_b]: 0.00024754, [1] [Cycle 1]: 0.00024134, [7] [b_1]: 0.00016305 [b_2]: 9.64012e-06 [updatestate_depend_eliminate]: 5.62984e-06 [updatestate_assign_eliminate]: 4.68967e-06 [updatestate_loads_eliminate]: 5.30994e-06 [renormalize]: 3.49712e-07 [cse]: 1.89701e-05 [optimize_parallel_all_gather_comm]: 8.6003e-06 [overlap_param_gather]: 1.66008e-06 [cconv]: 2.43597e-05 [loop_unroll]: 0.00049476 [opt_after_cconv]: 0.00013701, [1] [Cycle 1]: 0.00012992, [7] [c_1]: 5.27496e-05 [parameter_eliminate]: 2.61003e-06 [updatestate_depend_eliminate]: 8.17003e-06 [updatestate_assign_eliminate]: 4.65987e-06 [updatestate_loads_eliminate]: 5.05988e-06 [cse]: 2.14698e-05 [renormalize]: 4.4005e-07 [remove_dup_value]: 1.37701e-05 [tuple_transform]: 7.16499e-05, [1] [Cycle 1]: 6.617e-05, [2] [d_1]: 5.60498e-05 [renormalize]: 2.90107e-07 [partial_unused_args_eliminate]: 1.99024e-06 [add_cache_embedding]: 1.35601e-05 [add_recomputation]: 6.30799e-05 [cse_after_recomputation]: 2.70898e-05, [1] [Cycle 1]: 2.18302e-05, [1] [cse]: 1.66101e-05 [environ_conv]: 7.11996e-06 [swap_dp_allreduce_reducescatter]: 7.70018e-06 [bias_add_comm_swap]: 2.71993e-06 [label_micro_interleaved_index]: 1.89012e-06 [label_fine_grained_interleaved_index]: 2.33995e-06 [merge_cast_opt]: 1.40024e-06 [slice_recompute_activation]: 1.8999e-06 [micro_interleaved_order_control]: 2.42004e-06 [assign_add_opt]: 6.98026e-06 [ForceFp32Comm]: 8.40053e-07 [remove_cast_before_assign_add]: 1.11014e-06 [full_micro_interleaved_order_control]: 2.70968e-06 [reorder_send_recv_between_fp_bp]: 2.3297e-06 [comm_op_add_attrs]: 1.1404e-06 [add_comm_op_reuse_tag]: 1.08033e-06 [interleave_split_concat_branches]: 9.10368e-07 [interleave_parallel_branches]: 9.4017e-07 [overlap_opt_shard_in_pipeline]: 1.72015e-06 [overlap_opt_shard_grad_in_pipeline]: 2.67988e-06 [control_data_broadcast_order]: 1.32993e-06 [grouped_pairwise_exchange_alltoall]: 1.34995e-06 [offloading_packed_experts]: 1.13016e-06 [overlap_recompute_and_grad_model_parallel]: 1.86032e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.89996e-07 [overlap_recompute_allgather_and_fa_grad]: 8.2003e-07 [overlap_grad_ring_attention]: 1.91992e-06 [overlap_grad_flash_sp]: 1.51098e-05 [begin_end_overlap_inline]: 8.50298e-07 [split_matmul_comm_elemetwise]: 2.14018e-06 [split_layernorm_comm]: 1.80025e-06 [handle_group_info]: 1.03004e-06 [symbol_engine_optimizer]: 8.80104e-05, [1] [Cycle 1]: 8.31797e-05, [6] [build]: 4.04008e-06 [elim_shapecalc]: 1.17696e-05 [elim_not_effective]: 1.76802e-05 [opt_reshape]: 8.66037e-06 [fold_const_symbol]: 1.415e-05 [renormalize]: 2.99886e-07 [pipeline_parallel_scheduler]: 1.83005e-06 [auto_monad_reorder]: 3.16799e-05 [get_jit_bprop_graph]: 4.29805e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.0005105 [distribtued_split]: 4.44101e-05 [validate]: 3.74801e-05 [task_emit]: 0.0701325 [execute]: 1.03e-05 Sums bootstrap : 0.000325s : 0.42% type_inference : 0.002525s : 3.27% auto_monad : 0.000138s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000027s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000023s : 0.03% optimize.rewriter_before_opt_a : 0.000039s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000554s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000228s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000449s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000054s : 0.07% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000147s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000495s : 0.64% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000510s : 0.66% distribtued_split : 0.000044s : 0.06% validate : 0.000037s : 0.05% task_emit : 0.070132s : 90.92% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000128 63 5.09% : 0.000007s : 2: substitution.depend_value_elim 1.88% : 0.000002s : 5: substitution.elim_not_effective 2.00% : 0.000003s : 5: substitution.fold_const_symbol 5.59% : 0.000007s : 6: substitution.graph_param_transform 50.12% : 0.000064s : 1: substitution.inline 4.17% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.29% : 0.000004s : 6: substitution.load_eliminater 2.51% : 0.000003s : 2: substitution.reduce_all_const_elim 6.01% : 0.000008s : 10: substitution.remove_not_recompute_node 2.60% : 0.000003s : 2: substitution.replace_old_param 8.78% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.96% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002410 2 88.73% : 0.002138s : 1: type_inference.infer 11.27% : 0.000272s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000063 1 100.00% : 0.000063s : 1: match.inline ------[predicate.] 0.000229 1420 0.76% : 0.000002s : 13: predicate.accumulaten_eliminater 1.08% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.69% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.14% : 0.000005s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.76% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.50% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.80% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.62% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.25% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.27% : 0.000003s : 19: predicate.environ_get_depend_swap 1.84% : 0.000004s : 31: predicate.environ_get_eliminate 1.20% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.79% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.34% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.10% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.49% : 0.000013s : 63: predicate.inline 1.02% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.05% : 0.000002s : 12: predicate.less_batch_normalization 1.68% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000005s : 38: predicate.load_eliminater 1.39% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.21% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.76% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.80% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.81% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.85% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.75% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.06% : 0.000002s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.19% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.77% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.46% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.03% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.04% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.68% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.50% : 0.000010s : 43: predicate.switch_simplify 0.93% : 0.000002s : 13: predicate.tile_eliminate 0.83% : 0.000002s : 13: predicate.transpose_eliminate 1.66% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.47% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.90% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.39% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.51% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.52% : 0.000001s : 6: predicate.value_based_eliminate 0.86% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000152 4 10.34% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.66% : 0.000136s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090635 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000065s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000139s : 1: auto_monad 0.04% : 0.000033s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000327s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.58% : 0.000528s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000509s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001095s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.08% : 0.005509s : 1: opt_a 0.15% : 0.000138s : 1: opt_after_cconv 0.27% : 0.000246s : 1: opt_b 7.94% : 0.007199s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.39% : 0.000357s : 1: renormalize.infer 0.21% : 0.000192s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000148s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000088s : 1: symbol_engine_optimizer 77.86% : 0.070569s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.71% : 0.002458s : 1: type_inference 0.08% : 0.000072s : 1: validate Time group info: ------[substitution.] 0.000135 63 5.22% : 0.000007s : 2: substitution.depend_value_elim 1.88% : 0.000003s : 5: substitution.elim_not_effective 2.18% : 0.000003s : 5: substitution.fold_const_symbol 5.28% : 0.000007s : 6: substitution.graph_param_transform 50.09% : 0.000068s : 1: substitution.inline 3.90% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.15% : 0.000004s : 6: substitution.load_eliminater 2.92% : 0.000004s : 2: substitution.reduce_all_const_elim 5.79% : 0.000008s : 10: substitution.remove_not_recompute_node 2.55% : 0.000003s : 2: substitution.replace_old_param 9.16% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.88% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002494 2 88.79% : 0.002214s : 1: type_inference.infer 11.21% : 0.000280s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000067 1 100.00% : 0.000067s : 1: match.inline ------[predicate.] 0.000228 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.02% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.75% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.39% : 0.000005s : 25: predicate.arithmetic_simplify 0.88% : 0.000002s : 13: predicate.cast_eliminate 0.76% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.38% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.95% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.51% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_depend_swap 1.91% : 0.000004s : 31: predicate.environ_get_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.88% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.48% : 0.000003s : 14: predicate.float_depend_g_call 0.69% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.70% : 0.000013s : 63: predicate.inline 1.10% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000002s : 12: predicate.less_batch_normalization 1.77% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.40% : 0.000005s : 38: predicate.load_eliminater 1.34% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.85% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.80% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.75% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.78% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.50% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.01% : 0.000002s : 13: predicate.reduce_eliminate 0.58% : 0.000001s : 12: predicate.remove_not_recompute_node 1.18% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.83% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.47% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.98% : 0.000002s : 12: predicate.shard_identity_eliminate 1.30% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 1.06% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.98% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.68% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.40% : 0.000010s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.81% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.57% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.71% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.77% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.42% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.52% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.47% : 0.000001s : 6: predicate.value_based_eliminate 0.90% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.50% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000153 4 10.93% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.07% : 0.000136s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090560 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000068s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.17% : 0.000152s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000349s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.01% : 0.000006s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.14% : 0.000126s : 1: dataset_repeat_opt 0.06% : 0.000055s : 1: distribtued_split 0.58% : 0.000525s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000505s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001119s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 6.11% : 0.005532s : 1: opt_a 0.16% : 0.000141s : 1: opt_after_cconv 0.28% : 0.000251s : 1: opt_b 8.04% : 0.007280s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000011s : 1: pipeline_split 0.04% : 0.000034s : 1: pre_auto_parallel 0.03% : 0.000029s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.27% : 0.000243s : 1: renormalize.infer 0.22% : 0.000200s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000153s : 1: rewriter_after_opt_a 0.05% : 0.000044s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000091s : 1: symbol_engine_optimizer 77.47% : 0.070160s : 1: task_emit 0.08% : 0.000075s : 1: tuple_transform 2.81% : 0.002544s : 1: type_inference 0.08% : 0.000074s : 1: validate TotalTime = 0.0838279, [21] [bootstrap]: 0.0003301 [type_inference]: 0.00263628 [auto_monad]: 0.00031127 [graph_reusing]: 3.09013e-06 [inline]: 1.93994e-06 [parallel-infer-symbol]: 2.40002e-06 [pre_auto_parallel]: 2.82801e-05 [insert-virtual-dataset]: 2.90992e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 1.48965e-06 [pipeline_split]: 1.79e-06 [optimize]: 0.00800167, [52] [py_interpret_to_execute]: 1.67498e-05 [rewriter_before_opt_a]: 4.15999e-05 [opt_a]: 0.00608543, [2] [Cycle 1]: 0.00181811, [43] [expand_dump_flag]: 3.75975e-06 [switch_simplify]: 3.338e-05 [loop_unroll]: 1.62497e-05 [a_1]: 0.00046103 [recompute_prepare]: 1.16401e-05 [updatestate_depend_eliminate]: 9.36026e-06 [updatestate_assign_eliminate]: 6.04987e-06 [updatestate_loads_eliminate]: 8.38004e-06 [parameter_eliminate]: 3.83984e-06 [a_2]: 0.00014384 [accelerated_algorithm]: 1.00797e-05 [shard]: 2.31992e-06 [meta_shard_fg_expand]: 4.74043e-06 [shard_inline]: 1.06501e-05 [auto_parallel]: 1.30902e-05 [parallel]: 8.19005e-06 [flash_sp]: 1.21398e-05 [merge_comm]: 9.51998e-06 [allreduce_fusion]: 6.59004e-06 [matmul_add_comm_reduction]: 1.18897e-05 [allreduce_slice_to_reducescatter]: 4.60073e-07 [virtual_shard_identity]: 1.228e-05 [virtual_dataset]: 1.03e-05 [get_grad_eliminate_]: 9.6499e-06 [virtual_output]: 9.45013e-06 [merge_forward]: 6.78003e-06 [cell_reuse_recompute_pass]: 1.91992e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.17902e-05 [before_grad]: 1.76099e-05 [inplace_validation]: 5.94975e-06 [meta_fg_expand]: 6.82008e-06 [inplace_validation_after_expand]: 7.90041e-06 [flash_sp_send_recv_attached]: 5.95022e-06 [receive_attached]: 3.14973e-06 [after_resolve]: 1.41901e-05 [a_after_grad]: 1.588e-05 [special_op_eliminate]: 9.22987e-06 [renormalize]: 0.00048684 [add_forward_monad_depend]: 3.62005e-06 [auto_monad_grad]: 2.69013e-06 [auto_monad_eliminator]: 3.52901e-05 [cse]: 3.652e-05 [a_3]: 6.931e-05 [Cycle 2]: 0.00092394, [43] [expand_dump_flag]: 1.22003e-06 [switch_simplify]: 1.08201e-05 [loop_unroll]: 1.16201e-05 [a_1]: 0.00025067 [recompute_prepare]: 9.23965e-06 [updatestate_depend_eliminate]: 6.77025e-06 [updatestate_assign_eliminate]: 5.17024e-06 [updatestate_loads_eliminate]: 5.97024e-06 [parameter_eliminate]: 1.2801e-06 [a_2]: 0.00012644 [accelerated_algorithm]: 9.79006e-06 [shard]: 1.26986e-06 [meta_shard_fg_expand]: 2.94996e-06 [shard_inline]: 9.79006e-06 [auto_parallel]: 1.173e-05 [parallel]: 3.94974e-06 [flash_sp]: 3.70992e-06 [merge_comm]: 7.11996e-06 [allreduce_fusion]: 6.10016e-06 [matmul_add_comm_reduction]: 8.31997e-06 [allreduce_slice_to_reducescatter]: 2.99886e-07 [virtual_shard_identity]: 1.07302e-05 [virtual_dataset]: 9.31975e-06 [get_grad_eliminate_]: 9.01008e-06 [virtual_output]: 8.71019e-06 [merge_forward]: 5.37001e-06 [cell_reuse_recompute_pass]: 2.08011e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.99401e-05 [before_grad]: 1.60402e-05 [inplace_validation]: 4.84008e-06 [meta_fg_expand]: 5.83008e-06 [inplace_validation_after_expand]: 6.14021e-06 [flash_sp_send_recv_attached]: 1.07009e-06 [receive_attached]: 7.30157e-07 [after_resolve]: 1.20802e-05 [a_after_grad]: 1.48201e-05 [special_op_eliminate]: 9.0301e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 1.15996e-06 [auto_monad_grad]: 1.12038e-06 [auto_monad_eliminator]: 1.99201e-05 [cse]: 2.14004e-05 [a_3]: 6.01201e-05 [py_interpret_to_execute_after_opt_a]: 1.05202e-05 [slice_cell_reuse_recomputed_activation]: 2.22959e-06 [rewriter_after_opt_a]: 0.00015278 [convert_after_rewriter]: 9.5102e-06 [order_py_execute_after_rewriter]: 7.11996e-06 [opt_b]: 0.00028596, [1] [Cycle 1]: 0.00028002, [7] [b_1]: 0.00019641 [b_2]: 1.23102e-05 [updatestate_depend_eliminate]: 5.89015e-06 [updatestate_assign_eliminate]: 5.37001e-06 [updatestate_loads_eliminate]: 5.77001e-06 [renormalize]: 3.39933e-07 [cse]: 1.97398e-05 [optimize_parallel_all_gather_comm]: 8.88994e-06 [overlap_param_gather]: 1.36998e-06 [cconv]: 2.64398e-05 [loop_unroll]: 0.00049628 [opt_after_cconv]: 0.00015457, [1] [Cycle 1]: 0.00014754, [7] [c_1]: 6.47199e-05 [parameter_eliminate]: 2.61003e-06 [updatestate_depend_eliminate]: 9.05991e-06 [updatestate_assign_eliminate]: 5.02029e-06 [updatestate_loads_eliminate]: 6.12019e-06 [cse]: 2.403e-05 [renormalize]: 4.4005e-07 [remove_dup_value]: 1.43801e-05 [tuple_transform]: 8.48398e-05, [1] [Cycle 1]: 8.028e-05, [2] [d_1]: 6.97901e-05 [renormalize]: 2.70084e-07 [partial_unused_args_eliminate]: 2.72971e-06 [add_cache_embedding]: 1.45896e-05 [add_recomputation]: 0.00012269 [cse_after_recomputation]: 3.09101e-05, [1] [Cycle 1]: 2.51499e-05, [1] [cse]: 1.95303e-05 [environ_conv]: 8.15e-06 [swap_dp_allreduce_reducescatter]: 9.10973e-06 [bias_add_comm_swap]: 2.59001e-06 [label_micro_interleaved_index]: 2.45962e-06 [label_fine_grained_interleaved_index]: 2.39024e-06 [merge_cast_opt]: 1.81003e-06 [slice_recompute_activation]: 2.27988e-06 [micro_interleaved_order_control]: 2.42004e-06 [assign_add_opt]: 8.66968e-06 [ForceFp32Comm]: 9.09902e-07 [remove_cast_before_assign_add]: 1.07987e-06 [full_micro_interleaved_order_control]: 2.54018e-06 [reorder_send_recv_between_fp_bp]: 2.63005e-06 [comm_op_add_attrs]: 1.03004e-06 [add_comm_op_reuse_tag]: 9.00123e-07 [interleave_split_concat_branches]: 9.09902e-07 [interleave_parallel_branches]: 1.1404e-06 [overlap_opt_shard_in_pipeline]: 1.2503e-06 [overlap_opt_shard_grad_in_pipeline]: 2.32039e-06 [control_data_broadcast_order]: 1.60001e-06 [grouped_pairwise_exchange_alltoall]: 1.40024e-06 [offloading_packed_experts]: 1.17021e-06 [overlap_recompute_and_grad_model_parallel]: 2.29012e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.01002e-06 [overlap_recompute_allgather_and_fa_grad]: 1.35973e-06 [overlap_grad_ring_attention]: 1.93994e-06 [overlap_grad_flash_sp]: 1.66702e-05 [begin_end_overlap_inline]: 8.00006e-07 [split_matmul_comm_elemetwise]: 2.15974e-06 [split_layernorm_comm]: 2.02004e-06 [handle_group_info]: 9.99775e-07 [symbol_engine_optimizer]: 0.00010156, [1] [Cycle 1]: 9.68599e-05, [6] [build]: 4.15975e-06 [elim_shapecalc]: 1.49799e-05 [elim_not_effective]: 2.03601e-05 [opt_reshape]: 1.112e-05 [fold_const_symbol]: 1.731e-05 [renormalize]: 3.7998e-07 [pipeline_parallel_scheduler]: 1.74018e-06 [auto_monad_reorder]: 3.397e-05 [get_jit_bprop_graph]: 4.29805e-07 [rewriter_after_jit_bprop_graph]: 4.69852e-07 [eliminate_special_op_node]: 0.00051913 [distribtued_split]: 4.67398e-05 [validate]: 3.82899e-05 [task_emit]: 0.0715867 [execute]: 1.36602e-05 Sums bootstrap : 0.000330s : 0.42% type_inference : 0.002636s : 3.32% auto_monad : 0.000311s : 0.39% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000028s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000042s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000044s : 0.06% optimize.opt_a.loop_unroll : 0.000028s : 0.04% optimize.opt_a.a_1 : 0.000712s : 0.90% optimize.opt_a.recompute_prepare : 0.000021s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000270s : 0.34% optimize.opt_a.accelerated_algorithm : 0.000020s : 0.03% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000008s : 0.01% optimize.opt_a.shard_inline : 0.000020s : 0.03% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000017s : 0.02% optimize.opt_a.allreduce_fusion : 0.000013s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000023s : 0.03% optimize.opt_a.virtual_dataset : 0.000020s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000019s : 0.02% optimize.opt_a.virtual_output : 0.000018s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000042s : 0.05% optimize.opt_a.before_grad : 0.000034s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000013s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000014s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000026s : 0.03% optimize.opt_a.a_after_grad : 0.000031s : 0.04% optimize.opt_a.special_op_eliminate : 0.000018s : 0.02% optimize.opt_a.renormalize : 0.000487s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000055s : 0.07% optimize.opt_a.cse : 0.000058s : 0.07% optimize.opt_a.a_3 : 0.000129s : 0.16% optimize.py_interpret_to_execute_after_opt_a : 0.000011s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000153s : 0.19% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000196s : 0.25% optimize.opt_b.b_2 : 0.000012s : 0.02% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000026s : 0.03% optimize.loop_unroll : 0.000496s : 0.62% optimize.opt_after_cconv.c_1 : 0.000065s : 0.08% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000024s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000070s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000123s : 0.15% optimize.cse_after_recomputation.cse : 0.000020s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000009s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000009s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000002s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000017s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000015s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000020s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000034s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000519s : 0.65% distribtued_split : 0.000047s : 0.06% validate : 0.000038s : 0.05% task_emit : 0.071587s : 90.15% execute : 0.000014s : 0.02% Time group info: ------[substitution.] 0.000199 63 3.75% : 0.000007s : 2: substitution.depend_value_elim 1.70% : 0.000003s : 5: substitution.elim_not_effective 1.51% : 0.000003s : 5: substitution.fold_const_symbol 4.82% : 0.000010s : 6: substitution.graph_param_transform 60.09% : 0.000119s : 1: substitution.inline 3.68% : 0.000007s : 10: substitution.j_node_and_user_rematch 2.69% : 0.000005s : 6: substitution.load_eliminater 2.02% : 0.000004s : 2: substitution.reduce_all_const_elim 5.08% : 0.000010s : 10: substitution.remove_not_recompute_node 1.90% : 0.000004s : 2: substitution.replace_old_param 6.49% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 6.27% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002605 2 87.95% : 0.002291s : 1: type_inference.infer 12.05% : 0.000314s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000118 1 100.00% : 0.000118s : 1: match.inline ------[predicate.] 0.000271 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.17% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.14% : 0.000006s : 25: predicate.arithmetic_simplify 0.78% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.49% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.33% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.86% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.53% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_depend_swap 1.96% : 0.000005s : 31: predicate.environ_get_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.34% : 0.000004s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.05% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.74% : 0.000002s : 12: predicate.incorporate_call_switch 6.27% : 0.000017s : 63: predicate.inline 1.17% : 0.000003s : 12: predicate.inline_without_move 0.42% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.01% : 0.000003s : 12: predicate.less_batch_normalization 1.79% : 0.000005s : 25: predicate.list_to_tuple_eliminator_ 2.36% : 0.000006s : 38: predicate.load_eliminater 1.27% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.19% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.75% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.70% : 0.000002s : 12: predicate.merge_addn 0.70% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.69% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.09% : 0.000003s : 14: predicate.partial_defer_inline 1.36% : 0.000004s : 19: predicate.partial_eliminate 0.89% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 0.97% : 0.000003s : 13: predicate.reduce_eliminate 0.64% : 0.000002s : 12: predicate.remove_not_recompute_node 1.16% : 0.000003s : 25: predicate.replace_applicator 0.59% : 0.000002s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.74% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000003s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.01% : 0.000003s : 12: predicate.shard_identity_eliminate 1.41% : 0.000004s : 18: predicate.special_op_eliminate 1.06% : 0.000003s : 12: predicate.specialize_transform 1.14% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.93% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.37% : 0.000006s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000005s : 26: predicate.switch_layer_defer_inline 4.08% : 0.000011s : 43: predicate.switch_simplify 0.76% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.72% : 0.000005s : 25: predicate.tuple_list_convert_item_index_to_positive 1.71% : 0.000005s : 25: predicate.tuple_list_get_item_const_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.75% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.48% : 0.000007s : 37: predicate.tuple_list_set_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.35% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.32% : 0.000009s : 50: predicate.updatestate_useless_node_eliminater 0.55% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.87% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000180 4 10.21% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.79% : 0.000162s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.094049 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.14% : 0.000128s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.35% : 0.000326s : 1: auto_monad 0.04% : 0.000041s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000354s : 1: bootstrap 0.03% : 0.000031s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.01% : 0.000005s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.04% : 0.000034s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000056s : 1: distribtued_split 0.57% : 0.000532s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000023s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000010s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000006s : 1: label_micro_interleaved_index 0.54% : 0.000506s : 1: loop_unroll 0.01% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000018s : 1: opt.transform.loop_unroll_optimizer 1.48% : 0.001389s : 80: opt.transform.opt_a 0.07% : 0.000063s : 1: opt.transform.opt_after_cconv 0.20% : 0.000185s : 27: opt.transform.opt_b 0.07% : 0.000068s : 1: opt.transform.opt_trans_graph 0.04% : 0.000037s : 3: opt.transform.special_op_eliminate 0.06% : 0.000059s : 4: opt.transform.symbol_engine_opt 6.48% : 0.006090s : 1: opt_a 0.17% : 0.000159s : 1: opt_after_cconv 0.31% : 0.000289s : 1: opt_b 8.52% : 0.008010s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000007s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000035s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.28% : 0.000260s : 1: renormalize.infer 0.23% : 0.000221s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000159s : 1: rewriter_after_opt_a 0.05% : 0.000046s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000013s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000105s : 1: symbol_engine_optimizer 76.15% : 0.071621s : 1: task_emit 0.09% : 0.000089s : 1: tuple_transform 2.82% : 0.002654s : 1: type_inference 0.08% : 0.000078s : 1: validate TotalTime = 0.0793746, [21] [bootstrap]: 0.0002825 [type_inference]: 0.00223314 [auto_monad]: 9.734e-05 [graph_reusing]: 1.85007e-06 [inline]: 1.18976e-06 [parallel-infer-symbol]: 1.61026e-06 [pre_auto_parallel]: 2.13101e-05 [insert-virtual-dataset]: 1.87987e-06 [parallel-infer-symbol-second]: 3.69735e-07 [dataset_repeat_opt]: 8.10251e-07 [pipeline_split]: 8.69855e-07 [optimize]: 0.00678081, [52] [py_interpret_to_execute]: 1.22101e-05 [rewriter_before_opt_a]: 2.94298e-05 [opt_a]: 0.005179, [2] [Cycle 1]: 0.00141902, [43] [expand_dump_flag]: 2.81027e-06 [switch_simplify]: 2.57404e-05 [loop_unroll]: 1.30399e-05 [a_1]: 0.00032547 [recompute_prepare]: 8.74e-06 [updatestate_depend_eliminate]: 7.42031e-06 [updatestate_assign_eliminate]: 5.68014e-06 [updatestate_loads_eliminate]: 5.51017e-06 [parameter_eliminate]: 2.10991e-06 [a_2]: 0.00011374 [accelerated_algorithm]: 8.32044e-06 [shard]: 1.72015e-06 [meta_shard_fg_expand]: 3.10969e-06 [shard_inline]: 8.15e-06 [auto_parallel]: 1.17999e-05 [parallel]: 5.64987e-06 [flash_sp]: 7.56001e-06 [merge_comm]: 7.39004e-06 [allreduce_fusion]: 4.93973e-06 [matmul_add_comm_reduction]: 9.49018e-06 [allreduce_slice_to_reducescatter]: 2.99886e-07 [virtual_shard_identity]: 9.60985e-06 [virtual_dataset]: 7.84965e-06 [get_grad_eliminate_]: 7.91997e-06 [virtual_output]: 7.73976e-06 [merge_forward]: 4.80004e-06 [cell_reuse_recompute_pass]: 1.39e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.64402e-05 [before_grad]: 1.35801e-05 [inplace_validation]: 5.24009e-06 [meta_fg_expand]: 4.99003e-06 [inplace_validation_after_expand]: 5.09992e-06 [flash_sp_send_recv_attached]: 2.95974e-06 [receive_attached]: 2.14018e-06 [after_resolve]: 1.06897e-05 [a_after_grad]: 1.28201e-05 [special_op_eliminate]: 8.08015e-06 [renormalize]: 0.00039286 [add_forward_monad_depend]: 2.88989e-06 [auto_monad_grad]: 1.4198e-06 [auto_monad_eliminator]: 2.37501e-05 [cse]: 2.55401e-05 [a_3]: 5.81602e-05 [Cycle 2]: 0.00077274, [43] [expand_dump_flag]: 9.20147e-07 [switch_simplify]: 9.13022e-06 [loop_unroll]: 7.58981e-06 [a_1]: 0.00020272 [recompute_prepare]: 7.43009e-06 [updatestate_depend_eliminate]: 5.63031e-06 [updatestate_assign_eliminate]: 4.71994e-06 [updatestate_loads_eliminate]: 5.01005e-06 [parameter_eliminate]: 1.00024e-06 [a_2]: 0.00010497 [accelerated_algorithm]: 8.46013e-06 [shard]: 1.05985e-06 [meta_shard_fg_expand]: 2.42982e-06 [shard_inline]: 8.06013e-06 [auto_parallel]: 1.08499e-05 [parallel]: 3.09991e-06 [flash_sp]: 2.27988e-06 [merge_comm]: 6.00005e-06 [allreduce_fusion]: 5.0501e-06 [matmul_add_comm_reduction]: 7.58981e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 8.88994e-06 [virtual_dataset]: 7.58003e-06 [get_grad_eliminate_]: 7.42031e-06 [virtual_output]: 7.18003e-06 [merge_forward]: 4.61005e-06 [cell_reuse_recompute_pass]: 1.62004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.49398e-05 [before_grad]: 1.25598e-05 [inplace_validation]: 4.17e-06 [meta_fg_expand]: 4.63985e-06 [inplace_validation_after_expand]: 4.78979e-06 [flash_sp_send_recv_attached]: 9.69972e-07 [receive_attached]: 6.99889e-07 [after_resolve]: 9.43989e-06 [a_after_grad]: 1.16602e-05 [special_op_eliminate]: 7.30017e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.49715e-07 [auto_monad_grad]: 1.01002e-06 [auto_monad_eliminator]: 1.573e-05 [cse]: 1.87699e-05 [a_3]: 4.83403e-05 [py_interpret_to_execute_after_opt_a]: 8.07038e-06 [slice_cell_reuse_recomputed_activation]: 1.67033e-06 [rewriter_after_opt_a]: 0.00012891 [convert_after_rewriter]: 7.87992e-06 [order_py_execute_after_rewriter]: 5.68014e-06 [opt_b]: 0.00023987, [1] [Cycle 1]: 0.00023487, [7] [b_1]: 0.00016083 [b_2]: 1.015e-05 [updatestate_depend_eliminate]: 5.4799e-06 [updatestate_assign_eliminate]: 4.52995e-06 [updatestate_loads_eliminate]: 4.92996e-06 [renormalize]: 2.39816e-07 [cse]: 1.77701e-05 [optimize_parallel_all_gather_comm]: 7.77002e-06 [overlap_param_gather]: 7.90227e-07 [cconv]: 1.52397e-05 [loop_unroll]: 0.00047378 [opt_after_cconv]: 0.0001517, [1] [Cycle 1]: 0.00014598, [7] [c_1]: 5.10798e-05 [parameter_eliminate]: 1.83983e-06 [updatestate_depend_eliminate]: 2.87597e-05 [updatestate_assign_eliminate]: 4.86989e-06 [updatestate_loads_eliminate]: 5.33042e-06 [cse]: 2.06199e-05 [renormalize]: 4.20026e-07 [remove_dup_value]: 8.73022e-06 [tuple_transform]: 6.82902e-05, [1] [Cycle 1]: 6.38799e-05, [2] [d_1]: 5.45196e-05 [renormalize]: 2.19792e-07 [partial_unused_args_eliminate]: 1.40024e-06 [add_cache_embedding]: 1.13202e-05 [add_recomputation]: 5.24702e-05 [cse_after_recomputation]: 2.605e-05, [1] [Cycle 1]: 2.16202e-05, [1] [cse]: 1.645e-05 [environ_conv]: 5.98002e-06 [swap_dp_allreduce_reducescatter]: 7.02962e-06 [bias_add_comm_swap]: 1.64006e-06 [label_micro_interleaved_index]: 1.13994e-06 [label_fine_grained_interleaved_index]: 1.11014e-06 [merge_cast_opt]: 7.19912e-07 [slice_recompute_activation]: 1.14972e-06 [micro_interleaved_order_control]: 1.83983e-06 [assign_add_opt]: 6.26966e-06 [ForceFp32Comm]: 5.69969e-07 [remove_cast_before_assign_add]: 9.49949e-07 [full_micro_interleaved_order_control]: 1.50036e-06 [reorder_send_recv_between_fp_bp]: 1.15018e-06 [comm_op_add_attrs]: 6.20261e-07 [add_comm_op_reuse_tag]: 5.80214e-07 [interleave_split_concat_branches]: 4.89876e-07 [interleave_parallel_branches]: 5.49946e-07 [overlap_opt_shard_in_pipeline]: 9.20147e-07 [overlap_opt_shard_grad_in_pipeline]: 1.37975e-06 [control_data_broadcast_order]: 6.9011e-07 [grouped_pairwise_exchange_alltoall]: 1.01002e-06 [offloading_packed_experts]: 6.99889e-07 [overlap_recompute_and_grad_model_parallel]: 1.35042e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.29805e-07 [overlap_recompute_allgather_and_fa_grad]: 5.99772e-07 [overlap_grad_ring_attention]: 1.38022e-06 [overlap_grad_flash_sp]: 1.173e-05 [begin_end_overlap_inline]: 4.69852e-07 [split_matmul_comm_elemetwise]: 1.24983e-06 [split_layernorm_comm]: 1.53994e-06 [handle_group_info]: 8.29808e-07 [symbol_engine_optimizer]: 8.18302e-05, [1] [Cycle 1]: 7.79e-05, [6] [build]: 3.66988e-06 [elim_shapecalc]: 1.13402e-05 [elim_not_effective]: 1.52397e-05 [opt_reshape]: 8.61008e-06 [fold_const_symbol]: 1.371e-05 [renormalize]: 1.80211e-07 [pipeline_parallel_scheduler]: 1.03004e-06 [auto_monad_reorder]: 2.27797e-05 [get_jit_bprop_graph]: 2.90107e-07 [rewriter_after_jit_bprop_graph]: 2.70084e-07 [eliminate_special_op_node]: 0.00048905 [distribtued_split]: 3.32398e-05 [validate]: 3.018e-05 [task_emit]: 0.0691455 [execute]: 8.12998e-06 Sums bootstrap : 0.000282s : 0.37% type_inference : 0.002233s : 2.96% auto_monad : 0.000097s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000029s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000528s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000219s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000393s : 0.52% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000039s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000129s : 0.17% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000474s : 0.63% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000029s : 0.04% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.02% optimize.add_recomputation : 0.000052s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000006s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000023s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000489s : 0.65% distribtued_split : 0.000033s : 0.04% validate : 0.000030s : 0.04% task_emit : 0.069145s : 91.66% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000112 63 4.51% : 0.000005s : 2: substitution.depend_value_elim 2.05% : 0.000002s : 5: substitution.elim_not_effective 2.17% : 0.000002s : 5: substitution.fold_const_symbol 5.90% : 0.000007s : 6: substitution.graph_param_transform 49.08% : 0.000055s : 1: substitution.inline 4.45% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.72% : 0.000004s : 6: substitution.load_eliminater 2.25% : 0.000003s : 2: substitution.reduce_all_const_elim 6.49% : 0.000007s : 10: substitution.remove_not_recompute_node 2.07% : 0.000002s : 2: substitution.replace_old_param 9.33% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 7.98% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002211 2 90.28% : 0.001996s : 1: type_inference.infer 9.72% : 0.000215s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000054 1 100.00% : 0.000054s : 1: match.inline ------[predicate.] 0.000228 1420 0.77% : 0.000002s : 13: predicate.accumulaten_eliminater 0.96% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.39% : 0.000005s : 25: predicate.arithmetic_simplify 0.89% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.42% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.26% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.96% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.83% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.57% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.11% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 2.02% : 0.000005s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.79% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.71% : 0.000002s : 12: predicate.incorporate_call_switch 5.78% : 0.000013s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.07% : 0.000002s : 12: predicate.less_batch_normalization 1.80% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.56% : 0.000006s : 38: predicate.load_eliminater 1.26% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.27% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.71% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.78% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.74% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.29% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.02% : 0.000002s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.78% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.49% : 0.000001s : 6: predicate.row_tensor_eliminate 0.96% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.40% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.12% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 1.00% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.35% : 0.000005s : 38: predicate.stopgrad_eliminater 0.46% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.10% : 0.000009s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.86% : 0.000002s : 13: predicate.transpose_eliminate 1.91% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.77% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.65% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.73% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.47% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.56% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.55% : 0.000001s : 6: predicate.value_based_eliminate 0.78% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.76% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000121 4 8.38% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.62% : 0.000111s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087899 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.06% : 0.000057s : 1: add_recomputation 0.01% : 0.000009s : 1: assign_add_opt 0.12% : 0.000108s : 1: auto_monad 0.03% : 0.000030s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000306s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.57% : 0.000501s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.55% : 0.000482s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.01% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001075s : 80: opt.transform.opt_a 0.06% : 0.000049s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.03% : 0.000030s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 5.90% : 0.005183s : 1: opt_a 0.18% : 0.000156s : 1: opt_after_cconv 0.28% : 0.000243s : 1: opt_b 7.72% : 0.006788s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000016s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.01% : 0.000013s : 1: remove_dup_value 0.24% : 0.000210s : 1: renormalize.infer 0.20% : 0.000178s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000134s : 1: rewriter_after_opt_a 0.04% : 0.000034s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000085s : 1: symbol_engine_optimizer 78.69% : 0.069169s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.56% : 0.002249s : 1: type_inference 0.07% : 0.000060s : 1: validate TotalTime = 0.0796379, [21] [bootstrap]: 0.00030124 [type_inference]: 0.00244903 [auto_monad]: 0.00012463 [graph_reusing]: 1.93994e-06 [inline]: 1.51992e-06 [parallel-infer-symbol]: 2.19001e-06 [pre_auto_parallel]: 2.56901e-05 [insert-virtual-dataset]: 2.71015e-06 [parallel-infer-symbol-second]: 3.89758e-07 [dataset_repeat_opt]: 8.2003e-07 [pipeline_split]: 1.44029e-06 [optimize]: 0.00703613, [52] [py_interpret_to_execute]: 1.59699e-05 [rewriter_before_opt_a]: 3.31197e-05 [opt_a]: 0.00537112, [2] [Cycle 1]: 0.00151246, [43] [expand_dump_flag]: 3.70014e-06 [switch_simplify]: 3.04701e-05 [loop_unroll]: 1.34003e-05 [a_1]: 0.00034045 [recompute_prepare]: 8.55001e-06 [updatestate_depend_eliminate]: 9.54e-06 [updatestate_assign_eliminate]: 6.31995e-06 [updatestate_loads_eliminate]: 7.77002e-06 [parameter_eliminate]: 3.23029e-06 [a_2]: 0.0001179 [accelerated_algorithm]: 8.16025e-06 [shard]: 1.94972e-06 [meta_shard_fg_expand]: 3.88967e-06 [shard_inline]: 8.13976e-06 [auto_parallel]: 1.20196e-05 [parallel]: 7.28015e-06 [flash_sp]: 1.034e-05 [merge_comm]: 7.62986e-06 [allreduce_fusion]: 5.22984e-06 [matmul_add_comm_reduction]: 1.02003e-05 [allreduce_slice_to_reducescatter]: 4.89876e-07 [virtual_shard_identity]: 9.1996e-06 [virtual_dataset]: 7.86968e-06 [get_grad_eliminate_]: 7.51019e-06 [virtual_output]: 7.68993e-06 [merge_forward]: 5.54975e-06 [cell_reuse_recompute_pass]: 1.87987e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.63103e-05 [before_grad]: 1.33202e-05 [inplace_validation]: 5.13019e-06 [meta_fg_expand]: 5.26989e-06 [inplace_validation_after_expand]: 5.91995e-06 [flash_sp_send_recv_attached]: 4.4601e-06 [receive_attached]: 2.92016e-06 [after_resolve]: 1.12299e-05 [a_after_grad]: 1.29798e-05 [special_op_eliminate]: 7.64988e-06 [renormalize]: 0.00042864 [add_forward_monad_depend]: 3.66988e-06 [auto_monad_grad]: 1.71037e-06 [auto_monad_eliminator]: 3.24198e-05 [cse]: 3.319e-05 [a_3]: 5.87003e-05 [Cycle 2]: 0.00076603, [43] [expand_dump_flag]: 1.05007e-06 [switch_simplify]: 9.10973e-06 [loop_unroll]: 7.58003e-06 [a_1]: 0.00020041 [recompute_prepare]: 7.3798e-06 [updatestate_depend_eliminate]: 5.79981e-06 [updatestate_assign_eliminate]: 4.99981e-06 [updatestate_loads_eliminate]: 5.11995e-06 [parameter_eliminate]: 1.07987e-06 [a_2]: 0.00010387 [accelerated_algorithm]: 8.2599e-06 [shard]: 1.14972e-06 [meta_shard_fg_expand]: 2.56998e-06 [shard_inline]: 7.49994e-06 [auto_parallel]: 1.08499e-05 [parallel]: 3.72017e-06 [flash_sp]: 3.41004e-06 [merge_comm]: 5.99027e-06 [allreduce_fusion]: 4.78979e-06 [matmul_add_comm_reduction]: 7.70018e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 8.10018e-06 [virtual_dataset]: 7.51996e-06 [get_grad_eliminate_]: 7.35e-06 [virtual_output]: 7.11996e-06 [merge_forward]: 4.5402e-06 [cell_reuse_recompute_pass]: 2.00002e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.50902e-05 [before_grad]: 1.20397e-05 [inplace_validation]: 4.17978e-06 [meta_fg_expand]: 4.87035e-06 [inplace_validation_after_expand]: 5.04963e-06 [flash_sp_send_recv_attached]: 9.49949e-07 [receive_attached]: 9.00123e-07 [after_resolve]: 9.77004e-06 [a_after_grad]: 1.175e-05 [special_op_eliminate]: 7.41985e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 7.79983e-07 [auto_monad_grad]: 1.12969e-06 [auto_monad_eliminator]: 1.81203e-05 [cse]: 1.90004e-05 [a_3]: 4.83301e-05 [py_interpret_to_execute_after_opt_a]: 9.01008e-06 [slice_cell_reuse_recomputed_activation]: 2.16998e-06 [rewriter_after_opt_a]: 0.00014992 [convert_after_rewriter]: 8.57981e-06 [order_py_execute_after_rewriter]: 6.04009e-06 [opt_b]: 0.00025666, [1] [Cycle 1]: 0.00025044, [7] [b_1]: 0.00015999 [b_2]: 9.72999e-06 [updatestate_depend_eliminate]: 5.25964e-06 [updatestate_assign_eliminate]: 4.33996e-06 [updatestate_loads_eliminate]: 5.24009e-06 [renormalize]: 2.40281e-07 [cse]: 3.49898e-05 [optimize_parallel_all_gather_comm]: 8.38004e-06 [overlap_param_gather]: 1.59023e-06 [cconv]: 2.27601e-05 [loop_unroll]: 0.00046519 [opt_after_cconv]: 0.00013108, [1] [Cycle 1]: 0.00012492, [7] [c_1]: 5.225e-05 [parameter_eliminate]: 2.46987e-06 [updatestate_depend_eliminate]: 7.66013e-06 [updatestate_assign_eliminate]: 4.41959e-06 [updatestate_loads_eliminate]: 5.05988e-06 [cse]: 2.17897e-05 [renormalize]: 3.30154e-07 [remove_dup_value]: 1.28197e-05 [tuple_transform]: 6.92597e-05, [1] [Cycle 1]: 6.45798e-05, [2] [d_1]: 5.49899e-05 [renormalize]: 2.10013e-07 [partial_unused_args_eliminate]: 1.75973e-06 [add_cache_embedding]: 1.22399e-05 [add_recomputation]: 6.35497e-05 [cse_after_recomputation]: 2.666e-05, [1] [Cycle 1]: 2.16397e-05, [1] [cse]: 1.67103e-05 [environ_conv]: 7.39982e-06 [swap_dp_allreduce_reducescatter]: 7.51996e-06 [bias_add_comm_swap]: 2.69013e-06 [label_micro_interleaved_index]: 2.06009e-06 [label_fine_grained_interleaved_index]: 1.83005e-06 [merge_cast_opt]: 1.43005e-06 [slice_recompute_activation]: 1.55019e-06 [micro_interleaved_order_control]: 1.70991e-06 [assign_add_opt]: 7.89994e-06 [ForceFp32Comm]: 9.09902e-07 [remove_cast_before_assign_add]: 8.89879e-07 [full_micro_interleaved_order_control]: 2.02004e-06 [reorder_send_recv_between_fp_bp]: 1.94972e-06 [comm_op_add_attrs]: 1.05985e-06 [add_comm_op_reuse_tag]: 1.11014e-06 [interleave_split_concat_branches]: 8.2003e-07 [interleave_parallel_branches]: 9.69972e-07 [overlap_opt_shard_in_pipeline]: 1.23028e-06 [overlap_opt_shard_grad_in_pipeline]: 2.08989e-06 [control_data_broadcast_order]: 1.20979e-06 [grouped_pairwise_exchange_alltoall]: 1.26986e-06 [offloading_packed_experts]: 1.07987e-06 [overlap_recompute_and_grad_model_parallel]: 2.00002e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.49715e-07 [overlap_recompute_allgather_and_fa_grad]: 1.24983e-06 [overlap_grad_ring_attention]: 1.70991e-06 [overlap_grad_flash_sp]: 1.38297e-05 [begin_end_overlap_inline]: 6.79865e-07 [split_matmul_comm_elemetwise]: 1.93017e-06 [split_layernorm_comm]: 1.74996e-06 [handle_group_info]: 9.10368e-07 [symbol_engine_optimizer]: 8.186e-05, [1] [Cycle 1]: 7.73901e-05, [6] [build]: 3.93018e-06 [elim_shapecalc]: 1.19698e-05 [elim_not_effective]: 1.53603e-05 [opt_reshape]: 8.82009e-06 [fold_const_symbol]: 1.29598e-05 [renormalize]: 3.20375e-07 [pipeline_parallel_scheduler]: 1.50036e-06 [auto_monad_reorder]: 2.87802e-05 [get_jit_bprop_graph]: 4.20026e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00047992 [distribtued_split]: 4.00799e-05 [validate]: 3.401e-05 [task_emit]: 0.0688562 [execute]: 1.12201e-05 Sums bootstrap : 0.000301s : 0.40% type_inference : 0.002449s : 3.24% auto_monad : 0.000125s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000033s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000541s : 0.72% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000222s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000017s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.01% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000429s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000150s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000035s : 0.05% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000465s : 0.62% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000480s : 0.63% distribtued_split : 0.000040s : 0.05% validate : 0.000034s : 0.04% task_emit : 0.068856s : 91.09% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000129 63 5.08% : 0.000007s : 2: substitution.depend_value_elim 2.03% : 0.000003s : 5: substitution.elim_not_effective 1.85% : 0.000002s : 5: substitution.fold_const_symbol 5.37% : 0.000007s : 6: substitution.graph_param_transform 50.64% : 0.000065s : 1: substitution.inline 3.90% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.50% : 0.000005s : 6: substitution.load_eliminater 2.47% : 0.000003s : 2: substitution.reduce_all_const_elim 5.95% : 0.000008s : 10: substitution.remove_not_recompute_node 2.65% : 0.000003s : 2: substitution.replace_old_param 8.86% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.70% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002420 2 89.04% : 0.002155s : 1: type_inference.infer 10.96% : 0.000265s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000228 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.08% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.79% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.22% : 0.000005s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.85% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.47% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.77% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 1.01% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.49% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_depend_swap 1.90% : 0.000004s : 31: predicate.environ_get_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.26% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.72% : 0.000013s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.08% : 0.000002s : 12: predicate.less_batch_normalization 1.70% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.43% : 0.000006s : 38: predicate.load_eliminater 1.30% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.82% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.77% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.71% : 0.000002s : 13: predicate.minmaximum_grad 0.80% : 0.000002s : 6: predicate.mutable_eliminate 0.49% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.18% : 0.000003s : 14: predicate.partial_defer_inline 1.31% : 0.000003s : 19: predicate.partial_eliminate 0.82% : 0.000002s : 13: predicate.print_const_string_wrapper 0.88% : 0.000002s : 12: predicate.reduce_all_const_elim 1.12% : 0.000003s : 13: predicate.reduce_eliminate 0.60% : 0.000001s : 12: predicate.remove_not_recompute_node 1.20% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.78% : 0.000002s : 13: predicate.reshape_eliminate 0.83% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 6: predicate.row_tensor_eliminate 1.09% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.37% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 1.04% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.32% : 0.000005s : 38: predicate.stopgrad_eliminater 0.46% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.68% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.28% : 0.000010s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.75% : 0.000002s : 13: predicate.transpose_eliminate 1.74% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.78% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.81% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.67% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.55% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.63% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.39% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.48% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.54% : 0.000001s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.83% : 0.000002s : 12: predicate.virtual_output_eliminate 0.44% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000150 4 10.42% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.58% : 0.000134s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088474 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000068s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000137s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.37% : 0.000326s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000047s : 1: distribtued_split 0.56% : 0.000493s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000474s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001093s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.08% : 0.005375s : 1: opt_a 0.15% : 0.000135s : 1: opt_after_cconv 0.29% : 0.000260s : 1: opt_b 7.96% : 0.007044s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000231s : 1: renormalize.infer 0.22% : 0.000192s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000156s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000085s : 1: symbol_engine_optimizer 77.86% : 0.068886s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.79% : 0.002467s : 1: type_inference 0.08% : 0.000068s : 1: validate TotalTime = 0.0805942, [21] [bootstrap]: 0.00028334 [type_inference]: 0.00223296 [auto_monad]: 9.90699e-05 [graph_reusing]: 1.57999e-06 [inline]: 1.12038e-06 [parallel-infer-symbol]: 1.00024e-06 [pre_auto_parallel]: 2.01501e-05 [insert-virtual-dataset]: 1.66986e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 5.89993e-07 [pipeline_split]: 1.01002e-06 [optimize]: 0.00678266, [52] [py_interpret_to_execute]: 1.22301e-05 [rewriter_before_opt_a]: 2.982e-05 [opt_a]: 0.0051763, [2] [Cycle 1]: 0.00141506, [43] [expand_dump_flag]: 2.27988e-06 [switch_simplify]: 2.59704e-05 [loop_unroll]: 1.32304e-05 [a_1]: 0.0003261 [recompute_prepare]: 9.09995e-06 [updatestate_depend_eliminate]: 7.54977e-06 [updatestate_assign_eliminate]: 5.43986e-06 [updatestate_loads_eliminate]: 5.74999e-06 [parameter_eliminate]: 2.12993e-06 [a_2]: 0.00011471 [accelerated_algorithm]: 8.23988e-06 [shard]: 1.46963e-06 [meta_shard_fg_expand]: 2.83029e-06 [shard_inline]: 8.54023e-06 [auto_parallel]: 1.13696e-05 [parallel]: 6.07036e-06 [flash_sp]: 7.75e-06 [merge_comm]: 7.24988e-06 [allreduce_fusion]: 5.1898e-06 [matmul_add_comm_reduction]: 9.02032e-06 [allreduce_slice_to_reducescatter]: 5.09899e-07 [virtual_shard_identity]: 9.41008e-06 [virtual_dataset]: 8.07969e-06 [get_grad_eliminate_]: 7.82032e-06 [virtual_output]: 7.79983e-06 [merge_forward]: 5.29969e-06 [cell_reuse_recompute_pass]: 1.55997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.67e-05 [before_grad]: 1.39698e-05 [inplace_validation]: 5.13997e-06 [meta_fg_expand]: 5.24987e-06 [inplace_validation_after_expand]: 5.5097e-06 [flash_sp_send_recv_attached]: 3.32016e-06 [receive_attached]: 2.10991e-06 [after_resolve]: 1.04201e-05 [a_after_grad]: 1.23903e-05 [special_op_eliminate]: 7.81007e-06 [renormalize]: 0.00039766 [add_forward_monad_depend]: 2.73995e-06 [auto_monad_grad]: 1.39e-06 [auto_monad_eliminator]: 2.11699e-05 [cse]: 2.39396e-05 [a_3]: 5.63501e-05 [Cycle 2]: 0.00077282, [43] [expand_dump_flag]: 8.40053e-07 [switch_simplify]: 9.14e-06 [loop_unroll]: 7.77002e-06 [a_1]: 0.00020345 [recompute_prepare]: 7.35978e-06 [updatestate_depend_eliminate]: 5.96978e-06 [updatestate_assign_eliminate]: 5.11995e-06 [updatestate_loads_eliminate]: 5.13019e-06 [parameter_eliminate]: 1.0198e-06 [a_2]: 0.00010376 [accelerated_algorithm]: 8.19983e-06 [shard]: 1.09989e-06 [meta_shard_fg_expand]: 2.42982e-06 [shard_inline]: 7.70018e-06 [auto_parallel]: 1.05603e-05 [parallel]: 3.45986e-06 [flash_sp]: 2.71993e-06 [merge_comm]: 5.80028e-06 [allreduce_fusion]: 4.82006e-06 [matmul_add_comm_reduction]: 7.27968e-06 [allreduce_slice_to_reducescatter]: 2.20258e-07 [virtual_shard_identity]: 8.86014e-06 [virtual_dataset]: 7.96979e-06 [get_grad_eliminate_]: 7.53021e-06 [virtual_output]: 7.43987e-06 [merge_forward]: 4.74975e-06 [cell_reuse_recompute_pass]: 1.66008e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.56099e-05 [before_grad]: 1.25901e-05 [inplace_validation]: 4.23007e-06 [meta_fg_expand]: 4.71016e-06 [inplace_validation_after_expand]: 5.05988e-06 [flash_sp_send_recv_attached]: 8.69855e-07 [receive_attached]: 7.89762e-07 [after_resolve]: 1.01798e-05 TotalTime = 0.0805717, [21] [bootstrap]: 0.00030134 [type_inference]: 0.00244888 [auto_monad]: 0.00012527 [graph_reusing]: 2.49036e-06 [inline]: 1.33971e-06 [parallel-infer-symbol]: 1.88965e-06 [pre_auto_parallel]: 2.603e-05 [insert-virtual-dataset]: 2.74973e-06 [parallel-infer-symbol-second]: 3.49712e-07 [dataset_repeat_opt]: 1.26008e-06 [pipeline_split]: 1.01002e-06 [optimize]: 0.00708972, [52] [py_interpret_to_execute]: 1.56499e-05 [rewriter_before_opt_a]: 3.35402e-05 [opt_a]: 0.00551728, [2] [Cycle 1]: 0.00143904, [43] [expand_dump_flag]: 2.17976e-06 [switch_simplify]: 2.44803e-05 [loop_unroll]: 1.32201e-05 [a_1]: 0.00032698 [recompute_prepare]: 9.04966e-06 [updatestate_depend_eliminate]: 8.31019e-06 [updatestate_assign_eliminate]: 5.13997e-06 [updatestate_loads_eliminate]: 5.65033e-06 [parameter_eliminate]: 2.65008e-06 [a_2]: 0.00011265 [accelerated_algorithm]: 8.7698e-06 [shard]: 1.51014e-06 [meta_shard_fg_expand]: 2.82004e-06 [shard_inline]: 8.12998e-06 [auto_parallel]: 1.18902e-05 [parallel]: 4.40981e-06 [flash_sp]: 6.14999e-06 [merge_comm]: 6.57002e-06 [allreduce_fusion]: 5.30016e-06 [matmul_add_comm_reduction]: 8.31019e-06 [allreduce_slice_to_reducescatter]: 3.49712e-07 [virtual_shard_identity]: 9.96003e-06 [virtual_dataset]: 8.88016e-06 [get_grad_eliminate_]: 7.84034e-06 [virtual_output]: 7.50972e-06 [merge_forward]: 5.24987e-06 [cell_reuse_recompute_pass]: 1.74996e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.68099e-05 [before_grad]: 1.37496e-05 [inplace_validation]: 4.42006e-06 [meta_fg_expand]: 5.02029e-06 [inplace_validation_after_expand]: 6.8997e-06 [flash_sp_send_recv_attached]: 2.88012e-06 [receive_attached]: 1.49012e-06 [after_resolve]: 1.10897e-05 [a_after_grad]: 1.251e-05 [special_op_eliminate]: 7.98004e-06 [renormalize]: 0.00041974 [add_forward_monad_depend]: 2.95974e-06 [auto_monad_grad]: 1.81003e-06 [auto_monad_eliminator]: 2.27997e-05 [cse]: 2.39201e-05 [a_3]: 5.81201e-05 [Cycle 2]: 0.00077054, [43] [expand_dump_flag]: 1.17999e-06 [switch_simplify]: 9.22009e-06 [loop_unroll]: 7.86968e-06 [a_1]: 0.00020146 [recompute_prepare]: 7.44965e-06 [updatestate_depend_eliminate]: 5.98002e-06 [updatestate_assign_eliminate]: 4.61983e-06 [updatestate_loads_eliminate]: 5.38025e-06 [parameter_eliminate]: 1.27964e-06 [a_2]: 0.00010361 [accelerated_algorithm]: 8.13976e-06 [shard]: 1.22003e-06 [meta_shard_fg_expand]: 2.31015e-06 [shard_inline]: 7.69971e-06 [auto_parallel]: 1.07298e-05 [parallel]: 3.68012e-06 [flash_sp]: 2.37999e-06 [merge_comm]: 5.66011e-06 [allreduce_fusion]: 5.17024e-06 [matmul_add_comm_reduction]: 7.91997e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 8.57003e-06 [virtual_dataset]: 7.41985e-06 [get_grad_eliminate_]: 7.22986e-06 [virtual_output]: 6.87968e-06 [merge_forward]: 4.71994e-06 [cell_reuse_recompute_pass]: 1.97021e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.57701e-05 [before_grad]: 1.27601e-05 [inplace_validation]: 4.29014e-06 [meta_fg_expand]: 4.82006e-06 [inplace_validation_after_expand]: 5.0501e-06 [flash_sp_send_recv_attached]: 1.07009e-06 [receive_attached]: 6.39819e-07 [after_resolve]: 9.66992 [a_after_grad]: 1.24397e-05 [special_op_eliminate]: 7.81985e-06 [renormalize]: 9.03383e-08 [add_forward_monad_depend]: 7.39936e-07 [auto_monad_grad]: 9.69972e-07 [auto_monad_eliminator]: 1.613e-05 [cse]: 1.70302e-05 [a_3]: 4.98099e-05 [py_interpret_to_execute_after_opt_a]: 9.22009e-06 [slice_cell_reuse_recomputed_activation]: 1.70991e-06 [rewriter_after_opt_a]: 0.00013119 [convert_after_rewriter]: 7.66991e-06 [order_py_execute_after_rewriter]: 5.70016e-06 [opt_b]: 0.00023968, [1] [Cycle 1]: 0.00023469, [7] [b_1]: 0.00016109 [b_2]: 1.01398e-05 [updatestate_depend_eliminate]: 5.03985e-06 [updatestate_assign_eliminate]: 4.31016e-06 [updatestate_loads_eliminate]: 4.94998e-06 [renormalize]: 2.79862e-07 [cse]: 1.73301e-05 [optimize_parallel_all_gather_comm]: 7.79005e-06 [overlap_param_gather]: 1.0198e-06 [cconv]: 1.50898e-05 [loop_unroll]: 0.00047664 [opt_after_cconv]: 0.00014679, [1] [Cycle 1]: 0.00014095, [7] [c_1]: 5.19399e-05 [parameter_eliminate]: 1.80025e-06 [updatestate_depend_eliminate]: 2.59699e-05 [updatestate_assign_eliminate]: 4.79026e-06 [updatestate_loads_eliminate]: 5.03985e-06 [cse]: 1.91298e-05 [renormalize]: 3.70201e-07 [remove_dup_value]: 9.92976e-06 [tuple_transform]: 6.93602e-05, [1] [Cycle 1]: 6.48899e-05, [2] [d_1]: 5.564e-05 [renormalize]: 2.39816e-07 [partial_unused_args_eliminate]: 1.34995e-06 [add_cache_embedding]: 1.11097e-05 [add_recomputation]: 5.31999e-05 [cse_after_recomputation]: 2.50796e-05, [1] [Cycle 1]: 2.04202e-05, [1] [cse]: 1.54902e-05 [environ_conv]: 6.23008e-06 [swap_dp_allreduce_reducescatter]: 6.8401e-06 [bias_add_comm_swap]: 2.06009e-06 [label_micro_interleaved_index]: 1.80025e-06 [label_fine_grained_interleaved_index]: 1.41002e-06 [merge_cast_opt]: 7.39936e-07 [slice_recompute_activation]: 1.12969e-06 [micro_interleaved_order_control]: 1.49012e-06 [assign_add_opt]: 6.48992e-06 [ForceFp32Comm]: 4.89876e-07 [remove_cast_before_assign_add]: 5.30388e-07 [full_micro_interleaved_order_control]: 1.09989e-06 [reorder_send_recv_between_fp_bp]: 1.17999e-06 [comm_op_add_attrs]: 4.49829e-07 [add_comm_op_reuse_tag]: 4.99655e-07 [interleave_split_concat_branches]: 4.59608e-07 [interleave_parallel_branches]: 4.30271e-07 [overlap_opt_shard_in_pipeline]: 5.30388e-07 [overlap_opt_shard_grad_in_pipeline]: 1.10036e-06 [control_data_broadcast_order]: 5.29923e-07 [grouped_pairwise_exchange_alltoall]: 5.69969e-07 [offloading_packed_experts]: 5.79748e-07 [overlap_recompute_and_grad_model_parallel]: 1.12969e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.69852e-07 [overlap_recompute_allgather_and_fa_grad]: 5.0012e-07 [overlap_grad_ring_attention]: 1.39e-06 [overlap_grad_flash_sp]: 1.196e-05 [begin_end_overlap_inline]: 4.10248e-07 [split_matmul_comm_elemetwise]: 1.09989e-06 [split_layernorm_comm]: 1.02026e-06 [handle_group_info]: 4.4005e-07 [symbol_engine_optimizer]: 8.15201e-05, [1] [Cycle 1]: 7.71298e-05, [6] [build]: 3.2899e-06 [elim_shapecalc]: 1.12001e-05 [elim_not_effective]: 1.529e-05 [opt_reshape]: 8.95001e-06 [fold_const_symbol]: 1.29398e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 8.801e-07 [auto_monad_reorder]: 2.148e-05 [get_jit_bprop_graph]: 3.50177e-07 [rewriter_after_jit_bprop_graph]: 2.90107e-07 [eliminate_special_op_node]: 0.00049237 [distribtued_split]: 3.199e-05 [validate]: 2.97297e-05 [task_emit]: 0.0703597 [execute]: 9.14978e-06 Sums bootstrap : 0.000283s : 0.37% type_inference : 0.002233s : 2.91% auto_monad e-06 [a_after_grad]: 1.15302e-05 [special_op_eliminate]: 6.90995e-06 [renormalize]: 9.03383e-08 [add_forward_monad_depend]: 7.79983e-07 [auto_monad_grad]: 1.19023e-06 [auto_monad_eliminator]: 1.76299e-05 [cse]: 1.96202e-05 [a_3]: 4.86299e-05 [py_interpret_to_execute_after_opt_a]: 9.02964e-06 [slice_cell_reuse_recomputed_activation]: 1.26008e-06 [rewriter_after_opt_a]: 0.00011806 [convert_after_rewriter]: 7.22008e-06 [order_py_execute_after_rewriter]: 4.93042e-06 [opt_b]: 0.00025917, [1] [Cycle 1]: 0.00025376, [7] [b_1]: 0.00017782 [b_2]: 9.91998e-06 [updatestate_depend_eliminate]: 5.4501e-06 [updatestate_assign_eliminate]: 4.59002e-06 [updatestate_loads_eliminate]: 5.15021e-06 [renormalize]: 2.70084e-07 [cse]: 1.93701e-05 [optimize_parallel_all_gather_comm]: 7.49016e-06 [overlap_param_gather]: 7.79983e-07 [cconv]: 1.41901e-05 [loop_unroll]: 0.00045975 [opt_after_cconv]: 0.00013107, [1] [Cycle 1]: 0.00012552, [7] [c_1]: 5.19101e-05 [parameter_eliminate]: 2.38977e-06 [updatestate_depend_eliminate]: 8.14022e-06 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 5.14975e-06 [cse]: 2.209e-05 [renormalize]: 3.80445e-07 [remove_dup_value]: 9.66014e-06 [tuple_transform]: 6.64098e-05, [1] [Cycle 1]: 6.21802e-05, [2] [d_1]: 5.31003e-05 [renormalize]: 1.60187e-07 [partial_unused_args_eliminate]: 1.22003e-06 [add_cache_embedding]: 1.08397e-05 [add_recomputation]: 5.10002e-05 [cse_after_recomputation]: 2.65604e-05, [1] [Cycle 1]: 2.20099e-05, [1] [cse]: 1.71601e-05 [environ_conv]: 5.68014e-06 [swap_dp_allreduce_reducescatter]: 6.67991e-06 [bias_add_comm_swap]: 1.47009e-06 [label_micro_interleaved_index]: 1.00024e-06 [label_fine_grained_interleaved_index]: 1.00024e-06 [merge_cast_opt]: 5.69969e-07 [slice_recompute_activation]: 7.5018e-07 [micro_interleaved_order_control]: 8.40053e-07 [assign_add_opt]: 6.07967e-06 [ForceFp32Comm]: 5.29923e-07 [remove_cast_before_assign_add]: 5.69969e-07 [full_micro_interleaved_order_control]: 1.09011e-06 [reorder_send_recv_between_fp_bp]: 8.79634e-07 [comm_op_add_attrs]: 4.60073e-07 [add_comm_op_reuse_tag]: 4.89876e-07 [interleave_split_concat_branches]: 4.89876e-07 [interleave_parallel_branches]: 4.4005e-07 [overlap_opt_shard_in_pipeline]: 6.70087e-07 [overlap_opt_shard_grad_in_pipeline]: 8.79634e-07 [control_data_broadcast_order]: 5.40167e-07 [grouped_pairwise_exchange_alltoall]: 5.20144e-07 [offloading_packed_experts]: 5.09899e-07 [overlap_recompute_and_grad_model_parallel]: 8.79634e-07 [overlap_grad_matmul_and_grad_allreduce]: 3.7998e-07 [overlap_recompute_allgather_and_fa_grad]: 4.4005e-07 [overlap_grad_ring_attention]: 9.4017e-07 [overlap_grad_flash_sp]: 1.14199e-05 [begin_end_overlap_inline]: 4.09782e-07 [split_matmul_comm_elemetwise]: 8.99658e-07 [split_layernorm_comm]: 8.00006e-07 [handle_group_info]: 3.7998e-07 [symbol_engine_optimizer]: 8.247e-05, [1] [Cycle 1]: 7.82902e-05, [6] [build]: 2.92016e-06 [elim_shapecalc]: 1.13999e-05 [elim_not_effective]: 1.62199e-05 [opt_reshape]: 9.31975e-06 [fold_const_symbol]: 1.331e-05 [renormalize]: 2.10013e-07 [pipeline_parallel_scheduler]: 1.09989e-06 [auto_monad_reorder]: 2.09203e-05 [get_jit_bprop_graph]: 2.40281e-07 [rewriter_after_jit_bprop_graph]: 2.19792e-07 [eliminate_special_op_node]: 0.00047014 [distribtued_split]: 3.24999e-05 [validate]: 2.978e-05 [task_emit]: 0.0697849 [execute]: 7.14976e-06 Sums bootstrap : 0.000301s : 0.39% type_inference : 0.002449s : 3.21 : 0.000099s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000020s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000530s : 0.69% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000218s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000398s : 0.52% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000037s : 0.05% optimize.opt_a.cse : 0.000041s : 0.05% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000131s : 0.17% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.21% optimize.opt_b.b_2 : 0.0% auto_monad : 0.000125s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000034s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000528s : 0.69% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000216s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000009s : 0.01% optimize.opt_a.merge_comm : 0.000012s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000420s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000118s : 0.15% optimize.convert_after_rewriter : 0.000007s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000178s : 0.23% optimize.opt_b.b_2 00010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000017s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000477s : 0.62% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000026s : 0.03% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000019s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000053s : 0.07% optimize.cse_after_recomputation.cse : 0.000015s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000006s : 0.01% optimize.ForceFp32Comm : 0.000000s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000000s : 0.00% optimize.add_comm_op_reuse_tag : 0.000000s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000000s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimize : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000007s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000014s : 0.02% optimize.loop_unroll : 0.000460s : 0.60% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000051s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000006s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000000s : 0.00% optimize.add_comm_op_reuse_tag : 0.000000s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000000s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000011s : 0.01% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000000s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbr.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000021s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000492s : 0.64% distribtued_split : 0.000032s : 0.04% validate : 0.000030s : 0.04% task_emit : 0.070360s : 91.78% execute : 0.000009s : 0.01% ol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000021s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000470s : 0.62% distribtued_split : 0.000032s : 0.04% validate : 0.000030s : 0.04% task_emit : 0.069785s : 91.43% execute : 0.000007s : 0.01% Time group info: ------[substitution.] 0.000113 63 4.23% : 0.000005s : 2: substitution.depend_value_elim 1.89% : 0.000002s : 5: substitution.elim_not_effective 1.71% : 0.000002s : 5: substitution.fold_const_symbol 6.10% : 0.000007s : 6: substitution.graph_param_transform 49.04% : 0.000055s : 1: substitution.inline 4.49% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.64% : 0.000004s : 6: substitution.load_eliminater 2.02% : 0.000002s : 2: substitution.reduce_all_const_elim 6.97% : 0.000008s : 10: substitution.remove_not_recompute_node 2.65% : 0.000003s : 2: substitution.replace_old_param 9.15% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.11% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002211 2 90.33% : 0.001997s : 1: type_inference.infer 9.67% : 0.000214s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000054 1 100.00% : 0.000054s : 1: match.inline ------[predicate.] 0.000225 1420 0.85% : 0.000002s : 13: predicate.accumulaten_eliminater 1.05% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.82% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.13% : 0.000005s : 25: predicate.arithmetic_simplify 0.83% : 0.000002s : 13: predicate.cast_eliminate 0.84% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.49% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.33% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.92% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.53% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_depend_swap 1.88% : 0.000004s : 31: predicate.environ_get_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.29% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.30% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.82% : 0.000013s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.43% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000002s : 12: predicate.less_batch_normalization 1.73% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.41% : 0.000005s : 38: predicate.load_eliminater 1.38% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.87% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.74% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.67% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.89% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.13% : 0.000003s : 13: predicate.reduce_eliminate 0.53% : 0.000001s : 12: predicate.remove_not_recompute_node 1.09% : 0.000002s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.88% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.49% : 0.000001s : 6: predicate.row_tensor_eliminate 0.97% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.36% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 1.02% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.02% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.43% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.89% : 0.000002s : 14: predicate.switch_defer_inline 1.70% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.27% : 0.000010s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.84% : 0.000002s : 13: predicate.transpose_eliminate 1.76% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.83% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.87% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.56% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.41% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.46% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.49% : 0.000001s : 6: predicate.value_based_eliminate 0.87% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.89% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000123 4 8.25% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.75% : 0.000113s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089128 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.06% : 0.000057s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.12% : 0.000110s : 1: auto_monad 0.03% : 0.000028s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.34% : 0.000306s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000011s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.00% : 0.000004s : 1: dataset_repeat_opt 0.04% : 0.00003 Time group info: ------[substitution.] 0.000113 63 4.65% : 0.000005s : 2: substitution.depend_value_elim 1.84% : 0.000002s : 5: substitution.elim_not_effective 1.91% : 0.000002s : 5: substitution.fold_const_symbol 4.95% : 0.000006s : 6: substitution.graph_param_transform 49.10% : 0.000055s : 1: substitution.inline 4.71% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.21% : 0.000004s : 6: substitution.load_eliminater 2.34% : 0.000003s : 2: substitution.reduce_all_const_elim 6.54% : 0.000007s : 10: substitution.remove_not_recompute_node 2.22% : 0.000003s : 2: substitution.replace_old_param 9.60% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.94% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002421 2 88.95% : 0.002153s : 1: type_inference.infer 11.05% : 0.000268s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000054 1 100.00% : 0.000054s : 1: match.inline ------[predicate.] 0.000225 1420 0.96% : 0.000002s : 13: predicate.accumulaten_eliminater 1.19% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.27% : 0.000005s : 25: predicate.arithmetic_simplify 0.78% : 0.000002s : 13: predicate.cast_eliminate 0.76% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.20% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.55% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.24% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_depend_swap 2.00% : 0.000004s : 31: predicate.environ_get_eliminate 1.11% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.80% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.25% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.28% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.88% : 0.000002s : 12: predicate.incorporate_call_switch 5.64% : 0.000013s : 63: predicate.inline 1.07% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.10% : 0.000002s : 12: predicate.less_batch_normalization 1.69% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.40% : 0.000005s : 38: predicate.load_eliminater 1.38% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.74% : 0.000002s : 12: predicat9s : 1: distribtued_split 0.57% : 0.000505s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000485s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001078s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 5.81% : 0.005180s : 1: opt_a 0.17% : 0.000151s : 1: opt_after_cconv 0.27% : 0.000243s : 1: opt_b 7.62% : 0.006791s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000005s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000026s : 1: pre_auto_parallel 0.02% : 0.000016s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.24% : 0.000214s : 1: renormalize.infer 0.20% : 0.000179s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000136s : 1: rewriter_after_opt_a 0.04% : 0.000034s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000085s : 1: symbol_engine_optimizer 78.97% : 0.070386s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.52% : 0.002249s : 1: type_inference 0.07% : 0.000061s : 1: validate e.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.90% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.52% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.87% : 0.000002s : 13: predicate.print_const_string_wrapper 0.90% : 0.000002s : 12: predicate.reduce_all_const_elim 1.07% : 0.000002s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.17% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.85% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 0.99% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.04% : 0.000002s : 12: predicate.shard_identity_eliminate 1.32% : 0.000003s : 18: predicate.special_op_eliminate 1.00% : 0.000002s : 12: predicate.specialize_transform 1.05% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.32% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.68% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.08% : 0.000009s : 43: predicate.switch_simplify 0.85% : 0.000002s : 13: predicate.tile_eliminate 0.88% : 0.000002s : 13: predicate.transpose_eliminate 1.70% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.63% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.65% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.41% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.36% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.55% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.76% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000140 4 6.30% : 0.000009s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.70% : 0.000132s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089447 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.06% : 0.000056s : 1: add_recomputation 0.01% : 0.000009s : 1: assign_add_opt 0.15% : 0.000137s : 1: auto_monad 0.03% : 0.000027s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.36% : 0.000326s : 1: bootstrap 0.02% : 0.000018s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000011s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.04% : 0.000040s : 1: distribtued_split 0.54% : 0.000483s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000015s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.52% : 0.000469s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000003s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.20% : 0.001073s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.19% : 0.000168s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.17% : 0.005521s : 1: opt_a 0.15% : 0.000135s : 1: opt_after_cconv 0.29% : 0.000262s : 1: opt_b 7.94% : 0.007098s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000008s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.01% : 0.000013s : 1: remove_dup_value 0.26% : 0.000229s : 1: renormalize.infer 0.21% : 0.000186s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000123s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000003s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000085s : 1: symbol_engine_optimizer 78.04% : 0.069808s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.76% : 0.002466s : 1: type_inference 0.07% : 0.000059s : 1: validate TotalTime = 0.0814104, [21] [bootstrap]: 0.00030723 [type_inference]: 0.00262671 [auto_monad]: 0.00012291 [graph_reusing]: 2.44007e-06 [inline]: 1.49012e-06 [parallel-infer-symbol]: 2.10013e-06 [pre_auto_parallel]: 2.53501e-05 [insert-virtual-dataset]: 2.81027e-06 [parallel-infer-symbol-second]: 4.4005e-07 [dataset_repeat_opt]: 1.22003e-06 [pipeline_split]: 1.43005e-06 [optimize]: 0.00713092, [52] [py_interpret_to_execute]: 1.43996e-05 [rewriter_before_opt_a]: 3.68697e-05 [opt_a]: 0.00543709, [2] [Cycle 1]: 0.00151326, [43] [expand_dump_flag]: 2.95043e-06 [switch_simplify]: 3.09902e-05 [loop_unroll]: 1.31601e-05 [a_1]: 0.00033862 [recompute_prepare]: 8.66968e-06 [updatestate_depend_eliminate]: 9.35001e-06 [updatestate_assign_eliminate]: 6.02985e-06 [updatestate_loads_eliminate]: 7.24988e-06 [parameter_eliminate]: 3.38024e-06 [a_2]: 0.00011699 [accelerated_algorithm]: 8.76002e-06 [shard]: 1.89012e-06 [meta_shard_fg_expand]: 4.18024e-06 [shard_inline]: 8.19983e-06 [auto_parallel]: 1.12699e-05 [parallel]: 6.98026e-06 [flash_sp]: 1.02799e-05 [merge_comm]: 7.56001e-06 [allreduce_fusion]: 5.34998e-06 [matmul_add_comm_reduction]: 1.00797e-05 [allreduce_slice_to_reducescatter]: 7.59959e-07 [virtual_shard_identity]: 9.39006e-06 [virtual_dataset]: 8.2599e-06 [get_grad_eliminate_]: 7.53999e-06 [virtual_output]: 7.93999e-06 [merge_forward]: 5.81983e-06 [cell_reuse_recompute_pass]: 2.23005e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.71103e-05 [before_grad]: 1.35503e-05 [inplace_validation]: 5.00027e-06 [meta_fg_expand]: 5.43986e-06 [inplace_validation_after_expand]: 6.17001e-06 [flash_sp_send_recv_attached]: 4.71016e-06 [receive_attached]: 2.46987e-06 [after_resolve]: 1.17896e-05 [a_after_grad]: 1.268e-05 [special_op_eliminate]: 7.91019e-06 [renormalize]: 0.00042963 [add_forward_monad_depend]: 3.45986e-06 [auto_monad_grad]: 1.8701e-06 [auto_monad_eliminator]: 3.13502e-05 [cse]: 3.09101e-05 [a_3]: 5.981e-05 [Cycle 2]: 0.00077308, [43] [expand_dump_flag]: 1.17021e-06 [switch_simplify]: 9.05991e-06 [loop_unroll]: 7.7202e-06 [a_1]: 0.00020376 [recompute_prepare]: 7.3798e-06 [updatestate_depend_eliminate]: 6.29015e-06 [updatestate_assign_eliminate]: 4.52017e-06 [updatestate_loads_eliminate]: 5.17955e-06 [parameter_eliminate]: 1.32015e-06 [a_2]: 0.00010471 [accelerated_algorithm]: 8.56025e-06 [shard]: 1.22003e-06 [meta_shard_fg_expand]: 2.48989e-06 [shard_inline]: 7.83987e-06 [auto_parallel]: 1.06702e-05 [parallel]: 3.68012e-06 [flash_sp]: 3.09991e-06 [merge_comm]: 5.89993e-06 [allreduce_fusion]: 4.63985e-06 [matmul_add_comm_reduction]: 8.08015e-06 [allreduce_slice_to_reducescatter]: 3.00352e-07 [virtual_shard_identity]: 8.47038e-06 [virtual_dataset]: 7.4897e-06 [get_grad_eliminate_]: 7.23032e-06 [virtual_output]: 7.20005e-06 [merge_forward]: 4.75999e-06 [cell_reuse_recompute_pass]: 2.01957e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.52299e-05 [before_grad]: 1.23903e-05 [inplace_validation]: 4.24031e-06 [meta_fg_expand]: 5.02029e-06 [inplace_validation_after_expand]: 5.24009e-06 [flash_sp_send_recv_attached]: 8.30274e-07 [receive_attached]: 7.10133e-07 [after_resolve]: 9.90042e-06 [a_after_grad]: 1.19e-05 [special_op_eliminate]: 7.39982e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.30157e-07 [auto_monad_grad]: 1.23028e-06 [auto_monad_eliminator]: 1.746e-05 [cse]: 1.87997e-05 [a_3]: 4.81801e-05 [py_interpret_to_execute_after_opt_a]: 9.4003e-06 [slice_cell_reuse_recomputed_activation]: 2.23005e-06 [rewriter_after_opt_a]: 0.00013736 [convert_after_rewriter]: 8.90018e-06 [order_py_execute_after_rewriter]: 6.14999e-06 [opt_b]: 0.00025829, [1] [Cycle 1]: 0.00025275, [7] [b_1]: 0.00017837 [b_2]: 9.35979e-06 [updatestate_depend_eliminate]: 5.43986e-06 [updatestate_assign_eliminate]: 4.33018e-06 [updatestate_loads_eliminate]: 5.05988e-06 [renormalize]: 3.30154e-07 [cse]: 1.81696e-05 [optimize_parallel_all_gather_comm]: 8.27992e-06 [overlap_param_gather]: 1.24006e-06 [cconv]: 2.31201e-05 [loop_unroll]: 0.00049927 [opt_after_cconv]: 0.00013351, [1] [Cycle 1]: 0.00012703, [7] [c_1]: 5.29597e-05 [parameter_eliminate]: 2.73017e-06 [updatestate_depend_eliminate]: 8.48016e-06 [updatestate_assign_eliminate]: 4.74975e-06 [updatestate_loads_eliminate]: 5.47012e-06 [cse]: 2.066e-05 [renormalize]: 5.49946e-07 [remove_dup_value]: 1.24802e-05 [tuple_transform]: 6.96802e-05, [1] [Cycle 1]: 6.53099e-05, [2] [d_1]: 5.59203e-05 [renormalize]: 2.00234e-07 [partial_unused_args_eliminate]: 1.76998e-06 [add_cache_embedding]: 1.40001e-05 [add_recomputation]: 6.14203e-05 [cse_after_recomputation]: 2.53301e-05, [1] [Cycle 1]: 2.07298e-05, [1] [cse]: 1.59098e-05 [environ_conv]: 7.41007e-06 [swap_dp_allreduce_reducescatter]: 7.07014e-06 [bias_add_comm_swap]: 2.16998e-06 [label_micro_interleaved_index]: 1.84029e-06 [label_fine_grained_interleaved_index]: 2.08989e-06 [merge_cast_opt]: 1.2801e-06 [slice_recompute_activation]: 1.53994e-06 [micro_interleaved_order_control]: 1.64006e-06 [assign_add_opt]: 7.30995e-06 [ForceFp32Comm]: 7.79983e-07 [remove_cast_before_assign_add]: 9.89996e-07 [full_micro_interleaved_order_control]: 1.84961e-06 [reorder_send_recv_between_fp_bp]: 1.70013e-06 [comm_op_add_attrs]: 1.12969e-06 [add_comm_op_reuse_tag]: 1.01002e-06 [interleave_split_concat_branches]: 8.99658e-07 [interleave_parallel_branches]: 8.00006e-07 [overlap_opt_shard_in_pipeline]: 1.47987e-06 [overlap_opt_shard_grad_in_pipeline]: 1.98977e-06 [control_data_broadcast_order]: 8.60076e-07 [grouped_pairwise_exchange_alltoall]: 1.17021e-06 [offloading_packed_experts]: 1.36998e-06 [overlap_recompute_and_grad_model_parallel]: 1.83983e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.37975e-06 [overlap_recompute_allgather_and_fa_grad]: 1.11014e-06 [overlap_grad_ring_attention]: 1.7602e-06 [overlap_grad_flash_sp]: 1.42902e-05 [begin_end_overlap_inline]: 7.39936e-07 [split_matmul_comm_elemetwise]: 1.51992e-06 [split_layernorm_comm]: 1.72015e-06 [handle_group_info]: 1.24006e-06 [symbol_engine_optimizer]: 8.247e-05, [1] [Cycle 1]: 7.78497e-05, [6] [build]: 3.70014e-06 [elim_shapecalc]: 1.12103e-05 [elim_not_effective]: 1.546e-05 [opt_reshape]: 8.52998e-06 [fold_const_symbol]: 1.373e-05 [renormalize]: 2.79862e-07 [pipeline_parallel_scheduler]: 1.34995e-06 [auto_monad_reorder]: 2.961e-05 [get_jit_bprop_graph]: 4.00003e-07 [rewriter_after_jit_bprop_graph]: 4.09782e-07 [eliminate_special_op_node]: 0.00051794 [distribtued_split]: 4.04101e-05 [validate]: 3.616e-05 [task_emit]: 0.0703082 [execute]: 1.05398e-05 Sums bootstrap : 0.000307s : 0.40% type_inference : 0.002627s : 3.40% auto_monad : 0.000123s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000542s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000222s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000430s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.06% optimize.opt_a.cse : 0.000050s : 0.06% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000137s : 0.18% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000178s : 0.23% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000499s : 0.65% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000518s : 0.67% distribtued_split : 0.000040s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.070308s : 90.96% execute : 0.000011s : 0.01% TotalTime = 0.081644, [21] [bootstrap]: 0.00030698 [type_inference]: 0.00252064 [auto_monad]: 0.00012436 [graph_reusing]: 1.85985e-06 [inline]: 1.76998e-06 [parallel-infer-symbol]: 2.12993e-06 [pre_auto_parallel]: 2.51597e-05 [insert-virtual-dataset]: 3.03006e-06 [parallel-infer-symbol-second]: 4.49829e-07 [dataset_repeat_opt]: 1.2503e-06 [pipeline_split]: 1.59023e-06 [optimize]: 0.00739378, [52] [py_interpret_to_execute]: 1.38199e-05 [rewriter_before_opt_a]: 3.33698e-05 [opt_a]: 0.00567214, [2] [Cycle 1]: 0.00155247, [43] [expand_dump_flag]: 3.00026e-06 [switch_simplify]: 2.97199e-05 [loop_unroll]: 1.35098e-05 [a_1]: 0.00034108 [recompute_prepare]: 8.93977e-06 [updatestate_depend_eliminate]: 8.27014e-06 [updatestate_assign_eliminate]: 5.84032e-06 [updatestate_loads_eliminate]: 7.47992e-06 [parameter_eliminate]: 3.5502e-06 [a_2]: 0.0001171 [accelerated_algorithm]: 8.46013e-06 [shard]: 1.8999e-06 [meta_shard_fg_expand]: 4.56022e-06 [shard_inline]: 8.72975e-06 [auto_parallel]: 1.17901e-05 [parallel]: 6.9798e-06 [flash_sp]: 1.04997e-05 [merge_comm]: 7.58003e-06 [allreduce_fusion]: 5.0799e-06 [matmul_add_comm_reduction]: 1.05202e-05 [allreduce_slice_to_reducescatter]: 4.69852e-07 [virtual_shard_identity]: 9.70019e-06 [virtual_dataset]: 8.27992e-06 [get_grad_eliminate_]: 8.22963e-06 [virtual_output]: 7.81985e-06 [merge_forward]: 6.64033e-06 [cell_reuse_recompute_pass]: 1.93994e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.71899e-05 [before_grad]: 1.413e-05 [inplace_validation]: 5.36023e-06 [meta_fg_expand]: 5.30994e-06 [inplace_validation_after_expand]: 6.42985e-06 [flash_sp_send_recv_attached]: 5.25033e-06 [receive_attached]: 3.13995e-06 [after_resolve]: 1.17896e-05 [a_after_grad]: 1.28504e-05 [special_op_eliminate]: 7.87992e-06 [renormalize]: 0.00043982 [add_forward_monad_depend]: 4.0899e-06 [auto_monad_grad]: 1.82027e-06 [auto_monad_eliminator]: 3.23299e-05 [cse]: 3.32901e-05 [a_3]: 6.07204e-05 [Cycle 2]: 0.0008048, [43] [expand_dump_flag]: 1.22003e-06 [switch_simplify]: 9.28994e-06 [loop_unroll]: 7.85012e-06 [a_1]: 0.00020313 [recompute_prepare]: 7.79983e-06 [updatestate_depend_eliminate]: 6.27013e-06 [updatestate_assign_eliminate]: 4.85964e-06 [updatestate_loads_eliminate]: 5.26989e-06 [parameter_eliminate]: 1.20979e-06 [a_2]: 0.00010727 [accelerated_algorithm]: 8.19983e-06 [shard]: 1.26008e-06 [meta_shard_fg_expand]: 2.51969e-06 [shard_inline]: 8.17003e-06 [auto_parallel]: 1.19596e-05 [parallel]: 3.65032e-06 [flash_sp]: 3.69968e-06 [merge_comm]: 6.23008e-06 [allreduce_fusion]: 5.13997e-06 [matmul_add_comm_reduction]: 8.12998e-06 [allreduce_slice_to_reducescatter]: 2.80328e-07 [virtual_shard_identity]: 9.41008e-06 [virtual_dataset]: 8.46013e-06 [get_grad_eliminate_]: 8.06991e-06 [virtual_output]: 7.39004e-06 [merge_forward]: 4.94998e-06 [cell_reuse_recompute_pass]: 1.95019e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.59801e-05 [before_grad]: 1.23801e-05 [inplace_validation]: 4.33996e-06 [meta_fg_expand]: 5.03007e-06 [inplace_validation_after_expand]: 5.3402e-06 [flash_sp_send_recv_attached]: 1.00024e-06 [receive_attached]: 7.79983e-07 [after_resolve]: 1.02799e-05 [a_after_grad]: 1.20299e-05 [special_op_eliminate]: 7.60984e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 1.03004e-06 [auto_monad_grad]: 1.32993e-06 [auto_monad_eliminator]: 1.86497e-05 [cse]: 1.99201e-05 [a_3]: 5.01499e-05 [py_interpret_to_execute_after_opt_a]: 9.6499e-06 [slice_cell_reuse_recomputed_activation]: 2.43029e-06 [rewriter_after_opt_a]: 0.00015514 [convert_after_rewriter]: 9.21963e-06 [order_py_execute_after_rewriter]: 5.83008e-06 [opt_b]: 0.00024695, [1] [Cycle 1]: 0.00024065, [7] [b_1]: 0.00016309 [b_2]: 9.81987e-06 [updatestate_depend_eliminate]: 5.56e-06 [updatestate_assign_eliminate]: 4.72972e-06 [updatestate_loads_eliminate]: 5.32018e-06 [renormalize]: 2.89641e-07 [cse]: 1.91699e-05 [optimize_parallel_all_gather_comm]: 8.96025e-06 [overlap_param_gather]: 1.78022e-06 [cconv]: 2.38703e-05 [loop_unroll]: 0.00049276 [opt_after_cconv]: 0.00013746, [1] [Cycle 1]: 0.0001309, [7] [c_1]: 5.41699e-05 [parameter_eliminate]: 2.55974e-06 [updatestate_depend_eliminate]: 8.29995e-06 [updatestate_assign_eliminate]: 4.73997e-06 [updatestate_loads_eliminate]: 5.30016e-06 [cse]: 2.27499e-05 [renormalize]: 3.39933e-07 [remove_dup_value]: 1.36299e-05 [tuple_transform]: 7.14101e-05, [1] [Cycle 1]: 6.67102e-05, [2] [d_1]: 5.63501e-05 [renormalize]: 2.30037e-07 [partial_unused_args_eliminate]: 1.88965e-06 [add_cache_embedding]: 1.42399e-05 [add_recomputation]: 6.17797e-05 [cse_after_recomputation]: 2.91099e-05, [1] [Cycle 1]: 2.384e-05, [1] [cse]: 1.824e-05 [environ_conv]: 7.57026e-06 [swap_dp_allreduce_reducescatter]: 6.87037e-06 [bias_add_comm_swap]: 2.82004e-06 [label_micro_interleaved_index]: 1.89012e-06 [label_fine_grained_interleaved_index]: 1.8701e-06 [merge_cast_opt]: 1.41002e-06 [slice_recompute_activation]: 2.1602e-06 [micro_interleaved_order_control]: 2.25008e-06 [assign_add_opt]: 6.65989e-06 [ForceFp32Comm]: 1.22003e-06 [remove_cast_before_assign_add]: 6.99889e-07 [full_micro_interleaved_order_control]: 1.83005e-06 [reorder_send_recv_between_fp_bp]: 2.37022e-06 [comm_op_add_attrs]: 1.02026e-06 [add_comm_op_reuse_tag]: 8.801e-07 [interleave_split_concat_branches]: 6.9011e-07 [interleave_parallel_branches]: 7.59959e-07 [overlap_opt_shard_in_pipeline]: 1.43005e-06 [overlap_opt_shard_grad_in_pipeline]: 2.25008e-06 [control_data_broadcast_order]: 9.30391e-07 [grouped_pairwise_exchange_alltoall]: 1.64984e-06 [offloading_packed_experts]: 9.59728e-07 [overlap_recompute_and_grad_model_parallel]: 2.25008e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.34017e-06 [overlap_recompute_allgather_and_fa_grad]: 8.29808e-07 [overlap_grad_ring_attention]: 2.33995e-06 [overlap_grad_flash_sp]: 1.43498e-05 [begin_end_overlap_inline]: 8.40053e-07 [split_matmul_comm_elemetwise]: 1.91014e-06 [split_layernorm_comm]: 2.12016e-06 [handle_group_info]: 7.69738e-07 [symbol_engine_optimizer]: 8.67499e-05, [1] [Cycle 1]: 8.19704e-05, [6] [build]: 3.37977e-06 [elim_shapecalc]: 1.22599e-05 [elim_not_effective]: 1.65198e-05 [opt_reshape]: 9.04966e-06 [fold_const_symbol]: 1.37696e-05 [renormalize]: 1.40164e-07 [pipeline_parallel_scheduler]: 1.5297e-06 [auto_monad_reorder]: 2.89199e-05 [get_jit_bprop_graph]: 4.20026e-07 [rewriter_after_jit_bprop_graph]: 4.99655e-07 [eliminate_special_op_node]: 0.0005167 [distribtued_split]: 4.20399e-05 [validate]: 3.595e-05 [task_emit]: 0.0703725 [execute]: 1.116e-05 Sums bootstrap : 0.000307s : 0.40% type_inference : 0.002521s : 3.26% auto_monad : 0.000124s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000033s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000544s : 0.70% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000224s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.01% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000440s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000111s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000155s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000493s : 0.64% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000517s : 0.67% distribtued_split : 0.000042s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.070373s : 91.04% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000129 63 4.90% : 0.000006s : 2: substitution.depend_value_elim 1.76% : 0.000002s : 5: substitution.elim_not_effective 2.22% : 0.000003s : 5: substitution.fold_const_symbol 5.50% : 0.000007s : 6: substitution.graph_param_transform 49.43% : 0.000064s : 1: substitution.inline 4.09% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.47% : 0.000004s : 6: substitution.load_eliminater 2.70% : 0.000003s : 2: substitution.reduce_all_const_elim 5.91% : 0.000008s : 10: substitution.remove_not_recompute_node 2.79% : 0.000004s : 2: substitution.replace_old_param 9.10% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.11% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002597 2 89.78% : 0.002332s : 1: type_inference.infer 10.22% : 0.000265s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000062 1 100.00% : 0.000062s : 1: match.inline ------[predicate.] 0.000229 1420 0.75% : 0.000002s : 13: predicate.accumulaten_eliminater 1.23% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.81% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.51% : 0.000006s : 25: predicate.arithmetic_simplify 0.87% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.52% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.44% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.88% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.85% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.57% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.18% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.90% : 0.000004s : 31: predicate.environ_get_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.80% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.31% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.76% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.71% : 0.000013s : 63: predicate.inline 0.99% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.21% : 0.000003s : 12: predicate.less_batch_normalization 1.87% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000005s : 38: predicate.load_eliminater 1.19% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.92% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.69% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.75% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.69% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.22% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.83% : 0.000002s : 13: predicate.print_const_string_wrapper 0.79% : 0.000002s : 12: predicate.reduce_all_const_elim 1.08% : 0.000002s : 13: predicate.reduce_eliminate 0.69% : 0.000002s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.03% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000002s : 12: predicate.shard_identity_eliminate 1.39% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.24% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.93% : 0.000002s : 14: predicate.switch_defer_inline 1.62% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.54% : 0.000010s : 43: predicate.switch_simplify 0.73% : 0.000002s : 13: predicate.tile_eliminate 0.83% : 0.000002s : 13: predicate.transpose_eliminate 1.69% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.74% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.50% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.83% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.50% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.34% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.47% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.55% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000151 4 11.00% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.00% : 0.000134s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090367 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000135s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000331s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.59% : 0.000532s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000509s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001098s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.19% : 0.000168s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 6.02% : 0.005441s : 1: opt_a 0.15% : 0.000138s : 1: opt_after_cconv 0.29% : 0.000261s : 1: opt_b 7.90% : 0.007139s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.25% : 0.000230s : 1: renormalize.infer 0.22% : 0.000195s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000143s : 1: rewriter_after_opt_a 0.05% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000086s : 1: symbol_engine_optimizer 77.84% : 0.070339s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.93% : 0.002645s : 1: type_inference 0.08% : 0.000071s : 1: validate Time group info: ------[substitution.] 0.000131 63 4.92% : 0.000006s : 2: substitution.depend_value_elim 2.09% : 0.000003s : 5: substitution.elim_not_effective 1.85% : 0.000002s : 5: substitution.fold_const_symbol 5.08% : 0.000007s : 6: substitution.graph_param_transform 49.35% : 0.000065s : 1: substitution.inline 3.97% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.22% : 0.000004s : 6: substitution.load_eliminater 2.75% : 0.000004s : 2: substitution.reduce_all_const_elim 6.16% : 0.000008s : 10: substitution.remove_not_recompute_node 2.75% : 0.000004s : 2: substitution.replace_old_param 9.09% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.76% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002491 2 89.19% : 0.002222s : 1: type_inference.infer 10.81% : 0.000269s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000063 1 100.00% : 0.000063s : 1: match.inline ------[predicate.] 0.000228 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.15% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.25% : 0.000005s : 25: predicate.arithmetic_simplify 0.95% : 0.000002s : 13: predicate.cast_eliminate 0.93% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.49% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.49% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.79% : 0.000002s : 12: predicate.depend_value_elim 0.89% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.49% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_depend_swap 1.95% : 0.000004s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.86% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.32% : 0.000003s : 14: predicate.float_depend_g_call 0.79% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.20% : 0.000000s : 6: predicate.fold_const_symbol 0.84% : 0.000002s : 12: predicate.get_grad_eliminate 0.48% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.69% : 0.000013s : 63: predicate.inline 1.00% : 0.000002s : 12: predicate.inline_without_move 0.43% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.96% : 0.000002s : 12: predicate.less_batch_normalization 1.63% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000005s : 38: predicate.load_eliminater 1.16% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.30% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.95% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.84% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.74% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.72% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.52% : 0.000001s : 6: predicate.parallel_virtual_node 1.14% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.81% : 0.000002s : 13: predicate.print_const_string_wrapper 0.90% : 0.000002s : 12: predicate.reduce_all_const_elim 1.03% : 0.000002s : 13: predicate.reduce_eliminate 0.51% : 0.000001s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.83% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.95% : 0.000002s : 12: predicate.shard_identity_eliminate 1.37% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.08% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.03% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.71% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.36% : 0.000010s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.93% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.76% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.45% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.80% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.48% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.40% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.45% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.76% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000150 4 11.12% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 88.88% : 0.000134s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090876 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000067s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000137s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.37% : 0.000332s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000014s : 1: convert_after_rewriter 0.04% : 0.000032s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000051s : 1: distribtued_split 0.58% : 0.000531s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000503s : 1: loop_unroll 0.01% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001110s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 6.25% : 0.005676s : 1: opt_a 0.16% : 0.000142s : 1: opt_after_cconv 0.28% : 0.000250s : 1: opt_b 8.14% : 0.007402s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000238s : 1: renormalize.infer 0.22% : 0.000197s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000161s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000006s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000090s : 1: symbol_engine_optimizer 77.47% : 0.070404s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.79% : 0.002539s : 1: type_inference 0.08% : 0.000071s : 1: validate TotalTime = 0.0830765, [21] [bootstrap]: 0.00032601 [type_inference]: 0.0026941 [auto_monad]: 0.00014501 [graph_reusing]: 2.99979e-06 [inline]: 1.53016e-06 [parallel-infer-symbol]: 2.54996e-06 [pre_auto_parallel]: 2.80398e-05 [insert-virtual-dataset]: 3.05008e-06 [parallel-infer-symbol-second]: 4.30271e-07 [dataset_repeat_opt]: 1.32993e-06 [pipeline_split]: 2.21981e-06 [optimize]: 0.007928, [52] [py_interpret_to_execute]: 1.69901e-05 [rewriter_before_opt_a]: 4.32003e-05 [opt_a]: 0.00606989, [2] [Cycle 1]: 0.00175901, [43] [expand_dump_flag]: 3.76999e-06 [switch_simplify]: 3.378e-05 [loop_unroll]: 1.62702e-05 [a_1]: 0.00041005 [recompute_prepare]: 1.06203e-05 [updatestate_depend_eliminate]: 1.00597e-05 [updatestate_assign_eliminate]: 6.78981e-06 [updatestate_loads_eliminate]: 8.48016e-06 [parameter_eliminate]: 3.73041e-06 [a_2]: 0.00014284 [accelerated_algorithm]: 1.04699e-05 [shard]: 2.00002e-06 [meta_shard_fg_expand]: 4.52995e-06 [shard_inline]: 1.089e-05 [auto_parallel]: 1.28997e-05 [parallel]: 8.68971e-06 [flash_sp]: 1.27801e-05 [merge_comm]: 9.37004e-06 [allreduce_fusion]: 6.25011e-06 [matmul_add_comm_reduction]: 1.23298e-05 [allreduce_slice_to_reducescatter]: 5.19678e-07 [virtual_shard_identity]: 1.17798e-05 [virtual_dataset]: 1.04299e-05 [get_grad_eliminate_]: 1.00899e-05 [virtual_output]: 8.95979e-06 [merge_forward]: 6.97002e-06 [cell_reuse_recompute_pass]: 2.00002e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.19699e-05 [before_grad]: 1.82102e-05 [inplace_validation]: 5.85988e-06 [meta_fg_expand]: 6.48992e-06 [inplace_validation_after_expand]: 8.01962e-06 [flash_sp_send_recv_attached]: 6.12997e-06 [receive_attached]: 2.56021e-06 [after_resolve]: 1.34101e-05 [a_after_grad]: 1.56104e-05 [special_op_eliminate]: 9.42964e-06 [renormalize]: 0.00048268 [add_forward_monad_depend]: 3.66988e-06 [auto_monad_grad]: 2.11969e-06 [auto_monad_eliminator]: 3.561e-05 [cse]: 3.68701e-05 [a_3]: 6.77202e-05 [Cycle 2]: 0.00094116, [43] [expand_dump_flag]: 1.16974e-06 [switch_simplify]: 1.13803e-05 [loop_unroll]: 9.85991e-06 [a_1]: 0.0002644 [recompute_prepare]: 9.80962e-06 [updatestate_depend_eliminate]: 6.97002e-06 [updatestate_assign_eliminate]: 5.11995e-06 [updatestate_loads_eliminate]: 5.97956e-06 [parameter_eliminate]: 1.80025e-06 [a_2]: 0.00012884 [accelerated_algorithm]: 1.02199e-05 [shard]: 1.32993e-06 [meta_shard_fg_expand]: 3.0701e-06 [shard_inline]: 1.02101e-05 [auto_parallel]: 1.18702e-05 [parallel]: 3.87011e-06 [flash_sp]: 3.50038e-06 [merge_comm]: 6.99982e-06 [allreduce_fusion]: 5.81006e-06 [matmul_add_comm_reduction]: 8.55001e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 1.05901e-05 [virtual_dataset]: 9.26014e-06 [get_grad_eliminate_]: 9.11998e-06 [virtual_output]: 8.71997e-06 [merge_forward]: 4.84986e-06 [cell_reuse_recompute_pass]: 2.25008e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.00598e-05 [before_grad]: 1.61896e-05 [inplace_validation]: 4.80004e-06 [meta_fg_expand]: 5.69969e-06 [inplace_validation_after_expand]: 6.00982e-06 [flash_sp_send_recv_attached]: 9.30391e-07 [receive_attached]: 7.39936e-07 [after_resolve]: 1.19801e-05 [a_after_grad]: 1.48201e-05 [special_op_eliminate]: 9.23965e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 9.4017e-07 [auto_monad_grad]: 1.20001e-06 [auto_monad_eliminator]: 2.06502e-05 [cse]: 2.17399e-05 [a_3]: 5.88903e-05 [py_interpret_to_execute_after_opt_a]: 1.03097e-05 [slice_cell_reuse_recomputed_activation]: 2.65008e-06 [rewriter_after_opt_a]: 0.000152 [convert_after_rewriter]: 9.2499e-06 [order_py_execute_after_rewriter]: 7.03009e-06 [opt_b]: 0.00028702, [1] [Cycle 1]: 0.00028113, [7] [b_1]: 0.00019711 [b_2]: 1.26204e-05 [updatestate_depend_eliminate]: 5.96e-06 [updatestate_assign_eliminate]: 4.84008e-06 [updatestate_loads_eliminate]: 5.77001e-06 [renormalize]: 2.70084e-07 [cse]: 1.99699e-05 [optimize_parallel_all_gather_comm]: 9.33977e-06 [overlap_param_gather]: 1.32993e-06 [cconv]: 2.546e-05 [loop_unroll]: 0.00050006 [opt_after_cconv]: 0.0001506, [1] [Cycle 1]: 0.0001444, [7] [c_1]: 6.33099e-05 [parameter_eliminate]: 2.50991e-06 [updatestate_depend_eliminate]: 8.7996e-06 [updatestate_assign_eliminate]: 5.01983e-06 [updatestate_loads_eliminate]: 5.80028e-06 [cse]: 2.29799e-05 [renormalize]: 4.89876e-07 [remove_dup_value]: 1.571e-05 [tuple_transform]: 8.47802e-05, [1] [Cycle 1]: 7.97198e-05, [2] [d_1]: 6.90301e-05 [renormalize]: 2.90107e-07 [partial_unused_args_eliminate]: 2.41026e-06 [add_cache_embedding]: 1.51098e-05 [add_recomputation]: 6.98101e-05 [cse_after_recomputation]: 2.87499e-05, [1] [Cycle 1]: 2.41897e-05, [1] [cse]: 1.868e-05 [environ_conv]: 7.89994e-06 [swap_dp_allreduce_reducescatter]: 8.12998e-06 [bias_add_comm_swap]: 2.42982e-06 [label_micro_interleaved_index]: 2.39024e-06 [label_fine_grained_interleaved_index]: 2.14018e-06 [merge_cast_opt]: 1.38022e-06 [slice_recompute_activation]: 1.8701e-06 [micro_interleaved_order_control]: 2.35019e-06 [assign_add_opt]: 8.00984e-06 [ForceFp32Comm]: 9.89996e-07 [remove_cast_before_assign_add]: 1.05007e-06 [full_micro_interleaved_order_control]: 2.59979e-06 [reorder_send_recv_between_fp_bp]: 2.31992e-06 [comm_op_add_attrs]: 1.03004e-06 [add_comm_op_reuse_tag]: 1.15996e-06 [interleave_split_concat_branches]: 1.11992e-06 [interleave_parallel_branches]: 7.19912e-07 [overlap_opt_shard_in_pipeline]: 1.26986e-06 [overlap_opt_shard_grad_in_pipeline]: 2.53972e-06 [control_data_broadcast_order]: 1.13016e-06 [grouped_pairwise_exchange_alltoall]: 1.4999e-06 [offloading_packed_experts]: 9.69972e-07 [overlap_recompute_and_grad_model_parallel]: 2.16998e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.89996e-07 [overlap_recompute_allgather_and_fa_grad]: 1.4999e-06 [overlap_grad_ring_attention]: 2.00002e-06 [overlap_grad_flash_sp]: 1.769e-05 [begin_end_overlap_inline]: 8.49832e-07 [split_matmul_comm_elemetwise]: 2.44007e-06 [split_layernorm_comm]: 1.89012e-06 [handle_group_info]: 1.05985e-06 [symbol_engine_optimizer]: 9.90299e-05, [1] [Cycle 1]: 9.401e-05, [6] [build]: 4.59002e-06 [elim_shapecalc]: 1.38902e-05 [elim_not_effective]: 1.98898e-05 [opt_reshape]: 1.02399e-05 [fold_const_symbol]: 1.70004e-05 [renormalize]: 4.20026e-07 [pipeline_parallel_scheduler]: 1.57021e-06 [auto_monad_reorder]: 3.21302e-05 [get_jit_bprop_graph]: 5.69969e-07 [rewriter_after_jit_bprop_graph]: 4.60073e-07 [eliminate_special_op_node]: 0.00052141 [distribtued_split]: 4.608e-05 [validate]: 3.79896e-05 [task_emit]: 0.0710286 [execute]: 1.24504e-05 Sums bootstrap : 0.000326s : 0.41% type_inference : 0.002694s : 3.43% auto_monad : 0.000145s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000028s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000043s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000045s : 0.06% optimize.opt_a.loop_unroll : 0.000026s : 0.03% optimize.opt_a.a_1 : 0.000674s : 0.86% optimize.opt_a.recompute_prepare : 0.000020s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000017s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000006s : 0.01% optimize.opt_a.a_2 : 0.000272s : 0.35% optimize.opt_a.accelerated_algorithm : 0.000021s : 0.03% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000008s : 0.01% optimize.opt_a.shard_inline : 0.000021s : 0.03% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000013s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000016s : 0.02% optimize.opt_a.allreduce_fusion : 0.000012s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000021s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000022s : 0.03% optimize.opt_a.virtual_dataset : 0.000020s : 0.03% optimize.opt_a.get_grad_eliminate_ : 0.000019s : 0.02% optimize.opt_a.virtual_output : 0.000018s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000042s : 0.05% optimize.opt_a.before_grad : 0.000034s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000012s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000014s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000025s : 0.03% optimize.opt_a.a_after_grad : 0.000030s : 0.04% optimize.opt_a.special_op_eliminate : 0.000019s : 0.02% optimize.opt_a.renormalize : 0.000483s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000056s : 0.07% optimize.opt_a.cse : 0.000059s : 0.07% optimize.opt_a.a_3 : 0.000127s : 0.16% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000152s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000197s : 0.25% optimize.opt_b.b_2 : 0.000013s : 0.02% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000500s : 0.64% optimize.opt_after_cconv.c_1 : 0.000063s : 0.08% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000016s : 0.02% optimize.tuple_transform.d_1 : 0.000069s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000070s : 0.09% optimize.cse_after_recomputation.cse : 0.000019s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000018s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000020s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000521s : 0.66% distribtued_split : 0.000046s : 0.06% validate : 0.000038s : 0.05% task_emit : 0.071029s : 90.32% execute : 0.000012s : 0.02% Time group info: ------[substitution.] 0.000154 63 4.87% : 0.000007s : 2: substitution.depend_value_elim 2.54% : 0.000004s : 5: substitution.elim_not_effective 1.98% : 0.000003s : 5: substitution.fold_const_symbol 5.66% : 0.000009s : 6: substitution.graph_param_transform 47.19% : 0.000072s : 1: substitution.inline 4.95% : 0.000008s : 10: substitution.j_node_and_user_rematch 3.49% : 0.000005s : 6: substitution.load_eliminater 2.80% : 0.000004s : 2: substitution.reduce_all_const_elim 6.94% : 0.000011s : 10: substitution.remove_not_recompute_node 2.51% : 0.000004s : 2: substitution.replace_old_param 8.42% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 8.64% : 0.000013s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002663 2 88.40% : 0.002355s : 1: type_inference.infer 11.60% : 0.000309s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000071 1 100.00% : 0.000071s : 1: match.inline ------[predicate.] 0.000271 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.27% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.14% : 0.000006s : 25: predicate.arithmetic_simplify 0.79% : 0.000002s : 13: predicate.cast_eliminate 0.75% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.25% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.42% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.94% : 0.000003s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.08% : 0.000003s : 19: predicate.environ_get_depend_swap 2.01% : 0.000005s : 31: predicate.environ_get_eliminate 1.08% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000004s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.05% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.39% : 0.000001s : 6: predicate.graph_param_transform 0.83% : 0.000002s : 12: predicate.incorporate_call 0.72% : 0.000002s : 12: predicate.incorporate_call_switch 6.15% : 0.000017s : 63: predicate.inline 1.15% : 0.000003s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.98% : 0.000003s : 12: predicate.less_batch_normalization 1.77% : 0.000005s : 25: predicate.list_to_tuple_eliminator_ 2.40% : 0.000007s : 38: predicate.load_eliminater 1.33% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.72% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.81% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.80% : 0.000002s : 13: predicate.minmaximum_grad 0.67% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.51% : 0.000001s : 6: predicate.parallel_virtual_node 1.11% : 0.000003s : 14: predicate.partial_defer_inline 1.35% : 0.000004s : 19: predicate.partial_eliminate 0.86% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000003s : 13: predicate.reduce_eliminate 0.52% : 0.000001s : 12: predicate.remove_not_recompute_node 1.16% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.80% : 0.000002s : 13: predicate.reshape_eliminate 0.75% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 6: predicate.row_tensor_eliminate 0.97% : 0.000003s : 12: predicate.same_eliminate 0.52% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.95% : 0.000003s : 12: predicate.shard_identity_eliminate 1.43% : 0.000004s : 18: predicate.special_op_eliminate 1.08% : 0.000003s : 12: predicate.specialize_transform 1.07% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.92% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000006s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.85% : 0.000002s : 14: predicate.switch_defer_inline 1.61% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.26% : 0.000012s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.76% : 0.000002s : 13: predicate.transpose_eliminate 1.71% : 0.000005s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000005s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.67% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.56% : 0.000007s : 37: predicate.tuple_list_set_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.35% : 0.000009s : 50: predicate.updatestate_useless_node_eliminater 0.56% : 0.000002s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.81% : 0.000002s : 12: predicate.virtual_output_eliminate 0.46% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000170 4 10.49% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.51% : 0.000152s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.093186 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000075s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.17% : 0.000158s : 1: auto_monad 0.04% : 0.000039s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000351s : 1: bootstrap 0.03% : 0.000030s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000032s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000055s : 1: distribtued_split 0.57% : 0.000535s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000010s : 1: graph_reusing 0.01% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000005s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000510s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000018s : 1: opt.transform.loop_unroll_optimizer 1.45% : 0.001354s : 80: opt.transform.opt_a 0.07% : 0.000062s : 1: opt.transform.opt_after_cconv 0.20% : 0.000186s : 27: opt.transform.opt_b 0.07% : 0.000067s : 1: opt.transform.opt_trans_graph 0.04% : 0.000038s : 3: opt.transform.special_op_eliminate 0.06% : 0.000057s : 4: opt.transform.symbol_engine_opt 6.52% : 0.006074s : 1: opt_a 0.17% : 0.000156s : 1: opt_after_cconv 0.31% : 0.000290s : 1: opt_b 8.52% : 0.007936s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.01% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000008s : 1: pipeline_split 0.04% : 0.000034s : 1: pre_auto_parallel 0.02% : 0.000022s : 1: py_interpret_to_execute 0.02% : 0.000015s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000020s : 1: remove_dup_value 0.28% : 0.000258s : 1: renormalize.infer 0.23% : 0.000218s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000158s : 1: rewriter_after_opt_a 0.05% : 0.000048s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000102s : 1: symbol_engine_optimizer 76.26% : 0.071061s : 1: task_emit 0.09% : 0.000088s : 1: tuple_transform 2.91% : 0.002712s : 1: type_inference 0.08% : 0.000075s : 1: validate TotalTime = 0.0904617, [21] [bootstrap]: 0.00032022 [type_inference]: 0.00246317 [auto_monad]: 0.00012488 [graph_reusing]: 2.73017e-06 [inline]: 1.98977e-06 [parallel-infer-symbol]: 2.63005e-06 [pre_auto_parallel]: 2.53301e-05 [insert-virtual-dataset]: 3.26009e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 1.12038e-06 [pipeline_split]: 1.4999e-06 [optimize]: 0.00704473, [52] [py_interpret_to_execute]: 1.53603e-05 [rewriter_before_opt_a]: 3.561e-05 [opt_a]: 0.00534338, [2] [Cycle 1]: 0.00150655, [43] [expand_dump_flag]: 2.92016e-06 [switch_simplify]: 2.84403e-05 [loop_unroll]: 1.30399e-05 [a_1]: 0.0003416 [recompute_prepare]: 9.02964e-06 [updatestate_depend_eliminate]: 8.2897e-06 [updatestate_assign_eliminate]: 5.93998e-06 [updatestate_loads_eliminate]: 6.80005e-06 [parameter_eliminate]: 3.48035e-06 [a_2]: 0.00011779 [accelerated_algorithm]: 8.19983e-06 [shard]: 2.02004e-06 [meta_shard_fg_expand]: 3.67966e-06 [shard_inline]: 8.31019e-06 [auto_parallel]: 1.16499e-05 [parallel]: 7.75e-06 [flash_sp]: 9.60985e-06 [merge_comm]: 8.11974e-06 [allreduce_fusion]: 5.11995e-06 [matmul_add_comm_reduction]: 1.05901e-05 [allreduce_slice_to_reducescatter]: 4.20026e-07 [virtual_shard_identity]: 8.93977e-06 [virtual_dataset]: 8.07969e-06 [get_grad_eliminate_]: 7.70018e-06 [virtual_output]: 7.22008e-06 [merge_forward]: 6.02007e-06 [cell_reuse_recompute_pass]: 1.73971e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.687e-05 [before_grad]: 1.32401e-05 [inplace_validation]: 5.09014e-06 [meta_fg_expand]: 5.25964e-06 [inplace_validation_after_expand]: 6.14021e-06 [flash_sp_send_recv_attached]: 4.29992e-06 [receive_attached]: 2.28966e-06 [after_resolve]: 1.11801e-05 [a_after_grad]: 1.213e-05 [special_op_eliminate]: 7.81007e-06 [renormalize]: 0.00043031 [add_forward_monad_depend]: 3.58e-06 [auto_monad_grad]: 1.8198e-06 [auto_monad_eliminator]: 2.88701e-05 [cse]: 3.077e-05 [a_3]: 5.74002e-05 [Cycle 2]: 0.00077859, [43] [expand_dump_flag]: 1.03004e-06 [switch_simplify]: 9.20007e-06 [loop_unroll]: 7.62986e-06 [a_1]: 0.00020216 [recompute_prepare]: 7.53999e-06 [updatestate_depend_eliminate]: 6.07967e-06 [updatestate_assign_eliminate]: 5.06965e-06 [updatestate_loads_eliminate]: 5.18002e-06 [parameter_eliminate]: 1.35042e-06 [a_2]: 0.00010496 [accelerated_algorithm]: 8.15e-06 [shard]: 1.22003e-06 [meta_shard_fg_expand]: 2.80002e-06 [shard_inline]: 7.62986e-06 [auto_parallel]: 1.116e-05 [parallel]: 3.69037e-06 [flash_sp]: 3.22005e-06 [merge_comm]: 6.04987e-06 [allreduce_fusion]: 5.17024e-06 [matmul_add_comm_reduction]: 8.11974e-06 [allreduce_slice_to_reducescatter]: 2.90107e-07 [virtual_shard_identity]: 8.52998e-06 [virtual_dataset]: 7.83987e-06 [get_grad_eliminate_]: 7.49016e-06 [virtual_output]: 7.13021e-06 [merge_forward]: 4.82984e-06 [cell_reuse_recompute_pass]: 1.90036e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.52001e-05 [before_grad]: 1.21202e-05 [inplace_validation]: 4.35021e-06 [meta_fg_expand]: 4.69014e-06 [inplace_validation_after_expand]: 5.39981e-06 [flash_sp_send_recv_attached]: 1.03004e-06 [receive_attached]: 7.49715e-07 [after_resolve]: 9.43011e-06 [a_after_grad]: 1.171e-05 [special_op_eliminate]: 7.72998e-06 [renormalize]: 7.96281e-08 [add_forward_monad_depend]: 8.29808e-07 [auto_monad_grad]: 1.31968e-06 [auto_monad_eliminator]: 1.82102e-05 [cse]: 1.83699e-05 [a_3]: 4.91701e-05 [py_interpret_to_execute_after_opt_a]: 9.54978e-06 [slice_cell_reuse_recomputed_activation]: 2.10991e-06 [rewriter_after_opt_a]: 0.00013864 [convert_after_rewriter]: 8.75024e-06 [order_py_execute_after_rewriter]: 5.90039e-06 [opt_b]: 0.00025635, [1] [Cycle 1]: 0.0002504, [7] [b_1]: 0.00016399 [b_2]: 9.70019e-06 [updatestate_depend_eliminate]: 5.53997e-06 [updatestate_assign_eliminate]: 4.46988e-06 [updatestate_loads_eliminate]: 5.20982e-06 [renormalize]: 3.20375e-07 [cse]: 2.919e-05 [optimize_parallel_all_gather_comm]: 8.50996e-06 [overlap_param_gather]: 1.40024e-06 [cconv]: 2.18302e-05 [loop_unroll]: 0.00049671 [opt_after_cconv]: 0.00013416, [1] [Cycle 1]: 0.00012802, [7] [c_1]: 5.31599e-05 [parameter_eliminate]: 2.44984e-06 [updatestate_depend_eliminate]: 8.40006e-06 [updatestate_assign_eliminate]: 4.84008e-06 [updatestate_loads_eliminate]: 5.34998e-06 [cse]: 2.104e-05 [renormalize]: 4.20026e-07 [remove_dup_value]: 1.20499e-05 [tuple_transform]: 7.116e-05, [1] [Cycle 1]: 6.613e-05, [2] [d_1]: 5.646e-05 [renormalize]: 1.49943e-07 [partial_unused_args_eliminate]: 1.94972e-06 [add_cache_embedding]: 1.39899e-05 [add_recomputation]: 6.07399e-05 [cse_after_recomputation]: 2.61301e-05, [1] [Cycle 1]: 2.15401e-05, [1] [cse]: 1.645e-05 [environ_conv]: 7.77002e-06 [swap_dp_allreduce_reducescatter]: 7.49016e-06 [bias_add_comm_swap]: 2.35019e-06 [label_micro_interleaved_index]: 1.8999e-06 [label_fine_grained_interleaved_index]: 2.44007e-06 [merge_cast_opt]: 1.40956e-06 [slice_recompute_activation]: 1.90968e-06 [micro_interleaved_order_control]: 1.70991e-06 [assign_add_opt]: 7.35e-06 [ForceFp32Comm]: 8.29808e-07 [remove_cast_before_assign_add]: 1.15996e-06 [full_micro_interleaved_order_control]: 2.30037e-06 [reorder_send_recv_between_fp_bp]: 1.98977e-06 [comm_op_add_attrs]: 8.40053e-07 [add_comm_op_reuse_tag]: 1.2801e-06 [interleave_split_concat_branches]: 7.89762e-07 [interleave_parallel_branches]: 6.3004e-07 [overlap_opt_shard_in_pipeline]: 1.41002e-06 [overlap_opt_shard_grad_in_pipeline]: 2.45031e-06 [control_data_broadcast_order]: 1.15018e-06 [grouped_pairwise_exchange_alltoall]: 1.35973e-06 [offloading_packed_experts]: 1.24983e-06 [overlap_recompute_and_grad_model_parallel]: 2.14996e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.29926e-07 [overlap_recompute_allgather_and_fa_grad]: 1.03004e-06 [overlap_grad_ring_attention]: 1.64006e-06 [overlap_grad_flash_sp]: 1.54004e-05 [begin_end_overlap_inline]: 8.50298e-07 [split_matmul_comm_elemetwise]: 2.08011e-06 [split_layernorm_comm]: 1.8999e-06 [handle_group_info]: 1.05985e-06 [symbol_engine_optimizer]: 8.39299e-05, [1] [Cycle 1]: 7.95298e-05, [6] [build]: 4.06988e-06 [elim_shapecalc]: 1.15102e-05 [elim_not_effective]: 1.63498e-05 [opt_reshape]: 8.68039e-06 [fold_const_symbol]: 1.35596e-05 [renormalize]: 1.8999e-07 [pipeline_parallel_scheduler]: 1.38022e-06 [auto_monad_reorder]: 2.96002e-05 [get_jit_bprop_graph]: 4.60073e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00050776 [distribtued_split]: 4.23896e-05 [validate]: 3.67896e-05 [task_emit]: 0.0795713 [execute]: 1.27801e-05 Sums bootstrap : 0.000320s : 0.37% type_inference : 0.002463s : 2.85% auto_monad : 0.000125s : 0.14% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000038s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.02% optimize.opt_a.a_1 : 0.000544s : 0.63% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.01% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000223s : 0.26% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.01% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000017s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.02% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000430s : 0.50% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000047s : 0.05% optimize.opt_a.cse : 0.000049s : 0.06% optimize.opt_a.a_3 : 0.000107s : 0.12% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000139s : 0.16% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000164s : 0.19% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000029s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000497s : 0.57% optimize.opt_after_cconv.c_1 : 0.000053s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.01% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000061s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000030s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000508s : 0.59% distribtued_split : 0.000042s : 0.05% validate : 0.000037s : 0.04% task_emit : 0.079571s : 92.10% execute : 0.000013s : 0.01% Time group info: ------[substitution.] 0.000130 63 5.10% : 0.000007s : 2: substitution.depend_value_elim 1.86% : 0.000002s : 5: substitution.elim_not_effective 1.98% : 0.000003s : 5: substitution.fold_const_symbol 5.58% : 0.000007s : 6: substitution.graph_param_transform 50.53% : 0.000066s : 1: substitution.inline 4.14% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.27% : 0.000004s : 6: substitution.load_eliminater 2.37% : 0.000003s : 2: substitution.reduce_all_const_elim 5.96% : 0.000008s : 10: substitution.remove_not_recompute_node 2.80% : 0.000004s : 2: substitution.replace_old_param 8.67% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.74% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002434 2 89.09% : 0.002168s : 1: type_inference.infer 10.91% : 0.000266s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000227 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.01% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.23% : 0.000005s : 25: predicate.arithmetic_simplify 0.86% : 0.000002s : 13: predicate.cast_eliminate 0.85% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.27% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.38% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.85% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_depend_swap 1.98% : 0.000004s : 31: predicate.environ_get_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.44% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.78% : 0.000002s : 12: predicate.get_grad_eliminate 0.37% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.59% : 0.000013s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000002s : 12: predicate.less_batch_normalization 1.71% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.43% : 0.000006s : 38: predicate.load_eliminater 1.34% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.21% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.77% : 0.000002s : 12: predicate.merge_addn 0.80% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.82% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.73% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.20% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.89% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.05% : 0.000002s : 13: predicate.reduce_eliminate 0.58% : 0.000001s : 12: predicate.remove_not_recompute_node 1.18% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.84% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.90% : 0.000002s : 12: predicate.shard_identity_eliminate 1.33% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 0.98% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.31% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.68% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.54% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.84% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.73% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.75% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.68% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.59% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.43% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.55% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.89% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.76% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000145 4 10.84% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.16% : 0.000129s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.099304 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000065s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.14% : 0.000137s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000346s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.53% : 0.000522s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000023s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.51% : 0.000507s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.10% : 0.001095s : 80: opt.transform.opt_a 0.05% : 0.000051s : 1: opt.transform.opt_after_cconv 0.16% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.03% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 5.38% : 0.005347s : 1: opt_a 0.14% : 0.000138s : 1: opt_after_cconv 0.26% : 0.000259s : 1: opt_b 7.10% : 0.007053s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.23% : 0.000233s : 1: renormalize.infer 0.19% : 0.000192s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000144s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000087s : 1: symbol_engine_optimizer 80.17% : 0.079609s : 1: task_emit 0.07% : 0.000074s : 1: tuple_transform 2.50% : 0.002481s : 1: type_inference 0.07% : 0.000073s : 1: validate TotalTime = 0.0772893, [21] [bootstrap]: 0.00028123 [type_inference]: 0.002218 [auto_monad]: 9.816e-05 [graph_reusing]: 1.26986e-06 [inline]: 9.89996e-07 [parallel-infer-symbol]: 1.44029e-06 [pre_auto_parallel]: 2.03699e-05 [insert-virtual-dataset]: 1.62981e-06 [parallel-infer-symbol-second]: 3.50177e-07 [dataset_repeat_opt]: 9.79751e-07 [pipeline_split]: 1.34995e-06 [optimize]: 0.00678407, [52] [py_interpret_to_execute]: 1.25403e-05 [rewriter_before_opt_a]: 2.91001e-05 [opt_a]: 0.00516086, [2] [Cycle 1]: 0.00141185, [43] [expand_dump_flag]: 2.34973e-06 [switch_simplify]: 2.62302e-05 [loop_unroll]: 1.33598e-05 [a_1]: 0.00032347 [recompute_prepare]: 8.59005e-06 [updatestate_depend_eliminate]: 7.22008e-06 [updatestate_assign_eliminate]: 5.4501e-06 [updatestate_loads_eliminate]: 6.00982e-06 [parameter_eliminate]: 2.21003e-06 [a_2]: 0.00011477 [accelerated_algorithm]: 8.37957e-06 [shard]: 1.60979e-06 [meta_shard_fg_expand]: 3.49991e-06 [shard_inline]: 8.62963e-06 [auto_parallel]: 1.076e-05 [parallel]: 5.9898e-06 [flash_sp]: 7.58003e-06 [merge_comm]: 6.9798e-06 [allreduce_fusion]: 5.34998e-06 [matmul_add_comm_reduction]: 9.22009e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 9.95025e-06 [virtual_dataset]: 8.19005e-06 [get_grad_eliminate_]: 7.92975e-06 [virtual_output]: 7.50972e-06 [merge_forward]: 4.67012e-06 [cell_reuse_recompute_pass]: 1.51014e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.59303e-05 [before_grad]: 1.34599e-05 [inplace_validation]: 4.29992e-06 [meta_fg_expand]: 5.11017e-06 [inplace_validation_after_expand]: 5.43008e-06 [flash_sp_send_recv_attached]: 3.43006e-06 [receive_attached]: 2.10013e-06 [after_resolve]: 1.06199e-05 [a_after_grad]: 1.26101e-05 [special_op_eliminate]: 7.75e-06 [renormalize]: 0.00039029 [add_forward_monad_depend]: 2.31992e-06 [auto_monad_grad]: 1.49012e-06 [auto_monad_eliminator]: 2.35997e-05 [cse]: 2.63299e-05 [a_3]: 5.69304e-05 [Cycle 2]: 0.00078003, [43] [expand_dump_flag]: 9.29926e-07 [switch_simplify]: 8.86014e-06 [loop_unroll]: 7.53999e-06 [a_1]: 0.00020197 [recompute_prepare]: 7.24988e-06 [updatestate_depend_eliminate]: 5.62007e-06 [updatestate_assign_eliminate]: 4.80004e-06 [updatestate_loads_eliminate]: 4.97e-06 [parameter_eliminate]: 1.01002e-06 [a_2]: 0.00010446 [accelerated_algorithm]: 8.85967e-06 [shard]: 1.07987e-06 [meta_shard_fg_expand]: 2.42982e-06 [shard_inline]: 8.12998e-06 [auto_parallel]: 1.03898e-05 [parallel]: 3.30014e-06 [flash_sp]: 2.38977e-06 [merge_comm]: 5.95022e-06 [allreduce_fusion]: 4.81028e-06 [matmul_add_comm_reduction]: 7.56001e-06 [allreduce_slice_to_reducescatter]: 3.39933e-07 [virtual_shard_identity]: 1.00601e-05 [virtual_dataset]: 7.79005e-06 [get_grad_eliminate_]: 7.65966e-06 [virtual_output]: 7.07991e-06 [merge_forward]: 4.62029e-06 [cell_reuse_recompute_pass]: 1.75042e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.59498e-05 [before_grad]: 1.30897e-05 [inplace_validation]: 4.59002e-06 [meta_fg_expand]: 4.65009e-06 [inplace_validation_after_expand]: 5.09014e-06 [flash_sp_send_recv_attached]: 7.59959e-07 [receive_attached]: 6.39819e-07 [after_resolve]: 9.51998e-06 [a_after_grad]: 1.24802e-05 [special_op_eliminate]: 7.53021e-06 [renormalize]: 8.98726e-08 [add_forward_monad_depend]: 9.30391e-07 [auto_monad_grad]: 9.39704e-07 [auto_monad_eliminator]: 1.62302e-05 [cse]: 1.88104e-05 [a_3]: 4.84297e-05 [py_interpret_to_execute_after_opt_a]: 8.15e-06 [slice_cell_reuse_recomputed_activation]: 1.26986e-06 [rewriter_after_opt_a]: 0.00012601 [convert_after_rewriter]: 7.86968e-06 [order_py_execute_after_rewriter]: 5.1898e-06 [opt_b]: 0.00023859, [1] [Cycle 1]: 0.00023362, [7] [b_1]: 0.00015996 [b_2]: 9.81009e-06 [updatestate_depend_eliminate]: 5.4799e-06 [updatestate_assign_eliminate]: 4.70039e-06 [updatestate_loads_eliminate]: 4.97e-06 [renormalize]: 4.09782e-07 [cse]: 1.80202e-05 [optimize_parallel_all_gather_comm]: 7.88039e-06 [overlap_param_gather]: 9.10368e-07 [cconv]: 1.56802e-05 [loop_unroll]: 0.00047545 [opt_after_cconv]: 0.0001809, [1] [Cycle 1]: 0.00017492, [7] [c_1]: 5.06397e-05 [parameter_eliminate]: 2.19001e-06 [updatestate_depend_eliminate]: 7.29039e-06 [updatestate_assign_eliminate]: 4.73019e-06 [updatestate_loads_eliminate]: 5.39003e-06 [cse]: 2.05399e-05 [renormalize]: 3.59956e-07 [remove_dup_value]: 9.98983e-06 [tuple_transform]: 6.85598e-05, [1] [Cycle 1]: 6.41299e-05, [2] [d_1]: 5.49904e-05 [renormalize]: 1.59722e-07 [partial_unused_args_eliminate]: 1.62004e-06 [add_cache_embedding]: 1.15801e-05 [add_recomputation]: 5.07403e-05 [cse_after_recomputation]: 2.55802e-05, [1] [Cycle 1]: 2.12002e-05, [1] [cse]: 1.632e-05 [environ_conv]: 6.7004e-06 [swap_dp_allreduce_reducescatter]: 7.56979e-06 [bias_add_comm_swap]: 1.76998e-06 [label_micro_interleaved_index]: 1.23028e-06 [label_fine_grained_interleaved_index]: 1.47009e-06 [merge_cast_opt]: 7.29691e-07 [slice_recompute_activation]: 8.801e-07 [micro_interleaved_order_control]: 1.13994e-06 [assign_add_opt]: 6.44987e-06 [ForceFp32Comm]: 4.60073e-07 [remove_cast_before_assign_add]: 4.4005e-07 [full_micro_interleaved_order_control]: 1.08033e-06 [reorder_send_recv_between_fp_bp]: 8.2003e-07 [comm_op_add_attrs]: 4.69852e-07 [add_comm_op_reuse_tag]: 4.30271e-07 [interleave_split_concat_branches]: 4.09782e-07 [interleave_parallel_branches]: 4.80097e-07 [overlap_opt_shard_in_pipeline]: 1.13994e-06 [overlap_opt_shard_grad_in_pipeline]: 1.53994e-06 [control_data_broadcast_order]: 9.60194e-07 [grouped_pairwise_exchange_alltoall]: 6.9011e-07 [offloading_packed_experts]: 4.89876e-07 [overlap_recompute_and_grad_model_parallel]: 7.89762e-07 [overlap_grad_matmul_and_grad_allreduce]: 4.50294e-07 [overlap_recompute_allgather_and_fa_grad]: 4.00003e-07 [overlap_grad_ring_attention]: 1.13016e-06 [overlap_grad_flash_sp]: 1.10501e-05 [begin_end_overlap_inline]: 4.29805e-07 [split_matmul_comm_elemetwise]: 1.22003e-06 [split_layernorm_comm]: 1.2503e-06 [handle_group_info]: 6.00237e-07 [symbol_engine_optimizer]: 8.06302e-05, [1] [Cycle 1]: 7.66502e-05, [6] [build]: 3.20002e-06 [elim_shapecalc]: 1.154e-05 [elim_not_effective]: 1.48299e-05 [opt_reshape]: 8.7698e-06 [fold_const_symbol]: 1.32001e-05 [renormalize]: 1.8999e-07 [pipeline_parallel_scheduler]: 8.99658e-07 [auto_monad_reorder]: 2.04197e-05 [get_jit_bprop_graph]: 2.60305e-07 [rewriter_after_jit_bprop_graph]: 2.40281e-07 [eliminate_special_op_node]: 0.00049006 [distribtued_split]: 3.09302e-05 [validate]: 2.88198e-05 [task_emit]: 0.0670763 [execute]: 9.13022e-06 Sums bootstrap : 0.000281s : 0.38% type_inference : 0.002218s : 3.03% auto_monad : 0.000098s : 0.13% graph_reusing : 0.000001s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000020s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000029s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000525s : 0.72% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000219s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000390s : 0.53% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000045s : 0.06% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000126s : 0.17% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000475s : 0.65% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.08% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000051s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000006s : 0.01% optimize.ForceFp32Comm : 0.000000s : 0.00% optimize.remove_cast_before_assign_add : 0.000000s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000000s : 0.00% optimize.add_comm_op_reuse_tag : 0.000000s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000000s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000000s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000011s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000020s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000490s : 0.67% distribtued_split : 0.000031s : 0.04% validate : 0.000029s : 0.04% task_emit : 0.067076s : 91.48% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000110 63 4.80% : 0.000005s : 2: substitution.depend_value_elim 1.77% : 0.000002s : 5: substitution.elim_not_effective 1.77% : 0.000002s : 5: substitution.fold_const_symbol 5.99% : 0.000007s : 6: substitution.graph_param_transform 48.67% : 0.000054s : 1: substitution.inline 4.73% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.75% : 0.000004s : 6: substitution.load_eliminater 2.89% : 0.000003s : 2: substitution.reduce_all_const_elim 6.63% : 0.000007s : 10: substitution.remove_not_recompute_node 2.12% : 0.000002s : 2: substitution.replace_old_param 9.07% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 7.81% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002194 2 90.25% : 0.001980s : 1: type_inference.infer 9.75% : 0.000214s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000053 1 100.00% : 0.000053s : 1: match.inline ------[predicate.] 0.000227 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.01% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.79% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.36% : 0.000005s : 25: predicate.arithmetic_simplify 0.79% : 0.000002s : 13: predicate.cast_eliminate 0.83% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.16% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.87% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.65% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.95% : 0.000004s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.87% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.13% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000000s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.54% : 0.000013s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.08% : 0.000002s : 12: predicate.less_batch_normalization 1.78% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.37% : 0.000005s : 38: predicate.load_eliminater 1.18% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.32% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.77% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.85% : 0.000002s : 13: predicate.minmaximum_grad 0.69% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.09% : 0.000002s : 14: predicate.partial_defer_inline 1.28% : 0.000003s : 19: predicate.partial_eliminate 0.76% : 0.000002s : 13: predicate.print_const_string_wrapper 0.91% : 0.000002s : 12: predicate.reduce_all_const_elim 0.98% : 0.000002s : 13: predicate.reduce_eliminate 0.58% : 0.000001s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.30% : 0.000001s : 6: predicate.reset_defer_inline 0.86% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.02% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.07% : 0.000002s : 12: predicate.shard_identity_eliminate 1.47% : 0.000003s : 18: predicate.special_op_eliminate 0.99% : 0.000002s : 12: predicate.specialize_transform 1.02% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.03% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.32% : 0.000005s : 38: predicate.stopgrad_eliminater 0.47% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.24% : 0.000010s : 43: predicate.switch_simplify 0.84% : 0.000002s : 13: predicate.tile_eliminate 0.83% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.46% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.81% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.74% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.55% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.72% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.50% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.52% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.60% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.76% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000123 4 8.67% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.33% : 0.000112s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.085818 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.06% : 0.000055s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.13% : 0.000109s : 1: auto_monad 0.03% : 0.000027s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000304s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.04% : 0.000038s : 1: distribtued_split 0.59% : 0.000503s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.56% : 0.000484s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001077s : 80: opt.transform.opt_a 0.06% : 0.000049s : 1: opt.transform.opt_after_cconv 0.18% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000030s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 6.02% : 0.005165s : 1: opt_a 0.21% : 0.000184s : 1: opt_after_cconv 0.28% : 0.000242s : 1: opt_b 7.91% : 0.006791s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000008s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000010s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.24% : 0.000208s : 1: renormalize.infer 0.21% : 0.000178s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000132s : 1: rewriter_after_opt_a 0.04% : 0.000033s : 1: rewriter_before_opt_a 0.01% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000084s : 1: symbol_engine_optimizer 78.19% : 0.067100s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.60% : 0.002234s : 1: type_inference 0.07% : 0.000059s : 1: validate TotalTime = 0.0786626, [21] [bootstrap]: 0.00030072 [type_inference]: 0.00241893 [auto_monad]: 0.00012767 [graph_reusing]: 2.10991e-06 [inline]: 1.17999e-06 [parallel-infer-symbol]: 2.02004e-06 [pre_auto_parallel]: 2.53501e-05 [insert-virtual-dataset]: 1.95997e-06 [parallel-infer-symbol-second]: 4.09782e-07 [dataset_repeat_opt]: 5.80214e-07 [pipeline_split]: 1.2801e-06 [optimize]: 0.00698824, [52] [py_interpret_to_execute]: 1.49799e-05 [rewriter_before_opt_a]: 3.37902e-05 [opt_a]: 0.0053161, [2] [Cycle 1]: 0.00148095, [43] [expand_dump_flag]: 2.21003e-06 [switch_simplify]: 2.49599e-05 [loop_unroll]: 1.33896e-05 [a_1]: 0.00032712 [recompute_prepare]: 9.14978e-06 [updatestate_depend_eliminate]: 7.93021e-06 [updatestate_assign_eliminate]: 5.54975e-06 [updatestate_loads_eliminate]: 7.2103e-06 [parameter_eliminate]: 3.53996e-06 [a_2]: 0.00011575 [accelerated_algorithm]: 8.21007e-06 [shard]: 1.94972e-06 [meta_shard_fg_expand]: 3.43984e-06 [shard_inline]: 8.23988e-06 [auto_parallel]: 1.169e-05 [parallel]: 6.6096e-06 [flash_sp]: 1.051e-05 [merge_comm]: 6.99982e-06 [allreduce_fusion]: 5.02029e-06 [matmul_add_comm_reduction]: 1.03298e-05 [allreduce_slice_to_reducescatter]: 4.80097e-07 [virtual_shard_identity]: 8.94023e-06 [virtual_dataset]: 7.8897e-06 [get_grad_eliminate_]: 7.81985e-06 [virtual_output]: 7.41985e-06 [merge_forward]: 6.13974e-06 [cell_reuse_recompute_pass]: 1.63028e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.67103e-05 [before_grad]: 1.329e-05 [inplace_validation]: 4.88991e-06 [meta_fg_expand]: 5.20004e-06 [inplace_validation_after_expand]: 5.98002e-06 [flash_sp_send_recv_attached]: 4.29014e-06 [receive_attached]: 3.07988e-06 [after_resolve]: 1.12602e-05 [a_after_grad]: 1.272e-05 [special_op_eliminate]: 7.62986e-06 [renormalize]: 0.00042498 [add_forward_monad_depend]: 4.1998e-06 [auto_monad_grad]: 1.56974e-06 [auto_monad_eliminator]: 3.121e-05 [cse]: 3.31099e-05 [a_3]: 5.99199e-05 [Cycle 2]: 0.00076898, [43] [expand_dump_flag]: 1.13016e-06 [switch_simplify]: 9.60007e-06 [loop_unroll]: 7.70995e-06 [a_1]: 0.00020008 [recompute_prepare]: 7.39004e-06 [updatestate_depend_eliminate]: 5.67967e-06 [updatestate_assign_eliminate]: 4.79026e-06 [updatestate_loads_eliminate]: 5.27967e-06 [parameter_eliminate]: 1.18976e-06 [a_2]: 0.00010438 [accelerated_algorithm]: 8.42987e-06 [shard]: 1.19023e-06 [meta_shard_fg_expand]: 2.56998e-06 [shard_inline]: 7.8897e-06 [auto_parallel]: 1.15898e-05 [parallel]: 3.41982e-06 [flash_sp]: 2.28966e-06 [merge_comm]: 5.84032e-06 [allreduce_fusion]: 4.96022e-06 [matmul_add_comm_reduction]: 7.73976e-06 [allreduce_slice_to_reducescatter]: 2.59839e-07 [virtual_shard_identity]: 8.55001e-06 [virtual_dataset]: 7.41007e-06 [get_grad_eliminate_]: 7.07991e-06 [virtual_output]: 6.98958e-06 [merge_forward]: 4.44008e-06 [cell_reuse_recompute_pass]: 2.04006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.49496e-05 [before_grad]: 1.249e-05 [inplace_validation]: 4.14019e-06 [meta_fg_expand]: 4.90993e-06 [inplace_validation_after_expand]: 4.93973e-06 [flash_sp_send_recv_attached]: 9.00123e-07 [receive_attached]: 6.9011e-07 [after_resolve]: 9.58005e-06 [a_after_grad]: 1.15903e-05 [special_op_eliminate]: 6.94022e-06 [renormalize]: 7.96281e-08 [add_forward_monad_depend]: 8.29808e-07 [auto_monad_grad]: 1.02026e-06 [auto_monad_eliminator]: 1.81398e-05 [cse]: 1.97296e-05 [a_3]: 4.79799e-05 [py_interpret_to_execute_after_opt_a]: 9.05991e-06 [slice_cell_reuse_recomputed_activation]: 1.32015e-06 [rewriter_after_opt_a]: 0.00014591 [convert_after_rewriter]: 8.04011e-06 [order_py_execute_after_rewriter]: 5.26989e-06 [opt_b]: 0.00028752, [1] [Cycle 1]: 0.00028136, [7] [b_1]: 0.0001602 [b_2]: 1.02599e-05 [updatestate_depend_eliminate]: 5.22984e-06 [updatestate_assign_eliminate]: 4.23007e-06 [updatestate_loads_eliminate]: 5.12041e-06 [renormalize]: 2.90107e-07 [cse]: 1.82604e-05 [optimize_parallel_all_gather_comm]: 8.19005e-06 [overlap_param_gather]: 6.60308e-07 [cconv]: 1.49799e-05 [loop_unroll]: 0.0004697 [opt_after_cconv]: 0.00013283, [1] [Cycle 1]: 0.00012678, [7] [c_1]: 5.35501e-05 [parameter_eliminate]: 2.37022e-06 [updatestate_depend_eliminate]: 7.66991e-06 [updatestate_assign_eliminate]: 4.33996e-06 [updatestate_loads_eliminate]: 5.17955e-06 [cse]: 2.19001e-05 [renormalize]: 3.70201e-07 [remove_dup_value]: 9.35001e-06 [tuple_transform]: 6.69998e-05, [1] [Cycle 1]: 6.23204e-05, [2] [d_1]: 5.39902e-05 [renormalize]: 1.20141e-07 [partial_unused_args_eliminate]: 1.28988e-06 [add_cache_embedding]: 1.093e-05 [add_recomputation]: 5.12898e-05 [cse_after_recomputation]: 2.69003e-05, [1] [Cycle 1]: 2.21301e-05, [1] [cse]: 1.69901e-05 [environ_conv]: 6.43032e-06 [swap_dp_allreduce_reducescatter]: 6.71018e-06 [bias_add_comm_swap]: 1.51014e-06 [label_micro_interleaved_index]: 1.4198e-06 [label_fine_grained_interleaved_index]: 1.98977e-06 [merge_cast_opt]: 7.79983e-07 [slice_recompute_activation]: 1.74996e-06 [micro_interleaved_order_control]: 1.57021e-06 [assign_add_opt]: 7.30995e-06 [ForceFp32Comm]: 8.40053e-07 [remove_cast_before_assign_add]: 9.90462e-07 [full_micro_interleaved_order_control]: 2.43029e-06 [reorder_send_recv_between_fp_bp]: 1.83983e-06 [comm_op_add_attrs]: 9.4017e-07 [add_comm_op_reuse_tag]: 1.16974e-06 [interleave_split_concat_branches]: 7.49715e-07 [interleave_parallel_branches]: 4.60073e-07 [overlap_opt_shard_in_pipeline]: 1.28988e-06 [overlap_opt_shard_grad_in_pipeline]: 1.95019e-06 [control_data_broadcast_order]: 8.69855e-07 [grouped_pairwise_exchange_alltoall]: 1.08965e-06 [offloading_packed_experts]: 1.55997e-06 [overlap_recompute_and_grad_model_parallel]: 2.05031e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.50298e-07 [overlap_recompute_allgather_and_fa_grad]: 9.09902e-07 [overlap_grad_ring_attention]: 1.60001e-06 [overlap_grad_flash_sp]: 1.48802e-05 [begin_end_overlap_inline]: 6.70087e-07 [split_matmul_comm_elemetwise]: 1.70991e-06 [split_layernorm_comm]: 1.83983e-06 [handle_group_info]: 9.20147e-07 [symbol_engine_optimizer]: 8.81599e-05, [1] [Cycle 1]: 8.33902e-05, [6] [build]: 4.28967e-06 [elim_shapecalc]: 1.66399e-05 [elim_not_effective]: 1.58101e-05 [opt_reshape]: 8.38004e-06 [fold_const_symbol]: 1.26702e-05 [renormalize]: 1.79745e-07 [pipeline_parallel_scheduler]: 7.79983e-07 [auto_monad_reorder]: 2.47401e-05 [get_jit_bprop_graph]: 2.79862e-07 [rewriter_after_jit_bprop_graph]: 2.59839e-07 [eliminate_special_op_node]: 0.00052614 [distribtued_split]: 3.26401e-05 [validate]: 3.00901e-05 [task_emit]: 0.0679254 [execute]: 1.07102e-05 Sums bootstrap : 0.000301s : 0.40% type_inference : 0.002419s : 3.24% auto_monad : 0.000128s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.05% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000527s : 0.71% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000017s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.01% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000425s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.07% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000146s : 0.20% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000470s : 0.63% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000051s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000017s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000025s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000526s : 0.71% distribtued_split : 0.000033s : 0.04% validate : 0.000030s : 0.04% task_emit : 0.067925s : 91.07% execute : 0.000011s : 0.01% TotalTime = 0.0787949, [21] [bootstrap]: 0.00030029 [type_inference]: 0.00241869 [auto_monad]: 0.00012853 [graph_reusing]: 2.37999e-06 [inline]: 1.13994e-06 [parallel-infer-symbol]: 2.59001e-06 [pre_auto_parallel]: 2.53301e-05 [insert-virtual-dataset]: 2.44007e-06 [parallel-infer-symbol-second]: 3.7998e-07 [dataset_repeat_opt]: 1.26986e-06 [pipeline_split]: 1.51992e-06 [optimize]: 0.0069825, [52] [py_interpret_to_execute]: 1.46301e-05 [rewriter_before_opt_a]: 3.38596e-05 [opt_a]: 0.00531269, [2] [Cycle 1]: 0.00150157, [43] [expand_dump_flag]: 3.55998e-06 [switch_simplify]: 2.95802e-05 [loop_unroll]: 1.30399e-05 [a_1]: 0.00034209 [recompute_prepare]: 8.95979e-06 [updatestate_depend_eliminate]: 8.7698e-06 [updatestate_assign_eliminate]: 6.04987e-06 [updatestate_loads_eliminate]: 7.14976e-06 [parameter_eliminate]: 3.11993e-06 [a_2]: 0.00011601 [accelerated_algorithm]: 8.57003e-06 [shard]: 1.48965e-06 [meta_shard_fg_expand]: 2.93972e-06 [shard_inline]: 8.37026e-06 [auto_parallel]: 1.20099e-05 [parallel]: 5.47012e-06 [flash_sp]: 1.06497e-05 [merge_comm]: 7.37002e-06 [allreduce_fusion]: 5.81006e-06 [matmul_add_comm_reduction]: 9.30997e-06 [allreduce_slice_to_reducescatter]: 5.09899e-07 [virtual_shard_identity]: 9.22987e-06 [virtual_dataset]: 8.33999e-06 [get_grad_eliminate_]: 7.79983e-06 [virtual_output]: 7.62008e-06 [merge_forward]: 4.99003e-06 [cell_reuse_recompute_pass]: 1.55997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.62702e-05 [before_grad]: 1.38101e-05 [inplace_validation]: 4.48013e-06 [meta_fg_expand]: 4.97e-06 [inplace_validation_after_expand]: 5.8501e-06 [flash_sp_send_recv_attached]: 4.09968e-06 [receive_attached]: 2.97977e-06 [after_resolve]: 1.15e-05 [a_after_grad]: 1.25901e-05 [special_op_eliminate]: 7.81007e-06 [renormalize]: 0.00042542 [add_forward_monad_depend]: 3.9502e-06 [auto_monad_grad]: 1.71037e-06 [auto_monad_eliminator]: 3.12799e-05 [cse]: 3.277e-05 [a_3]: 5.85802e-05 [Cycle 2]: 0.00076666, [43] [expand_dump_flag]: 1.01002e-06 [switch_simplify]: 8.92999e-06 [loop_unroll]: 7.58981e-06 [a_1]: 0.00019896 [recompute_prepare]: 7.34022e-06 [updatestate_depend_eliminate]: 6.02007e-06 [updatestate_assign_eliminate]: 4.82984e-06 [updatestate_loads_eliminate]: 5.03985e-06 [parameter_eliminate]: 1.01002e-06 [a_2]: 0.00010257 [accelerated_algorithm]: 7.78027e-06 [shard]: 1.49012e-06 [meta_shard_fg_expand]: 2.70037e-06 [shard_inline]: 7.3798e-06 [auto_parallel]: 1.057e-05 [parallel]: 3.47011e-06 [flash_sp]: 3.51993e-06 [merge_comm]: 6.12019e-06 [allreduce_fusion]: 5.53997e-06 [matmul_add_comm_reduction]: 8.24034e-06 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 8.6301e-06 [virtual_dataset]: 7.50972e-06 [get_grad_eliminate_]: 7.45989e-06 [virtual_output]: 7.13021e-06 [merge_forward]: 4.71016e-06 [cell_reuse_recompute_pass]: 1.60001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.54399e-05 [before_grad]: 1.25701e-05 [inplace_validation]: 4.25987e-06 [meta_fg_expand]: 4.69992e-06 [inplace_validation_after_expand]: 5.15021e-06 [flash_sp_send_recv_attached]: 8.49832e-07 [receive_attached]: 7.39936e-07 [after_resolve]: 1.00597e-05 [a_after_grad]: 1.18003e-05 [special_op_eliminate]: 7.28993e-06 [renormalize]: 1.00117e-07 [add_forward_monad_depend]: 8.89879e-07 [auto_monad_grad]: 1.05985e-06 [auto_monad_eliminator]: 1.792e-05 [cse]: 1.95797e-05 [a_3]: 4.67701e-05 [py_interpret_to_execute_after_opt_a]: 8.72975e-06 [slice_cell_reuse_recomputed_activation]: 1.81003e-06 [rewriter_after_opt_a]: 0.00014858 [convert_after_rewriter]: 8.52998e-06 [order_py_execute_after_rewriter]: 6.05965e-06 [opt_b]: 0.00027438, [1] [Cycle 1]: 0.00026829, [7] [b_1]: 0.00015986 [b_2]: 9.48971e-06 [updatestate_depend_eliminate]: 5.3402e-06 [updatestate_assign_eliminate]: 4.47966e-06 [updatestate_loads_eliminate]: 5.22006e-06 [renormalize]: 2.10013e-07 [cse]: 1.89501e-05 [optimize_parallel_all_gather_comm]: 8.59005e-06 [overlap_param_gather]: 1.29966e-06 [cconv]: 2.25198e-05 [loop_unroll]: 0.00045828 [opt_after_cconv]: 0.00013184, [1] [Cycle 1]: 0.00012601, [7] [c_1]: 5.25699e-05 [parameter_eliminate]: 2.23005e-06 [updatestate_depend_eliminate]: 8.04989e-06 [updatestate_assign_eliminate]: 4.60958e-06 [updatestate_loads_eliminate]: 5.09992e-06 [cse]: 2.068e-05 [renormalize]: 3.50177e-07 [remove_dup_value]: 1.31298e-05 [tuple_transform]: 6.88997e-05, [1] [Cycle 1]: 6.44401e-05, [2] [d_1]: 5.51501e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.85007e-06 [add_cache_embedding]: 1.30301e-05 [add_recomputation]: 6.07502e-05 [cse_after_recomputation]: 2.702e-05, [1] [Cycle 1]: 2.222e-05, [1] [cse]: 1.73398e-05 [environ_conv]: 7.28993e-06 [swap_dp_allreduce_reducescatter]: 7.12974e-06 [bias_add_comm_swap]: 2.10013e-06 [label_micro_interleaved_index]: 1.78022e-06 [label_fine_grained_interleaved_index]: 2.00979e-06 [merge_cast_opt]: 1.13016e-06 [slice_recompute_activation]: 1.39e-06 [micro_interleaved_order_control]: 1.70013e-06 [assign_add_opt]: 7.11996e-06 [ForceFp32Comm]: 7.5018e-07 [remove_cast_before_assign_add]: 1.01002e-06 [full_micro_interleaved_order_control]: 2.50014e-06 [reorder_send_recv_between_fp_bp]: 2.02004e-06 [comm_op_add_attrs]: 5.20144e-07 [add_comm_op_reuse_tag]: 1.07987e-06 [interleave_split_concat_branches]: 8.30274e-07 [interleave_parallel_branches]: 6.39819e-07 [overlap_opt_shard_in_pipeline]: 1.51992e-06 [overlap_opt_shard_grad_in_pipeline]: 1.70013e-06 [control_data_broadcast_order]: 8.09785e-07 [grouped_pairwise_exchange_alltoall]: 1.07009e-06 [offloading_packed_experts]: 1.46031e-06 [overlap_recompute_and_grad_model_parallel]: 2.06986e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.69738e-07 [overlap_recompute_allgather_and_fa_grad]: 7.5018e-07 [overlap_grad_ring_attention]: 1.86032e-06 [overlap_grad_flash_sp]: 1.50399e-05 [begin_end_overlap_inline]: 5.29923e-07 [split_matmul_comm_elemetwise]: 1.98977e-06 [split_layernorm_comm]: 1.95997e-06 [handle_group_info]: 7.70204e-07 [symbol_engine_optimizer]: 8.33999e-05, [1] [Cycle 1]: 7.89897e-05, [6] [build]: 4.10993e-06 [elim_shapecalc]: 1.13402e-05 [elim_not_effective]: 1.59298e-05 [opt_reshape]: 8.50996e-06 [fold_const_symbol]: 1.40397e-05 [renormalize]: 1.8999e-07 [pipeline_parallel_scheduler]: 1.53016e-06 [auto_monad_reorder]: 2.86801e-05 [get_jit_bprop_graph]: 4.69852e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.00047316 [distribtued_split]: 3.77703e-05 [validate]: 3.38503e-05 [task_emit]: 0.068113 [execute]: 7.2103e-06 Sums bootstrap : 0.000300s : 0.40% type_inference : 0.002419s : 3.24% auto_monad : 0.000129s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000541s : 0.72% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000219s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000426s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.07% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000149s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.21% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000458s : 0.61% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000473s : 0.63% distribtued_split : 0.000038s : 0.05% validate : 0.000034s : 0.05% task_emit : 0.068113s : 91.10% execute : 0.000007s : 0.01% Time group info: ------[substitution.] 0.000115 63 5.81% : 0.000007s : 2: substitution.depend_value_elim 2.09% : 0.000002s : 5: substitution.elim_not_effective 1.66% : 0.000002s : 5: substitution.fold_const_symbol 4.99% : 0.000006s : 6: substitution.graph_param_transform 47.54% : 0.000055s : 1: substitution.inline 4.87% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.61% : 0.000004s : 6: substitution.load_eliminater 2.80% : 0.000003s : 2: substitution.reduce_all_const_elim 6.60% : 0.000008s : 10: substitution.remove_not_recompute_node 2.91% : 0.000003s : 2: substitution.replace_old_param 9.05% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.07% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002390 2 88.93% : 0.002126s : 1: type_inference.infer 11.07% : 0.000265s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000053 1 100.00% : 0.000053s : 1: match.inline ------[predicate.] 0.000229 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.02% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.35% : 0.000005s : 25: predicate.arithmetic_simplify 0.89% : 0.000002s : 13: predicate.cast_eliminate 0.80% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.22% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.52% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.31% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000004s : 31: predicate.environ_get_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.84% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.82% : 0.000013s : 63: predicate.inline 1.07% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000002s : 12: predicate.less_batch_normalization 1.70% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000005s : 38: predicate.load_eliminater 1.31% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.21% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.80% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.81% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.81% : 0.000002s : 13: predicate.minmaximum_grad 0.78% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.29% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.92% : 0.000002s : 12: predicate.reduce_all_const_elim 1.15% : 0.000003s : 13: predicate.reduce_eliminate 0.67% : 0.000002s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.90% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.65% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.94% : 0.000002s : 12: predicate.shard_identity_eliminate 1.26% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 1.04% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.32% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.85% : 0.000002s : 14: predicate.switch_defer_inline 1.70% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.22% : 0.000010s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.77% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.79% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.90% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.67% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.41% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.29% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.40% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.56% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.56% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000143 4 6.57% : 0.000009s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.43% : 0.000133s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087437 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.06% : 0.000056s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.16% : 0.000141s : 1: auto_monad 0.04% : 0.000031s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000328s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.62% : 0.000541s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.55% : 0.000479s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001074s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.04% : 0.000038s : 3: opt.transform.special_op_eliminate 0.06% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.08% : 0.005320s : 1: opt_a 0.16% : 0.000137s : 1: opt_after_cconv 0.33% : 0.000291s : 1: opt_b 8.00% : 0.006996s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000013s : 1: remove_dup_value 0.26% : 0.000229s : 1: renormalize.infer 0.22% : 0.000190s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000152s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000091s : 1: symbol_engine_optimizer 77.72% : 0.067955s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.79% : 0.002437s : 1: type_inference 0.07% : 0.000061s : 1: validate Time group info: ------[substitution.] 0.000129 63 4.06% : 0.000005s : 2: substitution.depend_value_elim 1.96% : 0.000003s : 5: substitution.elim_not_effective 1.84% : 0.000002s : 5: substitution.fold_const_symbol 5.36% : 0.000007s : 6: substitution.graph_param_transform 52.28% : 0.000067s : 1: substitution.inline 4.20% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.35% : 0.000004s : 6: substitution.load_eliminater 2.20% : 0.000003s : 2: substitution.reduce_all_const_elim 5.67% : 0.000007s : 10: substitution.remove_not_recompute_node 2.27% : 0.000003s : 2: substitution.replace_old_param 8.81% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.00% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002390 2 88.91% : 0.002125s : 1: type_inference.infer 11.09% : 0.000265s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000066 1 100.00% : 0.000066s : 1: match.inline ------[predicate.] 0.000224 1420 0.87% : 0.000002s : 13: predicate.accumulaten_eliminater 1.10% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.75% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.14% : 0.000005s : 25: predicate.arithmetic_simplify 0.80% : 0.000002s : 13: predicate.cast_eliminate 0.90% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.43% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.80% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.50% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 1.90% : 0.000004s : 31: predicate.environ_get_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.63% : 0.000013s : 63: predicate.inline 0.99% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.14% : 0.000003s : 12: predicate.less_batch_normalization 1.82% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000005s : 38: predicate.load_eliminater 1.26% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.90% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.71% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.28% : 0.000003s : 19: predicate.partial_eliminate 0.82% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000002s : 13: predicate.reduce_eliminate 0.56% : 0.000001s : 12: predicate.remove_not_recompute_node 1.18% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.92% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.58% : 0.000001s : 6: predicate.row_tensor_eliminate 1.08% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.31% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.00% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.98% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.71% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.39% : 0.000010s : 43: predicate.switch_simplify 0.87% : 0.000002s : 13: predicate.tile_eliminate 0.83% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.68% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.48% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.81% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.70% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.40% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.52% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.57% : 0.000001s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000150 4 10.51% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.49% : 0.000134s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087567 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000065s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.16% : 0.000141s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000328s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000046s : 1: distribtued_split 0.55% : 0.000486s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000015s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000004s : 1: label_micro_interleaved_index 0.53% : 0.000467s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001089s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.07% : 0.005316s : 1: opt_a 0.15% : 0.000135s : 1: opt_after_cconv 0.32% : 0.000277s : 1: opt_b 7.98% : 0.006991s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000231s : 1: renormalize.infer 0.22% : 0.000188s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000154s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000086s : 1: symbol_engine_optimizer 77.81% : 0.068136s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.78% : 0.002436s : 1: type_inference 0.08% : 0.000067s : 1: validate TotalTime = 0.0796672, [21] [bootstrap]: 0.00031243 [type_inference]: 0.00250973 [auto_monad]: 0.00012359 [graph_reusing]: 1.96975e-06 [inline]: 1.39e-06 [parallel-infer-symbol]: 2.14996e-06 [pre_auto_parallel]: 2.487e-05 [insert-virtual-dataset]: 2.44984e-06 [parallel-infer-symbol-second]: 4.60073e-07 [dataset_repeat_opt]: 1.34017e-06 [pipeline_split]: 1.64984e-06 [optimize]: 0.00715967, [52] [py_interpret_to_execute]: 1.447e-05 [rewriter_before_opt_a]: 3.414e-05 [opt_a]: 0.00542262, [2] [Cycle 1]: 0.00153072, [43] [expand_dump_flag]: 2.84007e-06 [switch_simplify]: 2.96799e-05 [loop_unroll]: 1.32099e-05 [a_1]: 0.00033964 [recompute_prepare]: 9.20007e-06 [updatestate_depend_eliminate]: 8.43033e-06 [updatestate_assign_eliminate]: 5.64987e-06 [updatestate_loads_eliminate]: 7.05011e-06 [parameter_eliminate]: 3.21027e-06 [a_2]: 0.00011555 [accelerated_algorithm]: 8.58027e-06 [shard]: 1.93994e-06 [meta_shard_fg_expand]: 3.70992e-06 [shard_inline]: 8.46991e-06 [auto_parallel]: 1.19298e-05 [parallel]: 7.45011e-06 [flash_sp]: 1.03801e-05 [merge_comm]: 7.84965e-06 [allreduce_fusion]: 5.30994e-06 [matmul_add_comm_reduction]: 1.03703e-05 [allreduce_slice_to_reducescatter]: 4.20026e-07 [virtual_shard_identity]: 9.55025e-06 [virtual_dataset]: 7.89016e-06 [get_grad_eliminate_]: 7.43009e-06 [virtual_output]: 7.93999e-06 [merge_forward]: 5.69038e-06 [cell_reuse_recompute_pass]: 1.79e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.65896e-05 [before_grad]: 1.37398e-05 [inplace_validation]: 4.80004e-06 [meta_fg_expand]: 5.35976e-06 [inplace_validation_after_expand]: 6.33998e-06 [flash_sp_send_recv_attached]: 5.21028e-06 [receive_attached]: 2.6701e-06 [after_resolve]: 1.10399e-05 [a_after_grad]: 1.234e-05 [special_op_eliminate]: 7.86036e-06 [renormalize]: 0.00044648 [add_forward_monad_depend]: 3.46033e-06 [auto_monad_grad]: 1.91014e-06 [auto_monad_eliminator]: 3.19001e-05 [cse]: 3.016e-05 [a_3]: 5.91101e-05 [Cycle 2]: 0.00077665, [43] [expand_dump_flag]: 1.13016e-06 [switch_simplify]: 9.22987e-06 [loop_unroll]: 7.76956e-06 [a_1]: 0.00020136 [recompute_prepare]: 6.99982e-06 [updatestate_depend_eliminate]: 5.89015e-06 [updatestate_assign_eliminate]: 4.48013e-06 [updatestate_loads_eliminate]: 5.37001e-06 [parameter_eliminate]: 1.26986e-06 [a_2]: 0.00010527 [accelerated_algorithm]: 8.02008e-06 [shard]: 1.2503e-06 [meta_shard_fg_expand]: 2.59001e-06 [shard_inline]: 8.12998e-06 [auto_parallel]: 1.08299e-05 [parallel]: 3.6601e-06 [flash_sp]: 3.35043e-06 [merge_comm]: 5.8203e-06 [allreduce_fusion]: 4.86011e-06 [matmul_add_comm_reduction]: 8.00006e-06 [allreduce_slice_to_reducescatter]: 2.60305e-07 [virtual_shard_identity]: 8.61986e-06 [virtual_dataset]: 7.77002e-06 [get_grad_eliminate_]: 7.49994e-06 [virtual_output]: 7.19959e-06 [merge_forward]: 4.86011e-06 [cell_reuse_recompute_pass]: 1.85985e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.50898e-05 [before_grad]: 1.22399e-05 [inplace_validation]: 4.35999e-06 [meta_fg_expand]: 5.01005e-06 [inplace_validation_after_expand]: 5.13997e-06 [flash_sp_send_recv_attached]: 1.02958e-06 [receive_attached]: 8.2003e-07 [after_resolve]: 9.96003e-06 [a_after_grad]: 1.15503e-05 [special_op_eliminate]: 7.66013e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 7.5018e-07 [auto_monad_grad]: 1.26986e-06 [auto_monad_eliminator]: 1.93203e-05 [cse]: 1.847e-05 [a_3]: 4.80199e-05 [py_interpret_to_execute_after_opt_a]: 9.41986e-06 [slice_cell_reuse_recomputed_activation]: 2.08989e-06 [rewriter_after_opt_a]: 0.00014203 [convert_after_rewriter]: 8.93977e-06 [order_py_execute_after_rewriter]: 6.4699e-06 [opt_b]: 0.00029026, [1] [Cycle 1]: 0.0002846, [7] [b_1]: 0.00020836 [b_2]: 1.01398e-05 [updatestate_depend_eliminate]: 5.81983e-06 [updatestate_assign_eliminate]: 4.9402e-06 [updatestate_loads_eliminate]: 5.29038e-06 [renormalize]: 2.99886e-07 [cse]: 1.81999e-05 [optimize_parallel_all_gather_comm]: 8.27014e-06 [overlap_param_gather]: 1.09011e-06 [cconv]: 2.31001e-05 [loop_unroll]: 0.00050408 [opt_after_cconv]: 0.00013433, [1] [Cycle 1]: 0.00012801, [7] [c_1]: 5.37401e-05 [parameter_eliminate]: 2.40002e-06 [updatestate_depend_eliminate]: 8.06991e-06 [updatestate_assign_eliminate]: 4.88991e-06 [updatestate_loads_eliminate]: 5.20982e-06 [cse]: 2.17799e-05 [renormalize]: 3.7998e-07 [remove_dup_value]: 1.333e-05 [tuple_transform]: 7.00401e-05, [1] [Cycle 1]: 6.556e-05, [2] [d_1]: 5.63199e-05 [renormalize]: 1.80211e-07 [partial_unused_args_eliminate]: 1.64006e-06 [add_cache_embedding]: 1.36001e-05 [add_recomputation]: 5.964e-05 [cse_after_recomputation]: 2.70098e-05, [1] [Cycle 1]: 2.21799e-05, [1] [cse]: 1.70199e-05 [environ_conv]: 7.31973e-06 [swap_dp_allreduce_reducescatter]: 6.86012e-06 [bias_add_comm_swap]: 2.33017e-06 [label_micro_interleaved_index]: 1.75973e-06 [label_fine_grained_interleaved_index]: 2.09035e-06 [merge_cast_opt]: 1.43005e-06 [slice_recompute_activation]: 1.43005e-06 [micro_interleaved_order_control]: 1.83005e-06 [assign_add_opt]: 7.46036e-06 [ForceFp32Comm]: 8.49832e-07 [remove_cast_before_assign_add]: 6.79865e-07 [full_micro_interleaved_order_control]: 1.6503e-06 [reorder_send_recv_between_fp_bp]: 1.72015e-06 [comm_op_add_attrs]: 9.00123e-07 [add_comm_op_reuse_tag]: 9.80217e-07 [interleave_split_concat_branches]: 7.79983e-07 [interleave_parallel_branches]: 8.60076e-07 [overlap_opt_shard_in_pipeline]: 1.6503e-06 [overlap_opt_shard_grad_in_pipeline]: 2.08011e-06 [control_data_broadcast_order]: 1.39978e-06 [grouped_pairwise_exchange_alltoall]: 1.30013e-06 [offloading_packed_experts]: 1.05985e-06 [overlap_recompute_and_grad_model_parallel]: 2.08011e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.24983e-06 [overlap_recompute_allgather_and_fa_grad]: 1.14972e-06 [overlap_grad_ring_attention]: 1.39e-06 [overlap_grad_flash_sp]: 1.48499e-05 [begin_end_overlap_inline]: 5.59725e-07 [split_matmul_comm_elemetwise]: 1.91992e-06 [split_layernorm_comm]: 1.62004e-06 [handle_group_info]: 8.89879e-07 [symbol_engine_optimizer]: 8.36798e-05, [1] [Cycle 1]: 7.89603e-05, [6] [build]: 3.49991e-06 [elim_shapecalc]: 1.19298e-05 [elim_not_effective]: 1.586e-05 [opt_reshape]: 8.50996e-06 [fold_const_symbol]: 1.37798e-05 [renormalize]: 1.8999e-07 [pipeline_parallel_scheduler]: 1.36998e-06 [auto_monad_reorder]: 2.89599e-05 [get_jit_bprop_graph]: 4.4005e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.00051437 [distribtued_split]: 4.16101e-05 [validate]: 3.51099e-05 [task_emit]: 0.0686549 [execute]: 1.17002e-05 Sums bootstrap : 0.000312s : 0.41% type_inference : 0.002510s : 3.32% auto_monad : 0.000124s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000541s : 0.72% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000221s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000447s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000049s : 0.06% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000142s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000208s : 0.28% optimize.opt_b.b_2 : 0.0000 TotalTime = 0.0797766, [21] [bootstrap]: 0.00028104 [type_inference]: 0.00221813 [auto_monad]: 9.68399e-05 [graph_reusing]: 2.14018e-06 [inline]: 1.07987e-06 [parallel-infer-symbol]: 1.30991e-06 [pre_auto_parallel]: 2.03098e-05 [insert-virtual-dataset]: 1.78022e-06 [parallel-infer-symbol-second]: 3.59956e-07 [dataset_repeat_opt]: 1.00024e-06 [pipeline_split]: 1.11992e-06 [optimize]: 0.00677517, [52] [py_interpret_to_execute]: 1.23498e-05 [rewriter_before_opt_a]: 2.93502e-05 [opt_a]: 0.00515028, [2] [Cycle 1]: 0.00141355, [43] [expand_dump_flag]: 2.09967e-06 [switch_simplify]: 2.58898e-05 [loop_unroll]: 1.28602e-05 [a_1]: 0.00033014 [recompute_prepare]: 8.80985e-06 [updatestate_depend_eliminate]: 7.35978e-06 [updatestate_assign_eliminate]: 5.13997e-06 [updatestate_loads_eliminate]: 5.61029e-06 [parameter_eliminate]: 2.21981e-06 [a_2]: 0.00011322 [accelerated_algorithm]: 8.3698e-06 [shard]: 1.51992e-06 [meta_shard_fg_expand]: 2.74973e-06 [shard_inline]: 8.50996e-06 [auto_parallel]: 1.09002e-05 [parallel]: 4.80004e-06 [flash_sp]: 6.36e-06 [merge_comm]: 7.41985e-06 [allreduce_fusion]: 4.96022e-06 [matmul_add_comm_reduction]: 9.1102e-06 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 1.02399e-05 [virtual_dataset]: 8.16025e-06 [get_grad_eliminate_]: 7.70995e-06 [virtual_output]: 7.58003e-06 [merge_forward]: 4.86011e-06 [cell_reuse_recompute_pass]: 1.56974e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.60802e-05 [before_grad]: 1.35102e-05 [inplace_validation]: 4.4601e-06 [meta_fg_expand]: 5.41005e-06 [inplace_validation_after_expand]: 5.22006e-06 [flash_sp_send_recv_attached]: 3.76021e-06 [receive_attached]: 1.97999e-06 [after_resolve]: 1.11302e-05 [a_after_grad]: 1.24001e-05 [special_op_eliminate]: 7.49994e-06 [renormalize]: 0.00039953 [add_forward_monad_depend]: 2.50991e-06 [auto_monad_grad]: 1.27964e-06 [auto_monad_eliminator]: 2.182e-05 [cse]: 2.31499e-05 [a_3]: 5.64498e-05 [Cycle 2]: 0.00076656, [43] [expand_dump_flag]: 9.69972e-07 [switch_simplify]: 8.86014e-06 [loop_unroll]: 7.76025e-06 [a_1]: 0.00020338 [recompute_prepare]: 7.40029e-06 [updatestate_depend_eliminate]: 5.70016e-06 [updatestate_assign_eliminate]: 4.71994e-06 [updatestate_loads_eliminate]: 4.73997e-06 [parameter_eliminate]: 9.49949e-07 [a_2]: 0.00010277 [accelerated_algorithm]: 7.90972e-06 [shard]: 1.15996e-06 [meta_shard_fg_expand]: 2.50991e-06 [shard_inline]: 7.64988e-06 [auto_parallel]: 1.013e-05 [parallel]: 3.30014e-06 [flash_sp]: 2.65008e-06 [merge_comm]: 6.10016e-06 [allreduce_fusion]: 5.13997e-06 [matmul_add_comm_reduction]: 7.57026e-06 [allreduce_slice_to_reducescatter]: 2.89641e-07 [virtual_shard_identity]: 8.49972e-06 [virtual_dataset]: 7.76025e-06 [get_grad_eliminate_]: 7.21961e-06 [virtual_output]: 7.11018e-06 [merge_forward]: 4.41959e-06 [cell_reuse_recompute_pass]: 1.68988e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.49002e-05 [before_grad]: 1.26399e-05 [inplace_validation]: 4.31994e-06 [meta_fg_expand]: 4.88013e-06 [inplace_validation_after_expand]: 5.23962e-06 [flash_sp_send_recv_attached]: 8.40053e-07 [receive_attached]: 7.29691e-07 [after_resolve]: 9.78028e-10s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000504s : 0.67% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.f06 [a_after_grad]: 1.19801e-05 [special_op_eliminate]: 7.40029e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.09785e-07 [auto_monad_grad]: 1.05007e-06 [auto_monad_eliminator]: 1.58902e-05 [cse]: 1.86502e-05 [a_3]: 4.84199e-05 [py_interpret_to_execute_after_opt_a]: 9.14e-06 [slice_cell_reuse_recomputed_activation]: 1.57021e-06 [rewriter_after_opt_a]: 0.00013514 [convert_after_rewriter]: 8.21985e-06 [order_py_execute_after_rewriter]: 5.62007e-06 [opt_b]: 0.00023897, [1] [Cycle 1]: 0.0002335, [7] [b_1]: 0.0001615 [b_2]: 9.71975e-06 [updatestate_depend_eliminate]: 5.41983e-06 [updatestate_assign_eliminate]: 4.23007e-06 [updatestate_loads_eliminate]: 4.72041e-06 [renormalize]: 2.59839e-07 [cse]: 1.79298e-05 [optimize_parallel_all_gather_comm]: 7.47992e-06 [overlap_param_gather]: 8.40053e-07 [cconv]: 1.394e-05 [loop_unroll]: 0.00047455 [opt_after_cconv]: 0.0001537, [1] [Cycle 1]: 0.00014794, [7] [c_1]: 5.08497e-05 [parameter_eliminate]: 1.47987e-06 [updatestate_depend_eliminate]: 7.22008e-06 [updatestate_assign_eliminate]: 4.44008e-06 [updatestate_loads_eliminate]: 4.6799e-06 [cse]: 1.99098e-05 [renormalize]: 3.19909e-07 [remove_dup_value]: 9.94001e-06 [tuple_transform]: 6.86403e-05, [1] [Cycle 1]: 6.42603e-05, [2] [d_1]: 5.54202e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.43005e-06 [add_cache_embedding]: 1.171e-05 [add_recomputation]: 5.51301e-05 [cse_after_recomputation]: 2.57702e-05, [1] [Cycle 1]: 2.14898e-05, [1] [cse]: 1.65198e-05 [environ_conv]: 6.36e-06 [swap_dp_allreduce_reducescatter]: 7.2699e-06 [bias_add_comm_swap]: 1.79978e-06 [label_micro_interleaved_index]: 1.64006e-06 [label_fine_grained_interleaved_index]: 1.51992e-06 [merge_cast_opt]: 9.09902e-07 [slice_recompute_activation]: 1.26986e-06 [micro_interleaved_order_control]: 1.83983e-06 [assign_add_opt]: 6.90017e-06 [ForceFp32Comm]: 9.30391e-07 [remove_cast_before_assign_add]: 7.19912e-07 [full_micro_interleaved_order_control]: 1.39978e-06 [reorder_send_recv_between_fp_bp]: 1.32015e-06 [comm_op_add_attrs]: 6.59842e-07 [add_comm_op_reuse_tag]: 6.50063e-07 [interleave_split_concat_branches]: 5.89993e-07 [interleave_parallel_branches]: 5.69969e-07 [overlap_opt_shard_in_pipeline]: 6.79865e-07 [overlap_opt_shard_grad_in_pipeline]: 1.24983e-06 [control_data_broadcast_order]: 9.19681e-07 [grouped_pairwise_exchange_alltoall]: 7.19912e-07 [offloading_packed_experts]: 7.90227e-07 [overlap_recompute_and_grad_model_parallel]: 1.20001e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.60191e-07 [overlap_recompute_allgather_and_fa_grad]: 6.99889e-07 [overlap_grad_ring_attention]: 1.28988e-06 [overlap_grad_flash_sp]: 1.19098e-05 [begin_end_overlap_inline]: 5.29923e-07 [split_matmul_comm_elemetwise]: 1.39978e-06 [split_layernorm_comm]: 1.91014e-06 [handle_group_info]: 5.89993e-07 [symbol_engine_optimizer]: 8.69399e-05, [1] [Cycle 1]: 7.83699e-05, [6] [build]: 3.79002e-06 [elim_shapecalc]: 1.20797e-05 [elim_not_effective]: 1.548e-05 [opt_reshape]: 8.51974e-06 [fold_const_symbol]: 1.32103e-05 [renormalize]: 2.40281e-07 [pipeline_parallel_scheduler]: 1.08965e-06 [auto_monad_reorder]: 2.35299e-05 [get_jit_bprop_graph]: 3.30154e-07 [rewriter_after_jit_bprop_graph]: 3.09665e-07 [eliminate_special_op_node]: 0.00049216 [distribtued_split]: 3.48799e-05 [validate]: 3.037e-05 [task_emit]: 0.0695618 [execute]: 9.22009e-06 Sums bootstrap : 0.000281s : 0.37% type_inference : 0.002218s : 2.92% old_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000514s : 0.68% distribtued_split : 0.000042s : 0.06% validate : 0.000035s : 0.05% task_emit : 0.068655s : 90.84% execute : 0.000012s : 0.02% auto_monad : 0.000097s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000020s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000029s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000534s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000010s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000216s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000009s : 0.01% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000400s : 0.53% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000038s : 0.05% optimize.opt_a.cse : 0.000042s : 0.06% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000135s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000007s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000014s : 0.02% optimize.loop_unroll : 0.000475s : 0.63% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000001s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000055s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000492s : 0.65% distribtued_split : 0.000035s : 0.05% validate : 0.000030s : 0.04% task_emit : 0.069562s : 91.73% execute : 0.000009s : 0.01% TotalTime = 0.0799802, [21] [bootstrap]: 0.00031245 [type_inference]: 0.00256478 [auto_monad]: 0.00012875 [graph_reusing]: 1.98977e-06 [inline]: 1.39978e-06 [parallel-infer-symbol]: 1.87987e-06 [pre_auto_parallel]: 2.57101e-05 [insert-virtual-dataset]: 3.22005e-06 [parallel-infer-symbol-second]: 4.20026e-07 [dataset_repeat_opt]: 1.57999e-06 [pipeline_split]: 1.24006e-06 [optimize]: 0.00716902, [52] [py_interpret_to_execute]: 1.48397e-05 [rewriter_before_opt_a]: 3.59798e-05 [opt_a]: 0.00540757, [2] [Cycle 1]: 0.00152908, [43] [expand_dump_flag]: 3.03006e-06 [switch_simplify]: 2.93101e-05 [loop_unroll]: 1.36001e-05 [a_1]: 0.00033935 [recompute_prepare]: 9.26992e-06 [updatestate_depend_eliminate]: 8.72975e-06 [updatestate_assign_eliminate]: 5.68992e-06 [updatestate_loads_eliminate]: 6.97002e-06 [parameter_eliminate]: 2.96021e-06 [a_2]: 0.00011554 [accelerated_algorithm]: 8.48016e-06 [shard]: 1.96975e-06 [meta_shard_fg_expand]: 3.87011e-06 [shard_inline]: 8.95979e-06 [auto_parallel]: 1.19e-05 [parallel]: 6.78003e-06 [flash_sp]: 9.58005e-06 [merge_comm]: 7.97957e-06 [allreduce_fusion]: 5.53019e-06 [matmul_add_comm_reduction]: 1.11498e-05 [allreduce_slice_to_reducescatter]: 4.60073e-07 [virtual_shard_identity]: 9.49996e-06 [virtual_dataset]: 8.29017e-06 [get_grad_eliminate_]: 7.93999e-06 [virtual_output]: 7.45011e-06 [merge_forward]: 6.21006e-06 [cell_reuse_recompute_pass]: 1.8198e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.67899e-05 [before_grad]: 1.411e-05 [inplace_validation]: 4.9402e-06 [meta_fg_expand]: 5.74999e-06 [inplace_validation_after_expand]: 6.75023e-06 [flash_sp_send_recv_attached]: 5.47012e-06 [receive_attached]: 2.33017e-06 [after_resolve]: 1.09901e-05 [a_after_grad]: 1.27498e-05 [special_op_eliminate]: 7.80961e-06 [renormalize]: 0.00043339 [add_forward_monad_depend]: 3.36021e-06 [auto_monad_grad]: 1.75973e-06 [auto_monad_eliminator]: 2.9e-05 [cse]: 3.11998e-05 [a_3]: 6.06799e-05 [Cycle 2]: 0.00080126, [43] [expand_dump_flag]: 1.13994e-06 [switch_simplify]: 9.20007e-06 [loop_unroll]: 8.00984e-06 [a_1]: 0.00020593 [recompute_prepare]: 7.3798e-06 [updatestate_depend_eliminate]: 6.32973e-06 [updatestate_assign_eliminate]: 4.69992e-06 [updatestate_loads_eliminate]: 5.4799e-06 [parameter_eliminate]: 1.22003e-06 [a_2]: 0.00010617 [accelerated_algorithm]: 8.50996e-06 [shard]: 1.20001e-06 [meta_shard_fg_expand]: 2.50991e-06 [shard_inline]: 8.21007e-06 [auto_parallel]: 1.12499e-05 [parallel]: 3.76021e-06 [flash_sp]: 3.62005e-06 [merge_comm]: 5.92973e-06 [allreduce_fusion]: 5.28991e-06 [matmul_add_comm_reduction]: 8.27992e-06 [allreduce_slice_to_reducescatter]: 3.00352e-07 [virtual_shard_identity]: 8.59983e-06 [virtual_dataset]: 7.68015e-06 [get_grad_eliminate_]: 7.39982e-06 [virtual_output]: 7.22008e-06 [merge_forward]: 4.8303e-06 [cell_reuse_recompute_pass]: 1.93994e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.567e-05 [before_grad]: 1.32802e-05 [inplace_validation]: 4.3097e-06 [meta_fg_expand]: 5.01983e-06 [inplace_validation_after_expand]: 5.65965e-06 [flash_sp_send_recv_attached]: 9.20147e-07 [receive_attached]: 6.70087e-07 [after_resolve]: 1.00601e-05 [a_after_grad]: 1.21701e-05 [special_op_eliminate]: 7.57026e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 1.20001e-06 [auto_monad_grad]: 1.17999e-06 [auto_monad_eliminator]: 1.87601e-05 [cse]: 2.007e-05 [a_3]: 4.94099e-05 [py_interpret_to_execute_after_opt_a]: 9.18005e-06 [slice_cell_reuse_recomputed_activation]: 2.54996e-06 [rewriter_after_opt_a]: 0.00014943 [convert_after_rewriter]: 8.57003e-06 [order_py_execute_after_rewriter]: 6.18957e-06 [opt_b]: 0.00028618, [1] [Cycle 1]: 0.00027987, [7] [b_1]: 0.00020092 [b_2]: 9.60007e-06 [updatestate_depend_eliminate]: 5.43008e-06 [updatestate_assign_eliminate]: 4.77023e-06 [updatestate_loads_eliminate]: 5.23962e-06 [renormalize]: 2.19792e-07 [cse]: 1.95899e-05 [optimize_parallel_all_gather_comm]: 8.2897e-06 [overlap_param_gather]: 1.17999e-06 [cconv]: 2.39201e-05 [loop_unroll]: 0.00049845 [opt_after_cconv]: 0.00013798, [1] [Cycle 1]: 0.00013143, [7] [c_1]: 5.29401e-05 [parameter_eliminate]: 2.48989e-06 [updatestate_depend_eliminate]: 8.31997e-06 [updatestate_assign_eliminate]: 5.18002e-06 [updatestate_loads_eliminate]: 5.29969e-06 [cse]: 2.262e-05 [renormalize]: 4.00003e-07 [remove_dup_value]: 1.36001e-05 [tuple_transform]: 7.03498e-05, [1] [Cycle 1]: 6.533e-05, [2] [d_1]: 5.57099e-05 [renormalize]: 2.40281e-07 [partial_unused_args_eliminate]: 2.02982e-06 [add_cache_embedding]: 1.32103e-05 [add_recomputation]: 6.17099e-05 [cse_after_recomputation]: 2.90303e-05, [1] [Cycle 1]: 2.38097e-05, [1] [cse]: 1.85603e-05 [environ_conv]: 7.30995e-06 [swap_dp_allreduce_reducescatter]: 7.07014e-06 [bias_add_comm_swap]: 2.35019e-06 [label_micro_interleaved_index]: 1.91992e-06 [label_fine_grained_interleaved_index]: 1.91014e-06 [merge_cast_opt]: 1.26008e-06 [slice_recompute_activation]: 2.02982e-06 [micro_interleaved_order_control]: 1.68988e-06 [assign_add_opt]: 7.16001e-06 [ForceFp32Comm]: 8.70321e-07 [remove_cast_before_assign_add]: 8.00006e-07 [full_micro_interleaved_order_control]: 2.03028e-06 [reorder_send_recv_between_fp_bp]: 2.14018e-06 [comm_op_add_attrs]: 8.29808e-07 [add_comm_op_reuse_tag]: 1.22003e-06 [interleave_split_concat_branches]: 9.49949e-07 [interleave_parallel_branches]: 7.49715e-07 [overlap_opt_shard_in_pipeline]: 1.20979e-06 [overlap_opt_shard_grad_in_pipeline]: 2.25985e-06 [control_data_broadcast_order]: 9.59728e-07 [grouped_pairwise_exchange_alltoall]: 9.4017e-07 [offloading_packed_experts]: 9.09902e-07 [overlap_recompute_and_grad_model_parallel]: 2.26963e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.69855e-07 [overlap_recompute_allgather_and_fa_grad]: 8.10251e-07 [overlap_grad_ring_attention]: 2.34973e-06 [overlap_grad_flash_sp]: 1.388e-05 [begin_end_overlap_inline]: 1.33039e-06 [split_matmul_comm_elemetwise]: 2.00979e-06 [split_layernorm_comm]: 2.13971e-06 [handle_group_info]: 7.79983e-07 [symbol_engine_optimizer]: 8.56104e-05, [1] [Cycle 1]: 8.05403e-05, [6] [build]: 3.48035e-06 [elim_shapecalc]: 1.17803e-05 [elim_not_effective]: 1.62702e-05 [opt_reshape]: 8.65012e-06 [fold_const_symbol]: 1.289e-05 [renormalize]: 2.80328e-07 [pipeline_parallel_scheduler]: 2.00002e-06 [auto_monad_reorder]: 2.92202e-05 [get_jit_bprop_graph]: 4.60073e-07 [rewriter_after_jit_bprop_graph]: 4.69852e-07 [eliminate_special_op_node]: 0.0005185 [distribtued_split]: 3.978e-05 [validate]: 3.65698e-05 [task_emit]: 0.0688811 [execute]: 1.02604e-05 Sums bootstrap : 0.000312s : 0.41% type_inference : 0.002565s : 3.38% auto_monad : 0.000129s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000545s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000222s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000433s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000048s : 0.06% optimize.opt_a.cse : 0.000051s : 0.07% optimize.opt_a.a_3 : 0.000110s : 0.15% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000149s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000201s : 0.26% optimize.opt_b.b_2 : Time group info: ------[substitution.] 0.000128 63 4.87% : 0.000006s : 2: substitution.depend_value_elim 1.78% : 0.000002s : 5: substitution.elim_not_effective 1.88% : 0.000002s : 5: substitution.fold_const_symbol 5.96% : 0.000008s : 6: substitution.graph_param_transform 49.57% : 0.000064s : 1: substitution.inline 4.23% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.56% : 0.000005s : 6: substitution.load_eliminater 2.64% : 0.000003s : 2: substitution.reduce_all_const_elim 5.78% : 0.000007s : 10: substitution.remove_not_recompute_node 2.53% : 0.000003s : 2: substitution.replace_old_param 9.50% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.71% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002479 2 88.30% : 0.002189s : 1: type_inference.infer 11.70% : 0.000290s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000063 1 100.00% : 0.000063s : 1: match.inline ------[predicate.] 0.000274 1420 0.68% : 0.000002s : 13: predicate.accumulaten_eliminater 1.01% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.60% : 0.000002s : 12: predicate.addn_check_dump 0.70% : 0.000002s : 13: predicate.addn_zero_filter 0.63% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 1.84% : 0.000005s : 25: predicate.arithmetic_simplify 0.70% : 0.000002s : 13: predicate.cast_eliminate 0.62% : 0.000002s : 12: predicate.check_bprop_eliminate 0.62% : 0.000002s : 12: predicate.compare_switch_simplify 0.19% : 0.000001s : 6: predicate.const_output_eliminate 0.40% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.24% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.68% : 0.000002s : 12: predicate.depend_value_elim 0.73% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.77% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.76% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.22% : 0.000001s : 6: predicate.elim_not_effective 0.50% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.00% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.06% : 0.000003s : 19: predicate.environ_get_add_eliminate 0.94% : 0.000003s : 19: predicate.environ_get_depend_swap 1.59% : 0.000004s : 31: predicate.environ_get_eliminate 0.91% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.67% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.13% : 0.000003s : 14: predicate.float_depend_g_call 0.63% : 0.000002s : 12: predicate.float_environ_get_switch 0.91% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.20% : 0.000001s : 6: predicate.fold_const_symbol 0.69% : 0.000002s : 12: predicate.get_grad_eliminate 0.23% : 0.000001s : 6: predicate.graph_param_transform 0.65% : 0.000002s : 12: predicate.incorporate_call 0.57% : 0.000002s : 12: predicate.incorporate_call_switch 4.71% : 0.000013s : 63: predicate.inline 0.83% : 0.000002s : 12: predicate.inline_without_move 0.33% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.91% : 0.000002s : 12: predicate.less_batch_normalization 1.49% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.11% : 0.000006s : 38: predicate.load_eliminater 1.03% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.02% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.50% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.65% : 0.000002s : 12: predicate.merge_addn 0.64% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.64% : 0.000002s : 12: predicat 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000498s : 0.66% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000019s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_e.mini_step_allgather_replace 0.64% : 0.000002s : 13: predicate.minmaximum_grad 0.54% : 0.000001s : 6: predicate.mutable_eliminate 0.40% : 0.000001s : 6: predicate.opt_reshape 0.39% : 0.000001s : 6: predicate.parallel_virtual_node 0.94% : 0.000003s : 14: predicate.partial_defer_inline 1.02% : 0.000003s : 19: predicate.partial_eliminate 0.62% : 0.000002s : 13: predicate.print_const_string_wrapper 0.67% : 0.000002s : 12: predicate.reduce_all_const_elim 0.94% : 0.000003s : 13: predicate.reduce_eliminate 0.44% : 0.000001s : 12: predicate.remove_not_recompute_node 0.95% : 0.000003s : 25: predicate.replace_applicator 0.37% : 0.000001s : 12: predicate.replace_old_param 16.31% : 0.000045s : 6: predicate.reset_defer_inline 0.72% : 0.000002s : 13: predicate.reshape_eliminate 0.66% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 0.87% : 0.000002s : 12: predicate.same_eliminate 0.42% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.80% : 0.000002s : 12: predicate.shard_identity_eliminate 1.12% : 0.000003s : 18: predicate.special_op_eliminate 0.76% : 0.000002s : 12: predicate.specialize_transform 0.86% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.80% : 0.000002s : 12: predicate.stack_unstack_eliminate 1.97% : 0.000005s : 38: predicate.stopgrad_eliminater 0.34% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.76% : 0.000002s : 14: predicate.switch_defer_inline 1.36% : 0.000004s : 26: predicate.switch_layer_defer_inline 3.78% : 0.000010s : 43: predicate.switch_simplify 0.65% : 0.000002s : 13: predicate.tile_eliminate 0.67% : 0.000002s : 13: predicate.transpose_eliminate 1.48% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.39% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.29% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.45% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.47% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.03% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.38% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.09% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 2.91% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.44% : 0.000001s : 6: predicate.value_based_eliminate 0.69% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.68% : 0.000002s : 12: predicate.virtual_output_eliminate 0.48% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000192 4 8.49% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.51% : 0.000176s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088693 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000135s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000336s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000007s : 1: dataset_repeat_opt 0.06% : 0.00004optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000518s : 0.68% distribtued_split : 0.000040s : 0.05% validate : 0.000037s : 0.05% task_emit : 0.068881s : 90.78% execute : 0.000010s : 0.01% 9s : 1: distribtued_split 0.60% : 0.000528s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.58% : 0.000514s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001092s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.22% : 0.000198s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.12% : 0.005426s : 1: opt_a 0.16% : 0.000139s : 1: opt_after_cconv 0.33% : 0.000293s : 1: opt_b 8.08% : 0.007168s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.02% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000230s : 1: renormalize.infer 0.24% : 0.000211s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000147s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000086s : 1: symbol_engine_optimizer 77.44% : 0.068683s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.85% : 0.002528s : 1: type_inference 0.08% : 0.000070s : 1: validate Time group info: ------[substitution.] 0.000109 63 4.05% : 0.000004s : 2: substitution.depend_value_elim 1.94% : 0.000002s : 5: substitution.elim_not_effective 2.16% : 0.000002s : 5: substitution.fold_const_symbol 6.10% : 0.000007s : 6: substitution.graph_param_transform 48.51% : 0.000053s : 1: substitution.inline 4.53% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.50% : 0.000004s : 6: substitution.load_eliminater 2.17% : 0.000002s : 2: substitution.reduce_all_const_elim 6.57% : 0.000007s : 10: substitution.remove_not_recompute_node 2.28% : 0.000002s : 2: substitution.replace_old_param 9.73% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.45% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002195 2 90.23% : 0.001980s : 1: type_inference.infer 9.77% : 0.000214s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000052 1 100.00% : 0.000052s : 1: match.inline ------[predicate.] 0.000225 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.15% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.79% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.11% : 0.000005s : 25: predicate.arithmetic_simplify 0.91% : 0.000002s : 13: predicate.cast_eliminate 0.84% : 0.000002s : 12: predicate.check_bprop_eliminate 0.70% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.51% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.25% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 1.01% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_depend_swap 2.00% : 0.000005s : 31: predicate.environ_get_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.10% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.42% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.45% : 0.000012s : 63: predicate.inline 1.02% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.02% : 0.000002s : 12: predicate.less_batch_normalization 1.68% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.41% : 0.000005s : 38: predicate.load_eliminater 1.29% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.18% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.72% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.81% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.84% : 0.000002s : 13: predicate.minmaximum_grad 0.74% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.42% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.93% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.10% : 0.000002s : 13: predicate.reduce_eliminate 0.53% : 0.000001s : 12: predicate.remove_not_recompute_node 1.12% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.86% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.44% : 0.000001s : 6: predicate.row_tensor_eliminate 1.01% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.25% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 0.98% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.92% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.27% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.70% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.11% : 0.000009s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.75% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.69% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.68% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.87% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.57% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.79% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.43% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.59% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.66% : 0.000001s : 6: predicate.value_based_eliminate 0.86% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000120 4 7.97% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.03% : 0.000111s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088304 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000060s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.12% : 0.000108s : 1: auto_monad 0.03% : 0.000030s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.34% : 0.000304s : 1: bootstrap 0.02% : 0.000018s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000042s : 1: distribtued_split 0.57% : 0.000505s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000483s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001075s : 80: opt.transform.opt_a 0.06% : 0.000049s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000030s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 5.84% : 0.005154s : 1: opt_a 0.18% : 0.000158s : 1: opt_after_cconv 0.27% : 0.000242s : 1: opt_b 7.68% : 0.006783s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000026s : 1: pre_auto_parallel 0.02% : 0.000016s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.24% : 0.000216s : 1: renormalize.infer 0.20% : 0.000179s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000140s : 1: rewriter_after_opt_a 0.04% : 0.000033s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000090s : 1: symbol_engine_optimizer 78.80% : 0.069587s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.53% : 0.002234s : 1: type_inference 0.07% : 0.000062s : 1: validate Time group info: ------[substitution.] 0.000126 63 4.46% : 0.000006s : 2: substitution.depend_value_elim 2.00% : 0.000003s : 5: substitution.elim_not_effective 1.75% : 0.000002s : 5: substitution.fold_const_symbol 5.49% : 0.000007s : 6: substitution.graph_param_transform 48.72% : 0.000061s : 1: substitution.inline 4.21% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.57% : 0.000004s : 6: substitution.load_eliminater 2.68% : 0.000003s : 2: substitution.reduce_all_const_elim 6.02% : 0.000008s : 10: substitution.remove_not_recompute_node 2.47% : 0.000003s : 2: substitution.replace_old_param 9.91% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.71% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002535 2 89.40% : 0.002267s : 1: type_inference.infer 10.60% : 0.000269s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000060 1 100.00% : 0.000060s : 1: match.inline ------[predicate.] 0.000262 1420 0.67% : 0.000002s : 13: predicate.accumulaten_eliminater 0.95% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.60% : 0.000002s : 12: predicate.addn_check_dump 0.73% : 0.000002s : 13: predicate.addn_zero_filter 0.66% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 1.98% : 0.000005s : 25: predicate.arithmetic_simplify 0.78% : 0.000002s : 13: predicate.cast_eliminate 0.74% : 0.000002s : 12: predicate.check_bprop_eliminate 0.63% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000001s : 6: predicate.const_output_eliminate 0.40% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.13% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.68% : 0.000002s : 12: predicate.depend_value_elim 0.76% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.78% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.79% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.24% : 0.000001s : 6: predicate.elim_not_effective 0.51% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.09% : 0.000003s : 19: predicate.environ_add_const_eliminate 0.94% : 0.000002s : 19: predicate.environ_get_add_eliminate 0.94% : 0.000002s : 19: predicate.environ_get_depend_swap 1.63% : 0.000004s : 31: predicate.environ_get_eliminate 0.99% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.75% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.05% : 0.000003s : 14: predicate.float_depend_g_call 0.61% : 0.000002s : 12: predicate.float_environ_get_switch 0.97% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000001s : 6: predicate.fold_const_symbol 0.72% : 0.000002s : 12: predicate.get_grad_eliminate 0.26% : 0.000001s : 6: predicate.graph_param_transform 0.66% : 0.000002s : 12: predicate.incorporate_call 0.60% : 0.000002s : 12: predicate.incorporate_call_switch 4.86% : 0.000013s : 63: predicate.inline 0.89% : 0.000002s : 12: predicate.inline_without_move 0.35% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.88% : 0.000002s : 12: predicate.less_batch_normalization 1.59% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.14% : 0.000006s : 38: predicate.load_eliminater 1.15% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.08% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.60% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.64% : 0.000002s : 12: predicate.merge_addn 0.64% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.69% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.69% : 0.000002s : 13: predicate.minmaximum_grad 0.65% : 0.000002s : 6: predicate.mutable_eliminate 0.42% : 0.000001s : 6: predicate.opt_reshape 0.43% : 0.000001s : 6: predicate.parallel_virtual_node 0.95% : 0.000002s : 14: predicate.partial_defer_inline 1.06% : 0.000003s : 19: predicate.partial_eliminate 0.74% : 0.000002s : 13: predicate.print_const_string_wrapper 0.78% : 0.000002s : 12: predicate.reduce_all_const_elim 0.93% : 0.000002s : 13: predicate.reduce_eliminate 0.56% : 0.000001s : 12: predicate.remove_not_recompute_node 0.99% : 0.000003s : 25: predicate.replace_applicator 0.38% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.72% : 0.000002s : 13: predicate.reshape_eliminate 0.69% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.45% : 0.000001s : 6: predicate.row_tensor_eliminate 0.88% : 0.000002s : 12: predicate.same_eliminate 0.40% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.82% : 0.000002s : 12: predicate.shard_identity_eliminate 1.24% : 0.000003s : 18: predicate.special_op_eliminate 0.77% : 0.000002s : 12: predicate.specialize_transform 0.96% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.87% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.03% : 0.000005s : 38: predicate.stopgrad_eliminater 0.37% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.77% : 0.000002s : 14: predicate.switch_defer_inline 1.43% : 0.000004s : 26: predicate.switch_layer_defer_inline 3.76% : 0.000010s : 43: predicate.switch_simplify 0.71% : 0.000002s : 13: predicate.tile_eliminate 0.69% : 0.000002s : 13: predicate.transpose_eliminate 1.53% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.48% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.44% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 15.58% : 0.000041s : 37: predicate.tuple_list_get_item_eliminator 1.37% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.18% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.37% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.25% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.04% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.43% : 0.000001s : 6: predicate.value_based_eliminate 0.65% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.71% : 0.000002s : 12: predicate.virtual_output_eliminate 0.47% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000144 4 11.67% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 88.33% : 0.000127s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089007 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000067s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.16% : 0.000142s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000337s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.04% : 0.000032s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.60% : 0.000533s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000508s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001107s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.21% : 0.000191s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.08% : 0.005411s : 1: opt_a 0.16% : 0.000142s : 1: opt_after_cconv 0.32% : 0.000289s : 1: opt_b 8.06% : 0.007178s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.27% : 0.000236s : 1: renormalize.infer 0.21% : 0.000191s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000156s : 1: rewriter_after_opt_a 0.05% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000089s : 1: symbol_engine_optimizer 77.42% : 0.068910s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.90% : 0.002583s : 1: type_inference 0.08% : 0.000071s : 1: validate TotalTime = 0.0808074, [21] [bootstrap]: 0.00032088 [type_inference]: 0.00262943 [auto_monad]: 0.00013321 [graph_reusing]: 2.95974e-06 [inline]: 1.33039e-06 [parallel-infer-symbol]: 2.52016e-06 [pre_auto_parallel]: 2.58503e-05 [insert-virtual-dataset]: 2.88989e-06 [parallel-infer-symbol-second]: 3.39933e-07 [dataset_repeat_opt]: 1.2801e-06 [pipeline_split]: 1.80025e-06 [optimize]: 0.00723921, [52] [py_interpret_to_execute]: 1.79e-05 [rewriter_before_opt_a]: 3.54e-05 [opt_a]: 0.0054926, [2] [Cycle 1]: 0.00153888, [43] [expand_dump_flag]: 4.1998e-06 [switch_simplify]: 3.043e-05 [loop_unroll]: 1.33202e-05 [a_1]: 0.00034345 [recompute_prepare]: 8.7698e-06 [updatestate_depend_eliminate]: 8.74e-06 [updatestate_assign_eliminate]: 6.08992e-06 [updatestate_loads_eliminate]: 7.68015e-06 [parameter_eliminate]: 3.47989e-06 [a_2]: 0.00011772 [accelerated_algorithm]: 8.22032e-06 [shard]: 1.97999e-06 [meta_shard_fg_expand]: 4.29014e-06 [shard_inline]: 8.2003e-06 [auto_parallel]: 1.18599e-05 [parallel]: 8.14022e-06 [flash_sp]: 1.188e-05 [merge_comm]: 8.43033e-06 [allreduce_fusion]: 4.99981e-06 [matmul_add_comm_reduction]: 1.11898e-05 [allreduce_slice_to_reducescatter]: 4.60073e-07 [virtual_shard_identity]: 9.14e-06 [virtual_dataset]: 7.82963e-06 [get_grad_eliminate_]: 7.6103e-06 [virtual_output]: 7.51996e-06 [merge_forward]: 6.32973e-06 [cell_reuse_recompute_pass]: 2.00002e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.727e-05 [before_grad]: 1.34199e-05 [inplace_validation]: 5.04963e-06 [meta_fg_expand]: 5.83986e-06 [inplace_validation_after_expand]: 6.86012e-06 [flash_sp_send_recv_attached]: 5.35976e-06 [receive_attached]: 2.89967e-06 [after_resolve]: 1.13598e-05 [a_after_grad]: 1.274e-05 [special_op_eliminate]: 7.66991e-06 [renormalize]: 0.00043895 [add_forward_monad_depend]: 3.70014e-06 [auto_monad_grad]: 1.72015e-06 [auto_monad_eliminator]: 3.25399e-05 [cse]: 3.46699e-05 [a_3]: 5.92698e-05 [Cycle 2]: 0.00077812, [43] [expand_dump_flag]: 1.02958e-06 [switch_simplify]: 8.86992e-06 [loop_unroll]: 7.6401e-06 [a_1]: 0.00020083 [recompute_prepare]: 7.40029e-06 [updatestate_depend_eliminate]: 6.06012e-06 [updatestate_assign_eliminate]: 4.90015e-06 [updatestate_loads_eliminate]: 5.43008e-06 [parameter_eliminate]: 1.09011e-06 [a_2]: 0.00010554 [accelerated_algorithm]: 8.4904e-06 [shard]: 1.15996e-06 [meta_shard_fg_expand]: 2.39024e-06 [shard_inline]: 7.62986e-06 [auto_parallel]: 1.14404e-05 [parallel]: 3.60003e-06 [flash_sp]: 3.62005e-06 [merge_comm]: 5.83008e-06 [allreduce_fusion]: 4.97e-06 [matmul_add_comm_reduction]: 7.99028e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 8.80985e-06 [virtual_dataset]: 7.83987e-06 [get_grad_eliminate_]: 7.41007e-06 [virtual_output]: 7.09994e-06 [merge_forward]: 4.69014e-06 [cell_reuse_recompute_pass]: 1.97999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.52998e-05 [before_grad]: 1.23503e-05 [inplace_validation]: 4.62029e-06 [meta_fg_expand]: 4.96022e-06 [inplace_validation_after_expand]: 5.15021e-06 [flash_sp_send_recv_attached]: 9.49949e-07 [receive_attached]: 7.30157e-07 [after_resolve]: 1.03703e-05 [a_after_grad]: 1.18301e-05 [special_op_eliminate]: 7.39982e-06 [renormalize]: 9.03383e-08 [add_forward_monad_depend]: 7.5018e-07 [auto_monad_grad]: 1.28988e-06 [auto_monad_eliminator]: 1.83298e-05 [cse]: 1.946e-05 [a_3]: 4.831e-05 [py_interpret_to_execute_after_opt_a]: 9.25036e-06 [slice_cell_reuse_recomputed_activation]: 2.65008e-06 [rewriter_after_opt_a]: 0.00016624 [convert_after_rewriter]: 8.52998e-06 [order_py_execute_after_rewriter]: 6.59004e-06 [opt_b]: 0.00024597, [1] [Cycle 1]: 0.00024033, [7] [b_1]: 0.00016471 [b_2]: 9.58005e-06 [updatestate_depend_eliminate]: 5.62007e-06 [updatestate_assign_eliminate]: 4.59002e-06 [updatestate_loads_eliminate]: 5.15999e-06 [renormalize]: 2.99886e-07 [cse]: 1.82702e-05 [optimize_parallel_all_gather_comm]: 8.83033e-06 [overlap_param_gather]: 1.32993e-06 [cconv]: 2.38498e-05 [loop_unroll]: 0.00049227 [opt_after_cconv]: 0.00013829, [1] [Cycle 1]: 0.0001322, [7] [c_1]: 5.33201e-05 [parameter_eliminate]: 2.59979e-06 [updatestate_depend_eliminate]: 8.1202e-06 [updatestate_assign_eliminate]: 4.67012e-06 [updatestate_loads_eliminate]: 9.51998e-06 [cse]: 2.07997e-05 [renormalize]: 4.00003e-07 [remove_dup_value]: 1.34399e-05 [tuple_transform]: 7.24201e-05, [1] [Cycle 1]: 6.76899e-05, [2] [d_1]: 5.59101e-05 [renormalize]: 2.19792e-07 [partial_unused_args_eliminate]: 2.08989e-06 [add_cache_embedding]: 1.45799e-05 [add_recomputation]: 6.82999e-05 [cse_after_recomputation]: 2.77599e-05, [1] [Cycle 1]: 2.304e-05, [1] [cse]: 1.79601e-05 [environ_conv]: 7.7202e-06 [swap_dp_allreduce_reducescatter]: 7.56001e-06 [bias_add_comm_swap]: 2.35997e-06 [label_micro_interleaved_index]: 1.99024e-06 [label_fine_grained_interleaved_index]: 2.56998e-06 [merge_cast_opt]: 1.32993e-06 [slice_recompute_activation]: 2.06986e-06 [micro_interleaved_order_control]: 1.83005e-06 [assign_add_opt]: 8.22032e-06 [ForceFp32Comm]: 9.30391e-07 [remove_cast_before_assign_add]: 8.69855e-07 [full_micro_interleaved_order_control]: 2.67988e-06 [reorder_send_recv_between_fp_bp]: 2.2403e-06 [comm_op_add_attrs]: 1.05007e-06 [add_comm_op_reuse_tag]: 1.31968e-06 [interleave_split_concat_branches]: 6.50063e-07 [interleave_parallel_branches]: 9.00123e-07 [overlap_opt_shard_in_pipeline]: 1.32993e-06 [overlap_opt_shard_grad_in_pipeline]: 2.51969e-06 [control_data_broadcast_order]: 1.18976e-06 [grouped_pairwise_exchange_alltoall]: 1.38022e-06 [offloading_packed_experts]: 1.11014e-06 [overlap_recompute_and_grad_model_parallel]: 2.10991e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.29926e-07 [overlap_recompute_allgather_and_fa_grad]: 1.20001e-06 [overlap_grad_ring_attention]: 1.79e-06 [overlap_grad_flash_sp]: 1.43298e-05 [begin_end_overlap_inline]: 5.40167e-07 [split_matmul_comm_elemetwise]: 2.04984e-06 [split_layernorm_comm]: 2.35997e-06 [handle_group_info]: 9.19681e-07 [symbol_engine_optimizer]: 8.76603e-05, [1] [Cycle 1]: 8.32197e-05, [6] [build]: 4.25987e-06 [elim_shapecalc]: 1.19098e-05 [elim_not_effective]: 1.64602e-05 [opt_reshape]: 8.5202e-06 [fold_const_symbol]: 1.57598e-05 [renormalize]: 1.80211e-07 [pipeline_parallel_scheduler]: 1.30991e-06 [auto_monad_reorder]: 3.08999e-05 [get_jit_bprop_graph]: 4.60073e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.000512 [distribtued_split]: 4.172e-05 [validate]: 3.94699e-05 [task_emit]: 0.069571 [execute]: 1.051e-05 Sums bootstrap : 0.000321s : 0.42% type_inference : 0.002629s : 3.43% auto_monad : 0.000133s : 0.17% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000018s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000544s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000223s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000439s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000166s : 0.22% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000492s : 0.64% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000010s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000068s : 0.09% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000016s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000512s : 0.67% distribtued_split : 0.000042s : 0.05% validate : 0.000039s : 0.05% task_emit : 0.069571s : 90.76% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000134 63 5.12% : 0.000007s : 2: substitution.depend_value_elim 2.15% : 0.000003s : 5: substitution.elim_not_effective 2.12% : 0.000003s : 5: substitution.fold_const_symbol 5.32% : 0.000007s : 6: substitution.graph_param_transform 50.86% : 0.000068s : 1: substitution.inline 3.87% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.04% : 0.000004s : 6: substitution.load_eliminater 2.35% : 0.000003s : 2: substitution.reduce_all_const_elim 6.00% : 0.000008s : 10: substitution.remove_not_recompute_node 2.61% : 0.000004s : 2: substitution.replace_old_param 8.81% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.74% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002598 2 89.03% : 0.002313s : 1: type_inference.infer 10.97% : 0.000285s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000067 1 100.00% : 0.000067s : 1: match.inline ------[predicate.] 0.000229 1420 0.77% : 0.000002s : 13: predicate.accumulaten_eliminater 1.32% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.08% : 0.000005s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.86% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.41% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.45% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.79% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.63% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.26% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_depend_swap 1.90% : 0.000004s : 31: predicate.environ_get_eliminate 1.20% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.35% : 0.000003s : 14: predicate.float_depend_g_call 0.80% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.42% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.65% : 0.000013s : 63: predicate.inline 0.99% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.05% : 0.000002s : 12: predicate.less_batch_normalization 1.72% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000005s : 38: predicate.load_eliminater 1.55% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.17% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.82% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.83% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.80% : 0.000002s : 13: predicate.minmaximum_grad 0.76% : 0.000002s : 6: predicate.mutable_eliminate 0.42% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.25% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.81% : 0.000002s : 13: predicate.print_const_string_wrapper 0.93% : 0.000002s : 12: predicate.reduce_all_const_elim 0.98% : 0.000002s : 13: predicate.reduce_eliminate 0.66% : 0.000002s : 12: predicate.remove_not_recompute_node 1.10% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.83% : 0.000002s : 13: predicate.reshape_eliminate 0.75% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.30% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 0.91% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.35% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.61% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.47% : 0.000010s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.85% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.74% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.64% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.89% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.52% : 0.000003s : 25: predicate.tuple_list_get_set_item_eliminator 2.54% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.63% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.32% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.44% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.47% : 0.000001s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.50% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000154 4 10.81% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.19% : 0.000137s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089872 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000073s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.16% : 0.000145s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.39% : 0.000346s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.59% : 0.000527s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000007s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000502s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001100s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 6.12% : 0.005496s : 1: opt_a 0.16% : 0.000142s : 1: opt_after_cconv 0.28% : 0.000249s : 1: opt_b 8.06% : 0.007247s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.03% : 0.000022s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.27% : 0.000242s : 1: renormalize.infer 0.21% : 0.000191s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.19% : 0.000173s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000007s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000091s : 1: symbol_engine_optimizer 77.44% : 0.069596s : 1: task_emit 0.08% : 0.000076s : 1: tuple_transform 2.95% : 0.002648s : 1: type_inference 0.08% : 0.000076s : 1: validate TotalTime = 0.0828889, [21] [bootstrap]: 0.00032884 [type_inference]: 0.00266099 [auto_monad]: 0.00014251 [graph_reusing]: 2.90992e-06 [inline]: 1.46031e-06 [parallel-infer-symbol]: 2.11038e-06 [pre_auto_parallel]: 2.89399e-05 [insert-virtual-dataset]: 3.41004e-06 [parallel-infer-symbol-second]: 5.0012e-07 [dataset_repeat_opt]: 1.55019e-06 [pipeline_split]: 1.82027e-06 [optimize]: 0.00791344, [52] [py_interpret_to_execute]: 1.685e-05 [rewriter_before_opt_a]: 4.157e-05 [opt_a]: 0.00605622, [2] [Cycle 1]: 0.00175074, [43] [expand_dump_flag]: 3.87011e-06 [switch_simplify]: 3.32203e-05 [loop_unroll]: 1.60602e-05 [a_1]: 0.00040677 [recompute_prepare]: 1.13701e-05 [updatestate_depend_eliminate]: 9.70019e-06 [updatestate_assign_eliminate]: 6.66967e-06 [updatestate_loads_eliminate]: 8.29995e-06 [parameter_eliminate]: 3.68012e-06 [a_2]: 0.00014386 [accelerated_algorithm]: 1.05803e-05 [shard]: 2.05962e-06 [meta_shard_fg_expand]: 4.21982e-06 [shard_inline]: 1.129e-05 [auto_parallel]: 1.31601e-05 [parallel]: 8.1202e-06 [flash_sp]: 1.26604e-05 [merge_comm]: 9.6499e-06 [allreduce_fusion]: 6.46012e-06 [matmul_add_comm_reduction]: 1.13901e-05 [allreduce_slice_to_reducescatter]: 4.60073e-07 [virtual_shard_identity]: 1.133e-05 [virtual_dataset]: 9.70997e-06 [get_grad_eliminate_]: 9.30997e-06 [virtual_output]: 8.88016e-06 [merge_forward]: 6.80983e-06 [cell_reuse_recompute_pass]: 2.08011e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.106e-05 [before_grad]: 1.72304e-05 [inplace_validation]: 5.53019e-06 [meta_fg_expand]: 6.36978e-06 [inplace_validation_after_expand]: 7.49994e-06 [flash_sp_send_recv_attached]: 5.95022e-06 [receive_attached]: 2.37022e-06 [after_resolve]: 1.39698e-05 [a_after_grad]: 1.607e-05 [special_op_eliminate]: 9.77982e-06 [renormalize]: 0.00047632 [add_forward_monad_depend]: 3.79002e-06 [auto_monad_grad]: 2.00002e-06 [auto_monad_eliminator]: 3.51002e-05 [cse]: 3.86201e-05 [a_3]: 7.04997e-05 [Cycle 2]: 0.000963, [43] [expand_dump_flag]: 1.13016e-06 [switch_simplify]: 1.15e-05 [loop_unroll]: 9.45991e-06 [a_1]: 0.00028219 [recompute_prepare]: 1.00299e-05 [updatestate_depend_eliminate]: 6.9798e-06 [updatestate_assign_eliminate]: 5.53019e-06 [updatestate_loads_eliminate]: 5.94975e-06 [parameter_eliminate]: 1.4999e-06 [a_2]: 0.00012845 [accelerated_algorithm]: 1.04001e-05 [shard]: 1.2801e-06 [meta_shard_fg_expand]: 2.80002e-06 [shard_inline]: 9.4804e-06 [auto_parallel]: 1.25901e-05 [parallel]: 3.98979e-06 [flash_sp]: 4.00981e-06 [merge_comm]: 6.91973e-06 [allreduce_fusion]: 6.21006e-06 [matmul_add_comm_reduction]: 8.61986e-06 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 1.06399e-05 [virtual_dataset]: 9.47015e-06 [get_grad_eliminate_]: 9.05013e-06 [virtual_output]: 8.74e-06 [merge_forward]: 5.11995e-06 [cell_reuse_recompute_pass]: 2.3297e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.98502e-05 [before_grad]: 1.59396e-05 [inplace_validation]: 4.97978e-06 [meta_fg_expand]: 5.64009e-06 [inplace_validation_after_expand]: 6.10994e-06 [flash_sp_send_recv_attached]: 1.09011e-06 [receive_attached]: 8.2003e-07 [after_resolve]: 1.23801e-05 [a_after_grad]: 1.46301e-05 [special_op_eliminate]: 9.03988e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 1.22981e-06 [auto_monad_grad]: 1.34017e-06 [auto_monad_eliminator]: 2.01901e-05 [cse]: 2.207e-05 [a_3]: 5.89197e-05 [py_interpret_to_execute_after_opt_a]: 9.65036e-06 [slice_cell_reuse_recomputed_activation]: 2.33995e-06 [rewriter_after_opt_a]: 0.00015106 [convert_after_rewriter]: 9.58005e-06 [order_py_execute_after_rewriter]: 7.40029e-06 [opt_b]: 0.00028565, [1] [Cycle 1]: 0.00027952, [7] [b_1]: 0.00019696 [b_2]: 1.10902e-05 [updatestate_depend_eliminate]: 6.05034e-06 [updatestate_assign_eliminate]: 4.92018e-06 [updatestate_loads_eliminate]: 5.9898e-06 [renormalize]: 4.60073e-07 [cse]: 2.087e-05 [optimize_parallel_all_gather_comm]: 9.41008e-06 [overlap_param_gather]: 1.32993e-06 [cconv]: 2.46698e-05 [loop_unroll]: 0.00049895 [opt_after_cconv]: 0.00015269, [1] [Cycle 1]: 0.00014656, [7] [c_1]: 6.48797e-05 [parameter_eliminate]: 2.65986e-06 [updatestate_depend_eliminate]: 8.77026e-06 [updatestate_assign_eliminate]: 5.02961e-06 [updatestate_loads_eliminate]: 5.88968e-06 [cse]: 2.33501e-05 [renormalize]: 4.60073e-07 [remove_dup_value]: 1.628e-05 [tuple_transform]: 8.42097e-05, [1] [Cycle 1]: 7.96104e-05, [2] [d_1]: 6.95102e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 2.54018e-06 [add_cache_embedding]: 1.34199e-05 [add_recomputation]: 7.255e-05 [cse_after_recomputation]: 2.885e-05, [1] [Cycle 1]: 2.38898e-05, [1] [cse]: 1.83699e-05 [environ_conv]: 8.19005e-06 [swap_dp_allreduce_reducescatter]: 8.56025e-06 [bias_add_comm_swap]: 2.40002e-06 [label_micro_interleaved_index]: 2.2701e-06 [label_fine_grained_interleaved_index]: 2.06009e-06 [merge_cast_opt]: 1.63028e-06 [slice_recompute_activation]: 1.8999e-06 [micro_interleaved_order_control]: 2.31992e-06 [assign_add_opt]: 7.66991e-06 [ForceFp32Comm]: 8.40053e-07 [remove_cast_before_assign_add]: 1.15996e-06 [full_micro_interleaved_order_control]: 2.28966e-06 [reorder_send_recv_between_fp_bp]: 2.2701e-06 [comm_op_add_attrs]: 1.03004e-06 [add_comm_op_reuse_tag]: 1.11014e-06 [interleave_split_concat_branches]: 1.24006e-06 [interleave_parallel_branches]: 9.89996e-07 [overlap_opt_shard_in_pipeline]: 1.27032e-06 [overlap_opt_shard_grad_in_pipeline]: 2.63005e-06 [control_data_broadcast_order]: 1.23959e-06 [grouped_pairwise_exchange_alltoall]: 1.81003e-06 [offloading_packed_experts]: 1.3602e-06 [overlap_recompute_and_grad_model_parallel]: 2.35019e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.07987e-06 [overlap_recompute_allgather_and_fa_grad]: 1.26008e-06 [overlap_grad_ring_attention]: 2.10013e-06 [overlap_grad_flash_sp]: 1.68099e-05 [begin_end_overlap_inline]: 9.60194e-07 [split_matmul_comm_elemetwise]: 2.12993e-06 [split_layernorm_comm]: 2.13971e-06 [handle_group_info]: 1.07009e-06 [symbol_engine_optimizer]: 9.95998e-05, [1] [Cycle 1]: 9.49604e-05, [6] [build]: 4.92018e-06 [elim_shapecalc]: 1.45999e-05 [elim_not_effective]: 1.96998e-05 [opt_reshape]: 1.03698e-05 [fold_const_symbol]: 1.65999e-05 [renormalize]: 3.39933e-07 [pipeline_parallel_scheduler]: 1.55019e-06 [auto_monad_reorder]: 3.30298e-05 [get_jit_bprop_graph]: 4.60073e-07 [rewriter_after_jit_bprop_graph]: 4.50294e-07 [eliminate_special_op_node]: 0.00051829 [distribtued_split]: 4.57601e-05 [validate]: 3.877e-05 [task_emit]: 0.0708856 [execute]: 1.38101e-05 Sums bootstrap : 0.000329s : 0.42% type_inference : 0.002661s : 3.39% auto_monad : 0.000143s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000029s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000042s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000045s : 0.06% optimize.opt_a.loop_unroll : 0.000026s : 0.03% optimize.opt_a.a_1 : 0.000689s : 0.88% optimize.opt_a.recompute_prepare : 0.000021s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000017s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000272s : 0.35% optimize.opt_a.accelerated_algorithm : 0.000021s : 0.03% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000021s : 0.03% optimize.opt_a.auto_parallel : 0.000026s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000017s : 0.02% optimize.opt_a.merge_comm : 0.000017s : 0.02% optimize.opt_a.allreduce_fusion : 0.000013s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000022s : 0.03% optimize.opt_a.virtual_dataset : 0.000019s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000018s : 0.02% optimize.opt_a.virtual_output : 0.000018s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000041s : 0.05% optimize.opt_a.before_grad : 0.000033s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000012s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000014s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000026s : 0.03% optimize.opt_a.a_after_grad : 0.000031s : 0.04% optimize.opt_a.special_op_eliminate : 0.000019s : 0.02% optimize.opt_a.renormalize : 0.000476s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000055s : 0.07% optimize.opt_a.cse : 0.000061s : 0.08% optimize.opt_a.a_3 : 0.000129s : 0.16% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000151s : 0.19% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000197s : 0.25% optimize.opt_b.b_2 : 0.000011s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000021s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000499s : 0.64% optimize.opt_after_cconv.c_1 : 0.000065s : 0.08% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000016s : 0.02% optimize.tuple_transform.d_1 : 0.000070s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000073s : 0.09% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000009s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000017s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000015s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000020s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000033s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000518s : 0.66% distribtued_split : 0.000046s : 0.06% validate : 0.000039s : 0.05% task_emit : 0.070886s : 90.33% execute : 0.000014s : 0.02% Time group info: ------[substitution.] 0.000148 63 5.41% : 0.000008s : 2: substitution.depend_value_elim 2.18% : 0.000003s : 5: substitution.elim_not_effective 2.09% : 0.000003s : 5: substitution.fold_const_symbol 5.80% : 0.000009s : 6: substitution.graph_param_transform 46.46% : 0.000069s : 1: substitution.inline 4.77% : 0.000007s : 10: substitution.j_node_and_user_rematch 3.71% : 0.000005s : 6: substitution.load_eliminater 3.01% : 0.000004s : 2: substitution.reduce_all_const_elim 7.04% : 0.000010s : 10: substitution.remove_not_recompute_node 2.72% : 0.000004s : 2: substitution.replace_old_param 8.82% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 7.99% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002629 2 88.39% : 0.002324s : 1: type_inference.infer 11.61% : 0.000305s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000271 1420 0.76% : 0.000002s : 13: predicate.accumulaten_eliminater 1.24% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.21% : 0.000006s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.77% : 0.000002s : 12: predicate.check_bprop_eliminate 0.69% : 0.000002s : 12: predicate.compare_switch_simplify 0.26% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.32% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.79% : 0.000002s : 12: predicate.depend_value_elim 0.78% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.92% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.81% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.53% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.18% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000003s : 19: predicate.environ_get_depend_swap 1.95% : 0.000005s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.36% : 0.000004s : 14: predicate.float_depend_g_call 0.80% : 0.000002s : 12: predicate.float_environ_get_switch 1.13% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.84% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.82% : 0.000002s : 12: predicate.incorporate_call 0.72% : 0.000002s : 12: predicate.incorporate_call_switch 6.07% : 0.000016s : 63: predicate.inline 1.14% : 0.000003s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.07% : 0.000003s : 12: predicate.less_batch_normalization 1.73% : 0.000005s : 25: predicate.list_to_tuple_eliminator_ 2.33% : 0.000006s : 38: predicate.load_eliminater 1.33% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.71% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.51% : 0.000001s : 6: predicate.parallel_virtual_node 1.07% : 0.000003s : 14: predicate.partial_defer_inline 1.32% : 0.000004s : 19: predicate.partial_eliminate 0.86% : 0.000002s : 13: predicate.print_const_string_wrapper 0.83% : 0.000002s : 12: predicate.reduce_all_const_elim 1.19% : 0.000003s : 13: predicate.reduce_eliminate 0.56% : 0.000002s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.28% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.77% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.45% : 0.000001s : 6: predicate.row_tensor_eliminate 0.94% : 0.000003s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.90% : 0.000002s : 12: predicate.shard_identity_eliminate 1.44% : 0.000004s : 18: predicate.special_op_eliminate 1.08% : 0.000003s : 12: predicate.specialize_transform 1.09% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.31% : 0.000006s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000005s : 26: predicate.switch_layer_defer_inline 4.11% : 0.000011s : 43: predicate.switch_simplify 0.93% : 0.000003s : 13: predicate.tile_eliminate 0.75% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000005s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000005s : 25: predicate.tuple_list_get_item_const_eliminator 1.51% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.71% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.66% : 0.000005s : 25: predicate.tuple_list_get_set_item_eliminator 2.59% : 0.000007s : 37: predicate.tuple_list_set_item_eliminator 1.72% : 0.000005s : 25: predicate.tuple_to_list_eliminator_ 2.33% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.40% : 0.000009s : 50: predicate.updatestate_useless_node_eliminater 0.49% : 0.000001s : 6: predicate.value_based_eliminate 0.88% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.59% : 0.000002s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000164 4 10.30% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.70% : 0.000147s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092992 192 0.01% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000077s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.17% : 0.000155s : 1: auto_monad 0.04% : 0.000040s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000357s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.01% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000032s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000055s : 1: distribtued_split 0.57% : 0.000533s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000023s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000010s : 1: graph_reusing 0.01% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000009s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000509s : 1: loop_unroll 0.01% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000019s : 1: opt.transform.loop_unroll_optimizer 1.47% : 0.001368s : 80: opt.transform.opt_a 0.07% : 0.000063s : 1: opt.transform.opt_after_cconv 0.20% : 0.000184s : 27: opt.transform.opt_b 0.07% : 0.000067s : 1: opt.transform.opt_trans_graph 0.04% : 0.000039s : 3: opt.transform.special_op_eliminate 0.06% : 0.000057s : 4: opt.transform.symbol_engine_opt 6.52% : 0.006061s : 1: opt_a 0.17% : 0.000158s : 1: opt_after_cconv 0.31% : 0.000289s : 1: opt_b 8.52% : 0.007922s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000036s : 1: pre_auto_parallel 0.02% : 0.000022s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000005s : 1: remove_cast_before_assign_add 0.02% : 0.000020s : 1: remove_dup_value 0.27% : 0.000255s : 1: renormalize.infer 0.23% : 0.000215s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000157s : 1: rewriter_after_opt_a 0.05% : 0.000046s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000006s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000103s : 1: symbol_engine_optimizer 76.26% : 0.070918s : 1: task_emit 0.09% : 0.000088s : 1: tuple_transform 2.88% : 0.002679s : 1: type_inference 0.08% : 0.000077s : 1: validate TotalTime = 0.0762274, [21] [bootstrap]: 0.0002833 [type_inference]: 0.00215316 [auto_monad]: 9.73698e-05 [graph_reusing]: 1.87987e-06 [inline]: 1.18976e-06 [parallel-infer-symbol]: 1.34017e-06 [pre_auto_parallel]: 2.15303e-05 [insert-virtual-dataset]: 1.91992e-06 [parallel-infer-symbol-second]: 4.20026e-07 [dataset_repeat_opt]: 7.69738e-07 [pipeline_split]: 8.801e-07 [optimize]: 0.0066914, [52] [py_interpret_to_execute]: 1.20499e-05 [rewriter_before_opt_a]: 2.99499e-05 [opt_a]: 0.00511706, [2] [Cycle 1]: 0.00141899, [43] [expand_dump_flag]: 2.48989e-06 [switch_simplify]: 2.48798e-05 [loop_unroll]: 1.28797e-05 [a_1]: 0.00031957 [recompute_prepare]: 9.20007e-06 [updatestate_depend_eliminate]: 7.16979e-06 [updatestate_assign_eliminate]: 4.89969e-06 [updatestate_loads_eliminate]: 5.41983e-06 [parameter_eliminate]: 2.3297e-06 [a_2]: 0.00011256 [accelerated_algorithm]: 8.17003e-06 [shard]: 1.51992e-06 [meta_shard_fg_expand]: 2.71993e-06 [shard_inline]: 8.48994e-06 [auto_parallel]: 1.18301e-05 [parallel]: 5.85988e-06 [flash_sp]: 5.60004e-06 [merge_comm]: 6.76e-06 [allreduce_fusion]: 5.3402e-06 [matmul_add_comm_reduction]: 8.52998e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 9.54e-06 [virtual_dataset]: 7.67969e-06 [get_grad_eliminate_]: 7.88039e-06 [virtual_output]: 7.91997e-06 [merge_forward]: 4.72972e-06 [cell_reuse_recompute_pass]: 1.45007e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.64299e-05 [before_grad]: 1.367e-05 [inplace_validation]: 4.29014e-06 [meta_fg_expand]: 4.82006e-06 [inplace_validation_after_expand]: 4.86989e-06 [flash_sp_send_recv_attached]: 2.54018e-06 [receive_attached]: 1.51992e-06 [after_resolve]: 1.03801e-05 [a_after_grad]: 1.26199e-05 [special_op_eliminate]: 7.73976e-06 [renormalize]: 0.00041535 [add_forward_monad_depend]: 2.25008e-06 [auto_monad_grad]: 1.22003e-06 [auto_monad_eliminator]: 2.09003e-05 [cse]: 2.40598e-05 [a_3]: 5.59199e-05 [Cycle 2]: 0.00076443, [43] [expand_dump_flag]: 9.60194e-07 [switch_simplify]: 1.07e-05 [loop_unroll]: 7.92975e-06 [a_1]: 0.00019919 [recompute_prepare]: 7.56979e-06 [updatestate_depend_eliminate]: 5.49015e-06 [updatestate_assign_eliminate]: 4.44008e-06 [updatestate_loads_eliminate]: 4.85964e-06 [parameter_eliminate]: 9.50415e-07 [a_2]: 0.00010383 [accelerated_algorithm]: 8.06991e-06 [shard]: 1.0198e-06 [meta_shard_fg_expand]: 2.45031e-06 [shard_inline]: 7.8897e-06 [auto_parallel]: 1.01603e-05 [parallel]: 3.14973e-06 [flash_sp]: 2.68966e-06 [merge_comm]: 5.9302e-06 [allreduce_fusion]: 4.86989e-06 [matmul_add_comm_reduction]: 7.10972e-06 [allreduce_slice_to_reducescatter]: 2.59839e-07 [virtual_shard_identity]: 8.35024e-06 [virtual_dataset]: 7.44965e-06 [get_grad_eliminate_]: 7.2401e-06 [virtual_output]: 7.11018e-06 [merge_forward]: 4.77023e-06 [cell_reuse_recompute_pass]: 1.81003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.48201e-05 [before_grad]: 1.213e-05 [inplace_validation]: 4.76977e-06 [meta_fg_expand]: 4.94998e-06 [inplace_validation_after_expand]: 5.11017e-06 [flash_sp_send_recv_attached]: 8.10251e-07 [receive_attached]: 6.60308e-07 [after_resolve]: 9.33977e-06 [a_after_grad]: 1.21901e-05 [special_op_eliminate]: 7.40029e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 6.99889e-07 [auto_monad_grad]: 9.49949e-07 [auto_monad_eliminator]: 1.57403e-05 [cse]: 1.80202e-05 [a_3]: 4.81298e-05 [py_interpret_to_execute_after_opt_a]: 8.31019e-06 [slice_cell_reuse_recomputed_activation]: 2.08011e-06 [rewriter_after_opt_a]: 0.00012704 [convert_after_rewriter]: 8.16025e-06 [order_py_execute_after_rewriter]: 5.32018e-06 [opt_b]: 0.00023653, [1] [Cycle 1]: 0.0002316, [7] [b_1]: 0.00015952 [b_2]: 9.93023e-06 [updatestate_depend_eliminate]: 5.0799e-06 [updatestate_assign_eliminate]: 4.28967e-06 [updatestate_loads_eliminate]: 4.67012e-06 [renormalize]: 2.79862e-07 [cse]: 1.76197e-05 [optimize_parallel_all_gather_comm]: 7.46967e-06 [overlap_param_gather]: 9.20147e-07 [cconv]: 1.59396e-05 [loop_unroll]: 0.00047278 [opt_after_cconv]: 0.00012674, [1] [Cycle 1]: 0.00012103, [7] [c_1]: 5.10798e-05 [parameter_eliminate]: 2.04006e-06 [updatestate_depend_eliminate]: 7.11018e-06 [updatestate_assign_eliminate]: 4.42006e-06 [updatestate_loads_eliminate]: 5.49015e-06 [cse]: 1.971e-05 [renormalize]: 2.59839e-07 [remove_dup_value]: 1.00099e-05 [tuple_transform]: 6.674e-05, [1] [Cycle 1]: 6.261e-05, [2] [d_1]: 5.35101e-05 [renormalize]: 1.40164e-07 [partial_unused_args_eliminate]: 1.47009e-06 [add_cache_embedding]: 1.12001e-05 [add_recomputation]: 5.39399e-05 [cse_after_recomputation]: 2.56398e-05, [1] [Cycle 1]: 2.127e-05, [1] [cse]: 1.59401e-05 [environ_conv]: 6.10016e-06 [swap_dp_allreduce_reducescatter]: 6.96024e-06 [bias_add_comm_swap]: 1.62004e-06 [label_micro_interleaved_index]: 1.50967e-06 [label_fine_grained_interleaved_index]: 1.30991e-06 [merge_cast_opt]: 5.59725e-07 [slice_recompute_activation]: 8.30274e-07 [micro_interleaved_order_control]: 1.24983e-06 [assign_add_opt]: 6.44987e-06 [ForceFp32Comm]: 5.39701e-07 [remove_cast_before_assign_add]: 5.89993e-07 [full_micro_interleaved_order_control]: 1.21025e-06 [reorder_send_recv_between_fp_bp]: 1.47987e-06 [comm_op_add_attrs]: 5.89993e-07 [add_comm_op_reuse_tag]: 5.89993e-07 [interleave_split_concat_branches]: 5.89993e-07 [interleave_parallel_branches]: 6.10016e-07 [overlap_opt_shard_in_pipeline]: 1.11992e-06 [overlap_opt_shard_grad_in_pipeline]: 1.22981e-06 [control_data_broadcast_order]: 6.70087e-07 [grouped_pairwise_exchange_alltoall]: 6.50063e-07 [offloading_packed_experts]: 6.19795e-07 [overlap_recompute_and_grad_model_parallel]: 1.21025e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.29805e-07 [overlap_recompute_allgather_and_fa_grad]: 5.20144e-07 [overlap_grad_ring_attention]: 1.22003e-06 [overlap_grad_flash_sp]: 1.14399e-05 [begin_end_overlap_inline]: 3.90224e-07 [split_matmul_comm_elemetwise]: 1.22981e-06 [split_layernorm_comm]: 1.41002e-06 [handle_group_info]: 3.49712e-07 [symbol_engine_optimizer]: 8.35503e-05, [1] [Cycle 1]: 7.946e-05, [6] [build]: 4.23985e-06 [elim_shapecalc]: 1.16802e-05 [elim_not_effective]: 1.55601e-05 [opt_reshape]: 9.03988e-06 [fold_const_symbol]: 1.32299e-05 [renormalize]: 2.10013e-07 [pipeline_parallel_scheduler]: 1.00024e-06 [auto_monad_reorder]: 2.15699e-05 [get_jit_bprop_graph]: 2.90107e-07 [rewriter_after_jit_bprop_graph]: 2.60305e-07 [eliminate_special_op_node]: 0.00048271 [distribtued_split]: 3.31402e-05 [validate]: 3.00501e-05 [task_emit]: 0.0661722 [execute]: 7.30995e-06 Sums bootstrap : 0.000283s : 0.39% type_inference : 0.002153s : 2.98% auto_monad : 0.000097s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000022s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000036s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000519s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000009s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000010s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000216s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000008s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000415s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000037s : 0.05% optimize.opt_a.cse : 0.000042s : 0.06% optimize.opt_a.a_3 : 0.000104s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000127s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000007s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000473s : 0.65% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.02% optimize.add_recomputation : 0.000054s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000006s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000011s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000000s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000022s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000483s : 0.67% distribtued_split : 0.000033s : 0.05% validate : 0.000030s : 0.04% task_emit : 0.066172s : 91.47% execute : 0.000007s : 0.01% Time group info: ------[substitution.] 0.000104 63 3.94% : 0.000004s : 2: substitution.depend_value_elim 1.91% : 0.000002s : 5: substitution.elim_not_effective 1.81% : 0.000002s : 5: substitution.fold_const_symbol 5.97% : 0.000006s : 6: substitution.graph_param_transform 48.65% : 0.000051s : 1: substitution.inline 4.70% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.48% : 0.000004s : 6: substitution.load_eliminater 2.11% : 0.000002s : 2: substitution.reduce_all_const_elim 7.06% : 0.000007s : 10: substitution.remove_not_recompute_node 2.19% : 0.000002s : 2: substitution.replace_old_param 9.63% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.55% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002130 2 89.85% : 0.001914s : 1: type_inference.infer 10.15% : 0.000216s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000050 1 100.00% : 0.000050s : 1: match.inline ------[predicate.] 0.000227 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.28% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.40% : 0.000005s : 25: predicate.arithmetic_simplify 0.88% : 0.000002s : 13: predicate.cast_eliminate 0.88% : 0.000002s : 12: predicate.check_bprop_eliminate 0.81% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.16% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.79% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.94% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.54% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.17% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_depend_swap 1.99% : 0.000005s : 31: predicate.environ_get_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000000s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.30% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.72% : 0.000002s : 12: predicate.incorporate_call_switch 5.62% : 0.000013s : 63: predicate.inline 1.09% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.98% : 0.000002s : 12: predicate.less_batch_normalization 1.62% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.53% : 0.000006s : 38: predicate.load_eliminater 1.28% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.19% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.70% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.79% : 0.000002s : 13: predicate.minmaximum_grad 0.64% : 0.000001s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.21% : 0.000003s : 14: predicate.partial_defer_inline 1.28% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.84% : 0.000002s : 12: predicate.reduce_all_const_elim 1.00% : 0.000002s : 13: predicate.reduce_eliminate 0.59% : 0.000001s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.86% : 0.000002s : 13: predicate.reshape_eliminate 0.87% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.02% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.87% : 0.000002s : 12: predicate.shard_identity_eliminate 1.35% : 0.000003s : 18: predicate.special_op_eliminate 1.02% : 0.000002s : 12: predicate.specialize_transform 1.01% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.12% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.35% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.89% : 0.000002s : 14: predicate.switch_defer_inline 1.74% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.29% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.66% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.65% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.83% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.75% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.42% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.50% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.52% : 0.000001s : 6: predicate.value_based_eliminate 0.78% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000125 4 8.57% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.43% : 0.000114s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.084671 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000059s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.13% : 0.000109s : 1: auto_monad 0.03% : 0.000028s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000306s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.02% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.59% : 0.000495s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000015s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.57% : 0.000482s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001061s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.18% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.05% : 0.005121s : 1: opt_a 0.15% : 0.000131s : 1: opt_after_cconv 0.28% : 0.000239s : 1: opt_b 7.91% : 0.006699s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.25% : 0.000214s : 1: renormalize.infer 0.23% : 0.000197s : 1: renormalize.specialize 0.01% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000132s : 1: rewriter_after_opt_a 0.04% : 0.000034s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000086s : 1: symbol_engine_optimizer 78.18% : 0.066196s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.56% : 0.002171s : 1: type_inference 0.07% : 0.000062s : 1: validate TotalTime = 0.0786038, [21] [bootstrap]: 0.00030105 [type_inference]: 0.00239247 [auto_monad]: 0.00011632 [graph_reusing]: 1.96975e-06 [inline]: 1.51014e-06 [parallel-infer-symbol]: 1.68011e-06 [pre_auto_parallel]: 2.39401e-05 [insert-virtual-dataset]: 2.37022e-06 [parallel-infer-symbol-second]: 3.49712e-07 [dataset_repeat_opt]: 1.20001e-06 [pipeline_split]: 1.39e-06 [optimize]: 0.00705226, [52] [py_interpret_to_execute]: 1.47303e-05 [rewriter_before_opt_a]: 3.52203e-05 [opt_a]: 0.00537708, [2] [Cycle 1]: 0.00156704, [43] [expand_dump_flag]: 2.92994e-06 [switch_simplify]: 2.70498e-05 [loop_unroll]: 1.34399e-05 [a_1]: 0.00033317 [recompute_prepare]: 8.78004e-06 [updatestate_depend_eliminate]: 8.84011e-06 [updatestate_assign_eliminate]: 5.77001e-06 [updatestate_loads_eliminate]: 7.56979e-06 [parameter_eliminate]: 3.28012e-06 [a_2]: 0.0001178 [accelerated_algorithm]: 8.21985e-06 [shard]: 1.8701e-06 [meta_shard_fg_expand]: 2.01804e-05 [shard_inline]: 9.26014e-06 [auto_parallel]: 1.23698e-05 [parallel]: 6.74976e-06 [flash_sp]: 8.88016e-06 [merge_comm]: 7.37002e-06 [allreduce_fusion]: 5.67036e-06 [matmul_add_comm_reduction]: 9.77004e-06 [allreduce_slice_to_reducescatter]: 3.89758e-07 [virtual_shard_identity]: 9.97027e-06 [virtual_dataset]: 8.1202e-06 [get_grad_eliminate_]: 8.2897e-06 [virtual_output]: 8.07969e-06 [merge_forward]: 6.08014e-06 [cell_reuse_recompute_pass]: 1.95997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.69598e-05 [before_grad]: 1.42399e-05 [inplace_validation]: 4.81028e-06 [meta_fg_expand]: 5.72018e-06 [inplace_validation_after_expand]: 5.91995e-06 [flash_sp_send_recv_attached]: 4.40003e-06 [receive_attached]: 2.37999e-06 [after_resolve]: 1.11898e-05 [a_after_grad]: 1.28299e-05 [special_op_eliminate]: 7.62008e-06 [renormalize]: 0.00047109 [add_forward_monad_depend]: 3.40026e-06 [auto_monad_grad]: 2.30968e-06 [auto_monad_eliminator]: 3.10699e-05 [cse]: 3.15402e-05 [a_3]: 5.70603e-05 [Cycle 2]: 0.00077837, [43] [expand_dump_flag]: 1.31037e-06 [switch_simplify]: 8.96025e-06 [loop_unroll]: 9.68995e-06 [a_1]: 0.0002024 [recompute_prepare]: 7.33975e-06 [updatestate_depend_eliminate]: 5.65965e-06 [updatestate_assign_eliminate]: 4.76977e-06 [updatestate_loads_eliminate]: 5.58002e-06 [parameter_eliminate]: 1.23028e-06 [a_2]: 0.00010389 [accelerated_algorithm]: 8.55001e-06 [shard]: 1.27032e-06 [meta_shard_fg_expand]: 2.63983e-06 [shard_inline]: 8.10018e-06 [auto_parallel]: 1.12699e-05 [parallel]: 3.7998e-06 [flash_sp]: 3.34019e-06 [merge_comm]: 6.21006e-06 [allreduce_fusion]: 5.07012e-06 [matmul_add_comm_reduction]: 8.15e-06 [allreduce_slice_to_reducescatter]: 2.89641e-07 [virtual_shard_identity]: 8.70973e-06 [virtual_dataset]: 7.56001e-06 [get_grad_eliminate_]: 7.25966e-06 [virtual_output]: 6.97002e-06 [merge_forward]: 4.97e-06 [cell_reuse_recompute_pass]: 2.04006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.53803e-05 [before_grad]: 1.21603e-05 [inplace_validation]: 4.31016e-06 [meta_fg_expand]: 4.72972e-06 [inplace_validation_after_expand]: 5.43008e-06 [flash_sp_send_recv_attached]: 9.20147e-07 [receive_attached]: 7.69738e-07 [after_resolve]: 9.35979e-06 [a_after_grad]: 1.14497e-05 [special_op_eliminate]: 7.08969e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 8.00006e-07 [auto_monad_grad]: 1.08033e-06 [auto_monad_eliminator]: 1.82102e-05 [cse]: 1.87601e-05 [a_3]: 4.829e-05 [py_interpret_to_execute_after_opt_a]: 8.88994e-06 [slice_cell_reuse_recomputed_activation]: 2.12993e-06 [rewriter_after_opt_a]: 0.00014305 [convert_after_rewriter]: 9.27038e-06 [order_py_execute_after_rewriter]: 5.53997e-06 [opt_b]: 0.00024214, [1] [Cycle 1]: 0.00023627, [7] [b_1]: 0.00016166 [b_2]: 9.80962e-06 [updatestate_depend_eliminate]: 5.34998e-06 [updatestate_assign_eliminate]: 4.63007e-06 [updatestate_loads_eliminate]: 5.49993e-06 [renormalize]: 3.70201e-07 [cse]: 1.80202e-05 [optimize_parallel_all_gather_comm]: 8.01031e-06 [overlap_param_gather]: 1.01002e-06 [cconv]: 2.04002e-05 [loop_unroll]: 0.00050182 [opt_after_cconv]: 0.00013435, [1] [Cycle 1]: 0.00012833, [7] [c_1]: 5.37201e-05 [parameter_eliminate]: 2.70968e-06 [updatestate_depend_eliminate]: 8.30041e-06 [updatestate_assign_eliminate]: 4.75021e-06 [updatestate_loads_eliminate]: 5.30016e-06 [cse]: 2.146e-05 [renormalize]: 3.39933e-07 [remove_dup_value]: 1.192e-05 [tuple_transform]: 6.84499e-05, [1] [Cycle 1]: 6.39199e-05, [2] [d_1]: 5.45601e-05 [renormalize]: 2.59839e-07 [partial_unused_args_eliminate]: 1.56043e-06 [add_cache_embedding]: 1.31195e-05 [add_recomputation]: 5.983e-05 [cse_after_recomputation]: 2.65203e-05, [1] [Cycle 1]: 2.15e-05, [1] [cse]: 1.65398e-05 [environ_conv]: 7.39982e-06 [swap_dp_allreduce_reducescatter]: 7.2699e-06 [bias_add_comm_swap]: 2.09967e-06 [label_micro_interleaved_index]: 1.62981e-06 [label_fine_grained_interleaved_index]: 1.57021e-06 [merge_cast_opt]: 1.07987e-06 [slice_recompute_activation]: 1.11014e-06 [micro_interleaved_order_control]: 1.68011e-06 [assign_add_opt]: 6.77956e-06 [ForceFp32Comm]: 1.10036e-06 [remove_cast_before_assign_add]: 6.3004e-07 [full_micro_interleaved_order_control]: 1.36998e-06 [reorder_send_recv_between_fp_bp]: 1.53016e-06 [comm_op_add_attrs]: 9.69972e-07 [add_comm_op_reuse_tag]: 1.2801e-06 [interleave_split_concat_branches]: 7.59959e-07 [interleave_parallel_branches]: 6.50063e-07 [overlap_opt_shard_in_pipeline]: 1.22981e-06 [overlap_opt_shard_grad_in_pipeline]: 2.12993e-06 [control_data_broadcast_order]: 1.13016e-06 [grouped_pairwise_exchange_alltoall]: 1.26986e-06 [offloading_packed_experts]: 1.04029e-06 [overlap_recompute_and_grad_model_parallel]: 1.47009e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.69855e-07 [overlap_recompute_allgather_and_fa_grad]: 8.39587e-07 [overlap_grad_ring_attention]: 1.85007e-06 [overlap_grad_flash_sp]: 1.40998e-05 [begin_end_overlap_inline]: 6.39819e-07 [split_matmul_comm_elemetwise]: 1.94041e-06 [split_layernorm_comm]: 1.60979e-06 [handle_group_info]: 9.60194e-07 [symbol_engine_optimizer]: 8.23098e-05, [1] [Cycle 1]: 7.79699e-05, [6] [build]: 3.70992e-06 [elim_shapecalc]: 1.17896e-05 [elim_not_effective]: 1.54101e-05 [opt_reshape]: 8.74e-06 [fold_const_symbol]: 1.31903e-05 [renormalize]: 2.70084e-07 [pipeline_parallel_scheduler]: 1.45985e-06 [auto_monad_reorder]: 2.86498e-05 [get_jit_bprop_graph]: 4.20026e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00051601 [distribtued_split]: 4.157e-05 [validate]: 9.96804e-05 [task_emit]: 0.067769 [execute]: 1.06301e-05 Sums bootstrap : 0.000301s : 0.40% type_inference : 0.002392s : 3.21% auto_monad : 0.000116s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000036s : 0.05% optimize.opt_a.loop_unroll : 0.000023s : 0.03% optimize.opt_a.a_1 : 0.000536s : 0.72% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000222s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000023s : 0.03% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000471s : 0.63% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.07% optimize.opt_a.cse : 0.000050s : 0.07% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000143s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000020s : 0.03% optimize.loop_unroll : 0.000502s : 0.67% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000516s : 0.69% distribtued_split : 0.000042s : 0.06% validate : 0.000100s : 0.13% task_emit : 0.067769s : 90.85% execute : 0.000011s : 0.01% TotalTime = 0.0788664, [21] [bootstrap]: 0.00030361 [type_inference]: 0.00238352 [auto_monad]: 0.00012431 [graph_reusing]: 2.19001e-06 [inline]: 1.20979e-06 [parallel-infer-symbol]: 1.36998e-06 [pre_auto_parallel]: 2.39499e-05 [insert-virtual-dataset]: 2.73995e-06 [parallel-infer-symbol-second]: 3.59956e-07 [dataset_repeat_opt]: 1.27964e-06 [pipeline_split]: 1.34017e-06 [optimize]: 0.00705678, [52] [py_interpret_to_execute]: 1.42199e-05 [rewriter_before_opt_a]: 3.597e-05 [opt_a]: 0.00535558, [2] [Cycle 1]: 0.00154074, [43] [expand_dump_flag]: 3.23029e-06 [switch_simplify]: 2.879e-05 [loop_unroll]: 1.39498e-05 [a_1]: 0.00033881 [recompute_prepare]: 8.93977e-06 [updatestate_depend_eliminate]: 8.40984e-06 [updatestate_assign_eliminate]: 6.04009e-06 [updatestate_loads_eliminate]: 6.8699e-06 [parameter_eliminate]: 2.88989e-06 [a_2]: 0.00011711 [accelerated_algorithm]: 8.6301e-06 [shard]: 2.05031e-06 [meta_shard_fg_expand]: 4.13973e-06 [shard_inline]: 8.66968e-06 [auto_parallel]: 1.28401e-05 [parallel]: 6.57979e-06 [flash_sp]: 9.01008e-06 [merge_comm]: 8.15978e-06 [allreduce_fusion]: 5.65965e-06 [matmul_add_comm_reduction]: 9.96003e-06 [allreduce_slice_to_reducescatter]: 4.29805e-07 [virtual_shard_identity]: 9.70997e-06 [virtual_dataset]: 8.33021e-06 [get_grad_eliminate_]: 7.53999e-06 [virtual_output]: 8.23988e-06 [merge_forward]: 5.60004e-06 [cell_reuse_recompute_pass]: 1.79e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.72001e-05 [before_grad]: 1.392e-05 [inplace_validation]: 5.13019e-06 [meta_fg_expand]: 5.71972e-06 [inplace_validation_after_expand]: 6.36e-06 [flash_sp_send_recv_attached]: 4.69992e-06 [receive_attached]: 2.33017e-06 [after_resolve]: 1.11898e-05 [a_after_grad]: 1.29398e-05 [special_op_eliminate]: 7.54977e-06 [renormalize]: 0.00043114 [add_forward_monad_depend]: 3.32016e-06 [auto_monad_grad]: 1.64006e-06 [auto_monad_eliminator]: 2.97101e-05 [cse]: 2.98303e-05 [a_3]: 5.95096e-05 [Cycle 2]: 0.00079391, [43] [expand_dump_flag]: 1.10036e-06 [switch_simplify]: 9.03988e-06 [loop_unroll]: 7.89994e-06 [a_1]: 0.00020163 [recompute_prepare]: 7.29039e-06 [updatestate_depend_eliminate]: 6.31018e-06 [updatestate_assign_eliminate]: 4.73019e-06 [updatestate_loads_eliminate]: 5.24009e-06 [parameter_eliminate]: 1.15996e-06 [a_2]: 0.00010474 [accelerated_algorithm]: 8.22963e-06 [shard]: 1.26986e-06 [meta_shard_fg_expand]: 2.80002e-06 [shard_inline]: 7.89016e-06 [auto_parallel]: 1.09999e-05 [parallel]: 3.76021e-06 [flash_sp]: 3.26987e-06 [merge_comm]: 5.91017e-06 [allreduce_fusion]: 5.19026e-06 [matmul_add_comm_reduction]: 7.91997e-06 [allreduce_slice_to_reducescatter]: 2.60305e-07 [virtual_shard_identity]: 8.80007e-06 [virtual_dataset]: 7.74022e-06 [get_grad_eliminate_]: 7.51019e-06 [virtual_output]: 7.25035e-06 [merge_forward]: 4.70039e-06 [cell_reuse_recompute_pass]: 1.8701e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.60201e-05 [before_grad]: 1.28001e-05 [inplace_validation]: 4.42006e-06 [meta_fg_expand]: 5.24987e-06 [inplace_validation_after_expand]: 5.4799e-06 [flash_sp_send_recv_attached]: 9.19681e-07 [receive_attached]: 6.79865e-07 [after_resolve]: 1.02799e-05 [a_after_grad]: 1.194e-05 [special_op_eliminate]: 7.19028e-06 [renormalize]: 8.98726e-08 [add_forward_monad_depend]: 8.39587e-07 [auto_monad_grad]: 1.70013e-06 [auto_monad_eliminator]: 1.805e-05 [cse]: 1.93398e-05 [a_3]: 4.926e-05 [py_interpret_to_execute_after_opt_a]: 9.35979e-06 [slice_cell_reuse_recomputed_activation]: 2.39024e-06 [rewriter_after_opt_a]: 0.00014782 [convert_after_rewriter]: 8.82009e-06 [order_py_execute_after_rewriter]: 5.62007e-06 [opt_b]: 0.00024597, [1] [Cycle 1]: 0.00023985, [7] [b_1]: 0.00016358 [b_2]: 9.79984e-06 [updatestate_depend_eliminate]: 5.43986e-06 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 5.23031e-06 [renormalize]: 2.49594e-07 [cse]: 1.92001e-05 [optimize_parallel_all_gather_comm]: 8.46991e-06 [overlap_param_gather]: 1.20979e-06 [cconv]: 2.18302e-05 [loop_unroll]: 0.00050024 [opt_after_cconv]: 0.00013681, [1] [Cycle 1]: 0.00012993, [7] [c_1]: 5.33899e-05 [parameter_eliminate]: 2.56021e-06 [updatestate_depend_eliminate]: 8.42987e-06 [updatestate_assign_eliminate]: 4.63007e-06 [updatestate_loads_eliminate]: 5.13997e-06 [cse]: 2.19299e-05 [renormalize]: 4.30271e-07 [remove_dup_value]: 1.308e-05 [tuple_transform]: 6.94203e-05, [1] [Cycle 1]: 6.43297e-05, [2] [d_1]: 5.44102e-05 [renormalize]: 2.10013e-07 [partial_unused_args_eliminate]: 1.93017e-06 [add_cache_embedding]: 1.36602e-05 [add_recomputation]: 5.983e-05 [cse_after_recomputation]: 2.77003e-05, [1] [Cycle 1]: 2.23303e-05, [1] [cse]: 1.72402e-05 [environ_conv]: 6.97002e-06 [swap_dp_allreduce_reducescatter]: 7.35e-06 [bias_add_comm_swap]: 2.21003e-06 [label_micro_interleaved_index]: 1.62981e-06 [label_fine_grained_interleaved_index]: 1.91992e-06 [merge_cast_opt]: 1.35973e-06 [slice_recompute_activation]: 1.60001e-06 [micro_interleaved_order_control]: 2.10991e-06 [assign_add_opt]: 6.90995e-06 [ForceFp32Comm]: 8.00006e-07 [remove_cast_before_assign_add]: 7.59959e-07 [full_micro_interleaved_order_control]: 1.7602e-06 [reorder_send_recv_between_fp_bp]: 1.73971e-06 [comm_op_add_attrs]: 9.09902e-07 [add_comm_op_reuse_tag]: 6.00237e-07 [interleave_split_concat_branches]: 5.99772e-07 [interleave_parallel_branches]: 6.70087e-07 [overlap_opt_shard_in_pipeline]: 9.09902e-07 [overlap_opt_shard_grad_in_pipeline]: 1.91014e-06 [control_data_broadcast_order]: 8.60076e-07 [grouped_pairwise_exchange_alltoall]: 7.40401e-07 [offloading_packed_experts]: 7.79983e-07 [overlap_recompute_and_grad_model_parallel]: 2.00002e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.2003e-07 [overlap_recompute_allgather_and_fa_grad]: 7.30157e-07 [overlap_grad_ring_attention]: 1.81003e-06 [overlap_grad_flash_sp]: 1.32998e-05 [begin_end_overlap_inline]: 7.70204e-07 [split_matmul_comm_elemetwise]: 1.85007e-06 [split_layernorm_comm]: 1.42027e-06 [handle_group_info]: 7.30157e-07 [symbol_engine_optimizer]: 8.59499e-05, [1] [Cycle 1]: 8.08802e-05, [6] [build]: 3.49013e-06 [elim_shapecalc]: 1.16904e-05 [elim_not_effective]: 1.61799e-05 [opt_reshape]: 8.61008e-06 [fold_const_symbol]: 1.36802e-05 [renormalize]: 2.40281e-07 [pipeline_parallel_scheduler]: 1.39978e-06 [auto_monad_reorder]: 2.706e-05 [get_jit_bprop_graph]: 4.00003e-07 [rewriter_after_jit_bprop_graph]: 3.70201e-07 [eliminate_special_op_node]: 0.00051321 [distribtued_split]: 3.76599e-05 [validate]: 8.169e-05 [task_emit]: 0.0680423 [execute]: 1.10902e-05 Sums bootstrap : 0.000304s : 0.41% type_inference : 0.002384s : 3.19% auto_monad : 0.000124s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000540s : 0.72% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000222s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000431s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000048s : 0.06% optimize.opt_a.cse : 0.000049s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.15% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000148s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000164s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000500s : 0.67% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000027s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000513s : 0.69% distribtued_split : 0.000038s : 0.05% validate : 0.000082s : 0.11% task_emit : 0.068042s : 90.96% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000124 63 5.55% : 0.000007s : 2: substitution.depend_value_elim 1.86% : 0.000002s : 5: substitution.elim_not_effective 2.02% : 0.000003s : 5: substitution.fold_const_symbol 5.21% : 0.000006s : 6: substitution.graph_param_transform 48.86% : 0.000060s : 1: substitution.inline 4.27% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.55% : 0.000004s : 6: substitution.load_eliminater 2.66% : 0.000003s : 2: substitution.reduce_all_const_elim 5.88% : 0.000007s : 10: substitution.remove_not_recompute_node 2.59% : 0.000003s : 2: substitution.replace_old_param 9.03% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.51% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002364 2 88.45% : 0.002091s : 1: type_inference.infer 11.55% : 0.000273s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000059 1 100.00% : 0.000059s : 1: match.inline ------[predicate.] 0.000229 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 0.98% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.77% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.13% : 0.000005s : 25: predicate.arithmetic_simplify 0.90% : 0.000002s : 13: predicate.cast_eliminate 0.80% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.35% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.90% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.24% : 0.000001s : 6: predicate.elim_not_effective 0.57% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.20% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_depend_swap 1.92% : 0.000004s : 31: predicate.environ_get_eliminate 1.18% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.79% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.77% : 0.000002s : 12: predicate.get_grad_eliminate 0.24% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.65% : 0.000013s : 63: predicate.inline 0.95% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.09% : 0.000002s : 12: predicate.less_batch_normalization 1.74% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.47% : 0.000006s : 38: predicate.load_eliminater 1.32% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.91% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.70% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.81% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.17% : 0.000003s : 14: predicate.partial_defer_inline 1.27% : 0.000003s : 19: predicate.partial_eliminate 0.86% : 0.000002s : 13: predicate.print_const_string_wrapper 0.82% : 0.000002s : 12: predicate.reduce_all_const_elim 1.16% : 0.000003s : 13: predicate.reduce_eliminate 0.53% : 0.000001s : 12: predicate.remove_not_recompute_node 1.22% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.83% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.94% : 0.000002s : 12: predicate.shard_identity_eliminate 1.26% : 0.000003s : 18: predicate.special_op_eliminate 0.98% : 0.000002s : 12: predicate.specialize_transform 0.97% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.07% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.31% : 0.000005s : 38: predicate.stopgrad_eliminater 0.46% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.60% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.46% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.78% : 0.000002s : 13: predicate.transpose_eliminate 1.67% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.74% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.64% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.88% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.63% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.46% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.47% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000140 4 9.72% : 0.000014s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.28% : 0.000127s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087491 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.15% : 0.000129s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000324s : 1: bootstrap 0.03% : 0.000024s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.06% : 0.000050s : 1: distribtued_split 0.61% : 0.000530s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.58% : 0.000512s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001086s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 6.15% : 0.005381s : 1: opt_a 0.16% : 0.000139s : 1: opt_after_cconv 0.28% : 0.000245s : 1: opt_b 8.07% : 0.007060s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000030s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.32% : 0.000276s : 1: renormalize.infer 0.22% : 0.000189s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000149s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000085s : 1: symbol_engine_optimizer 77.49% : 0.067796s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.76% : 0.002411s : 1: type_inference 0.15% : 0.000135s : 1: validate TotalTime = 0.0791246, [21] [bootstrap]: 0.00028463 [type_inference]: 0.00215383 [auto_monad]: 9.78401e-05 [graph_reusing]: 2.05031e-06 [inline]: 1.60001e-06 [parallel-infer-symbol]: 1.3602e-06 [pre_auto_parallel]: 2.10102e-05 [insert-virtual-dataset]: 2.00002e-06 [parallel-infer-symbol-second]: 4.29805e-07 [dataset_repeat_opt]: 9.09902e-07 [pipeline_split]: 1.07009e-06 [optimize]: 0.0066917, [52] [py_interpret_to_execute]: 1.18902e-05 [rewriter_before_opt_a]: 2.982e-05 [opt_a]: 0.00512131, [2] [Cycle 1]: 0.00143968, [43] [expand_dump_flag]: 2.68966e-06 [switch_simplify]: 2.62898e-05 [loop_unroll]: 1.30897e-05 [a_1]: 0.00032487 [recompute_prepare]: 8.57981e-06 [updatestate_depend_eliminate]: 7.91997e-06 [updatestate_assign_eliminate]: 5.20982e-06 [updatestate_loads_eliminate]: 6.52997e-06 [parameter_eliminate]: 2.21003e-06 [a_2]: 0.0001129 [accelerated_algorithm]: 7.89994e-06 [shard]: 1.57999e-06 [meta_shard_fg_expand]: 3.09991e-06 [shard_inline]: 8.36002e-06 [auto_parallel]: 1.091e-05 [parallel]: 5.93998e-06 [flash_sp]: 8.80007e-06 [merge_comm]: 7.82963e-06 [allreduce_fusion]: 5.12041e-06 [matmul_add_comm_reduction]: 8.76002e-06 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 9.07993e-06 [virtual_dataset]: 8.18027e-06 [get_grad_eliminate_]: 7.95024e-06 [virtual_output]: 7.16001e-06 [merge_forward]: 5.96e-06 [cell_reuse_recompute_pass]: 1.45007e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.62902e-05 [before_grad]: 1.30702e-05 [inplace_validation]: 4.64031e-06 [meta_fg_expand]: 5.28013e-06 [inplace_validation_after_expand]: 5.59026e-06 [flash_sp_send_recv_attached]: 3.93996e-06 [receive_attached]: 1.97999e-06 [after_resolve]: 1.04601e-05 [a_after_grad]: 1.25798e-05 [special_op_eliminate]: 7.59028e-06 [renormalize]: 0.00041649 [add_forward_monad_depend]: 2.71993e-06 [auto_monad_grad]: 1.67033e-06 [auto_monad_eliminator]: 2.54097e-05 [cse]: 2.50102e-05 [a_3]: 5.91697e-05 [Cycle 2]: 0.00076548, [43] [expand_dump_flag]: 9.89996e-07 [switch_simplify]: 8.86014e-06 [loop_unroll]: 7.83987e-06 [a_1]: 0.00019952 [recompute_prepare]: 7.15954e-06 [updatestate_depend_eliminate]: 5.9302e-06 [updatestate_assign_eliminate]: 4.44986e-06 [updatestate_loads_eliminate]: 4.84008e-06 [parameter_eliminate]: 1.02958e-06 [a_2]: 0.00010395 [accelerated_algorithm]: 8.19005e-06 [shard]: 1.20001e-06 [meta_shard_fg_expand]: 2.46987e-06 [shard_inline]: 8.09971e-06 [auto_parallel]: 1.00504e-05 [parallel]: 3.31039e-06 [flash_sp]: 2.42982e-06 [merge_comm]: 5.69969e-06 [allreduce_fusion]: 4.92996e-06 [matmul_add_comm_reduction]: 7.44965e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 9.19029e-06 [virtual_dataset]: 7.58981e-06 [get_grad_eliminate_]: 7.16001e-06 [virtual_output]: 7.01007e-06 [merge_forward]: 4.84008e-06 [cell_reuse_recompute_pass]: 1.6503e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.55699e-05 [before_grad]: 1.26502e-05 [inplace_validation]: 4.26034e-06 [meta_fg_expand]: 4.71994e-06 [inplace_validation_after_expand]: 4.9402e-06 [flash_sp_send_recv_attached]: 1.13016e-06 [receive_attached]: 6.50063e-07 [after_resolve]: 9.70019e-06 [a_after_grad]: 1.15903e-05 [special_op_eliminate]: 7.53021e-06 [renormalize]: 8.98726e-08 [add_forward_monad_depend]: 8.69855e-07 [auto_monad_grad]: 1.00024e-06 [auto_monad_eliminator]: 1.60704e-05 [cse]: 1.84602e-05 [a_3]: 4.86602e-05 [py_interpret_to_execute_after_opt_a]: 9.0301e-06 [slice_cell_reuse_recomputed_activation]: 1.47009e-06 [rewriter_after_opt_a]: 0.00012998 [convert_after_rewriter]: 8.10996e-06 [order_py_execute_after_rewriter]: 5.60982e-06 [opt_b]: 0.00024022, [1] [Cycle 1]: 0.00023519, [7] [b_1]: 0.00016299 [b_2]: 9.64012e-06 [updatestate_depend_eliminate]: 5.15021e-06 [updatestate_assign_eliminate]: 4.33996e-06 [updatestate_loads_eliminate]: 4.65009e-06 [renormalize]: 3.29688e-07 [cse]: 1.74199e-05 [optimize_parallel_all_gather_comm]: 7.35e-06 [overlap_param_gather]: 1.07987e-06 [cconv]: 1.30096e-05 [loop_unroll]: 0.00047655 [opt_after_cconv]: 0.00012625, [1] [Cycle 1]: 0.00012009, [7] [c_1]: 5.07599e-05 [parameter_eliminate]: 1.69035e-06 [updatestate_depend_eliminate]: 6.92997e-06 [updatestate_assign_eliminate]: 4.68967e-06 [updatestate_loads_eliminate]: 5.24987e-06 [cse]: 1.98502e-05 [renormalize]: 3.40398e-07 [remove_dup_value]: 8.65012e-06 [tuple_transform]: 6.67199e-05, [1] [Cycle 1]: 6.20997e-05, [2] [d_1]: 5.32703e-05 [renormalize]: 2.19792e-07 [partial_unused_args_eliminate]: 1.22981e-06 [add_cache_embedding]: 1.05198e-05 [add_recomputation]: 5.02402e-05 [cse_after_recomputation]: 2.58898e-05, [1] [Cycle 1]: 2.10297e-05, [1] [cse]: 1.61002e-05 [environ_conv]: 5.83986e-06 [swap_dp_allreduce_reducescatter]: 6.45034e-06 [bias_add_comm_swap]: 1.93994e-06 [label_micro_interleaved_index]: 1.57021e-06 [label_fine_grained_interleaved_index]: 1.25961e-06 [merge_cast_opt]: 8.29808e-07 [slice_recompute_activation]: 1.20001e-06 [micro_interleaved_order_control]: 9.80217e-07 [assign_add_opt]: 7.28015e-06 [ForceFp32Comm]: 4.90341e-07 [remove_cast_before_assign_add]: 4.70318e-07 [full_micro_interleaved_order_control]: 1.05007e-06 [reorder_send_recv_between_fp_bp]: 9.49949e-07 [comm_op_add_attrs]: 4.90341e-07 [add_comm_op_reuse_tag]: 4.80097e-07 [interleave_split_concat_branches]: 4.49829e-07 [interleave_parallel_branches]: 4.00003e-07 [overlap_opt_shard_in_pipeline]: 5.39701e-07 [overlap_opt_shard_grad_in_pipeline]: 1.4198e-06 [control_data_broadcast_order]: 5.09899e-07 [grouped_pairwise_exchange_alltoall]: 5.60191e-07 [offloading_packed_experts]: 5.80214e-07 [overlap_recompute_and_grad_model_parallel]: 1.29966e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.19678e-07 [overlap_recompute_allgather_and_fa_grad]: 7.09668e-07 [overlap_grad_ring_attention]: 1.32015e-06 [overlap_grad_flash_sp]: 1.131e-05 [begin_end_overlap_inline]: 4.89876e-07 [split_matmul_comm_elemetwise]: 1.22003e-06 [split_layernorm_comm]: 1.45007e-06 [handle_group_info]: 6.89644e-07 [symbol_engine_optimizer]: 8.266e-05, [1] [Cycle 1]: 7.84602e-05, [6] [build]: 4.19002e-06 [elim_shapecalc]: 1.11898e-05 [elim_not_effective]: 1.569e-05 [opt_reshape]: 8.59005e-06 [fold_const_symbol]: 1.28201e-05 [renormalize]: 2.79862e-07 [pipeline_parallel_scheduler]: 1.24983e-06 [auto_monad_reorder]: 2.05599e-05 [get_jit_bprop_graph]: 3.30154e-07 [rewriter_after_jit_bprop_graph]: 3.09665e-07 [eliminate_special_op_node]: 0.00048583 [distribtued_split]: 3.18997e-05 [validate]: 2.90098e-05 [task_emit]: 0.0690678 [execute]: 8.82009e-06 Sums bootstrap : 0.000285s : 0.38% type_inference : 0.002154s : 2.86% auto_monad : 0.000098s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000524s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.02% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000217s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.01% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000417s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000041s : 0.06% optimize.opt_a.cse : 0.000043s : 0.06% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000130s : 0.17% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000017s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000007s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000013s : 0.02% optimize.loop_unroll : 0.000477s : 0.63% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000050s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000006s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000000s : 0.00% optimize.remove_cast_before_assign_add : 0.000000s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000000s : 0.00% optimize.add_comm_op_reuse_tag : 0.000000s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000011s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbo TotalTime = 0.0790336, [21] [bootstrap]: 0.00030566 [type_inference]: 0.00241805 [auto_monad]: 0.00012873 [graph_reusing]: 2.62028e-06 [inline]: 1.11992e-06 [parallel-infer-symbol]: 2.07033e-06 [pre_auto_parallel]: 2.65697e-05 [insert-virtual-dataset]: 2.25008e-06 [parallel-infer-symbol-second]: 3.19909e-07 [dataset_repeat_opt]: 1.67033e-06 [pipeline_split]: 1.79e-06 [optimize]: 0.00691484, [52] [py_interpret_to_execute]: 1.53198e-05 [rewriter_before_opt_a]: 3.54e-05 [opt_a]: 0.00527148, [2] [Cycle 1]: 0.00152895, [43] [expand_dump_flag]: 4.08012e-06 [switch_simplify]: 2.92202e-05 [loop_unroll]: 1.29398e-05 [a_1]: 0.00033983 [recompute_prepare]: 9.45013e-06 [updatestate_depend_eliminate]: 9.41986e-06 [updatestate_assign_eliminate]: 6.04987e-06 [updatestate_loads_eliminate]: 7.20983e-06 [parameter_eliminate]: 3.47011e-06 [a_2]: 0.00013706 [accelerated_algorithm]: 8.38004e-06 [shard]: 1.57999e-06 [meta_shard_fg_expand]: 3.09013e-06 [shard_inline]: 8.83965e-06 [auto_parallel]: 1.268e-05 [parallel]: 4.50015e-06 [flash_sp]: 9.6499e-06 [merge_comm]: 6.8997e-06 [allreduce_fusion]: 5.51995e-06 [matmul_add_comm_reduction]: 1.08001e-05 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 9.33977e-06 [virtual_dataset]: 8.04011e-06 [get_grad_eliminate_]: 8.2301e-06 [virtual_output]: 8.11974e-06 [merge_forward]: 4.96022e-06 [cell_reuse_recompute_pass]: 1.70991e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.62199e-05 [before_grad]: 1.32797e-05 [inplace_validation]: 4.56022e-06 [meta_fg_expand]: 5.1898e-06 [inplace_validation_after_expand]: 5.91017e-06 [flash_sp_send_recv_attached]: 5.0799e-06 [receive_attached]: 2.23983e-06 [after_resolve]: 1.16401e-05 [a_after_grad]: 1.25901e-05 [special_op_eliminate]: 8.05967e-06 [renormalize]: 0.00042997 [add_forward_monad_depend]: 3.01981e-06 [auto_monad_grad]: 1.30991e-06 [auto_monad_eliminator]: 3.157e-05 [cse]: 3.21204e-05 [a_3]: 5.86701e-05 [Cycle 2]: 0.00077637, [43] [expand_dump_flag]: 1.07009e-06 [switch_simplify]: 9.17958e-06 [loop_unroll]: 7.79983e-06 [a_1]: 0.00019956 [recompute_prepare]: 7.68015e-06 [updatestate_depend_eliminate]: 5.96e-06 [updatestate_assign_eliminate]: 4.5402e-06 [updatestate_loads_eliminate]: 5.13019e-06 [parameter_eliminate]: 1.30013e-06 [a_2]: 0.00010238 [accelerated_algorithm]: 8.07969e-06 [shard]: 1.24006e-06 [meta_shard_fg_expand]: 2.56998e-06 [shard_inline]: 7.46036e-06 [auto_parallel]: 1.11302e-05 [parallel]: 3.48035e-06 [flash_sp]: 2.80002e-06 [merge_comm]: 6.25011e-06 [allreduce_fusion]: 5.35976e-06 [matmul_add_comm_reduction]: 7.98982e-06 [allreduce_slice_to_reducescatter]: 3.39933e-07 [virtual_shard_identity]: 8.78982e-06 [virtual_dataset]: 7.72998e-06 [get_grad_eliminate_]: 7.51019e-06 [virtual_output]: 7.43987e-06 [merge_forward]: 4.67012e-06 [cell_reuse_recompute_pass]: 1.77976e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.548e-05 [before_grad]: 1.27498e-05 [inplace_validation]: 4.57e-06 [meta_fg_expand]: 4.69992e-06 [inplace_validation_after_expand]: 5.15999e-06 [flash_sp_send_recv_attached]: 9.80217e-07 [receive_attached]: 6.3004e-07 [after_resolve]: 9.39984e-06 [l_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000021s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000486s : 0.65% distribtued_split : 0.000032s : 0.04% validate : 0.000029s : 0.04% task_emit : 0.069068s : 91.76% execute : 0.000009s : 0.01% a_after_grad]: 1.14003e-05 [special_op_eliminate]: 7.50972e-06 [renormalize]: 7.96281e-08 [add_forward_monad_depend]: 8.49832e-07 [auto_monad_grad]: 1.57999e-06 [auto_monad_eliminator]: 1.88104e-05 [cse]: 1.99401e-05 [a_3]: 4.95999e-05 [py_interpret_to_execute_after_opt_a]: 9.05991e-06 [slice_cell_reuse_recomputed_activation]: 1.91992e-06 [rewriter_after_opt_a]: 0.00014635 [convert_after_rewriter]: 8.97003e-06 [order_py_execute_after_rewriter]: 5.96e-06 [opt_b]: 0.00024086, [1] [Cycle 1]: 0.00023527, [7] [b_1]: 0.00016053 [b_2]: 9.54e-06 [updatestate_depend_eliminate]: 5.51017e-06 [updatestate_assign_eliminate]: 4.5104e-06 [updatestate_loads_eliminate]: 5.30016e-06 [renormalize]: 2.10013e-07 [cse]: 1.91801e-05 [optimize_parallel_all_gather_comm]: 8.46013e-06 [overlap_param_gather]: 1.53016e-06 [cconv]: 2.29999e-05 [loop_unroll]: 0.00046633 [opt_after_cconv]: 0.0001315, [1] [Cycle 1]: 0.00012564, [7] [c_1]: 5.29597e-05 [parameter_eliminate]: 2.44984e-06 [updatestate_depend_eliminate]: 7.87992e-06 [updatestate_assign_eliminate]: 4.64031e-06 [updatestate_loads_eliminate]: 5.24987e-06 [cse]: 2.13902e-05 [renormalize]: 3.09665e-07 [remove_dup_value]: 1.23503e-05 [tuple_transform]: 6.88704e-05, [1] [Cycle 1]: 6.45602e-05, [2] [d_1]: 5.53099e-05 [renormalize]: 1.49943e-07 [partial_unused_args_eliminate]: 1.76998e-06 [add_cache_embedding]: 1.35303e-05 [add_recomputation]: 6.139e-05 [cse_after_recomputation]: 2.719e-05, [1] [Cycle 1]: 2.27802e-05, [1] [cse]: 1.767e-05 [environ_conv]: 6.02007e-06 [swap_dp_allreduce_reducescatter]: 7.22008e-06 [bias_add_comm_swap]: 2.19001e-06 [label_micro_interleaved_index]: 2.16998e-06 [label_fine_grained_interleaved_index]: 1.54972e-06 [merge_cast_opt]: 1.32015e-06 [slice_recompute_activation]: 1.60001e-06 [micro_interleaved_order_control]: 1.17999e-06 [assign_add_opt]: 7.41007e-06 [ForceFp32Comm]: 7.69738e-07 [remove_cast_before_assign_add]: 5.59725e-07 [full_micro_interleaved_order_control]: 1.70013e-06 [reorder_send_recv_between_fp_bp]: 2.16998e-06 [comm_op_add_attrs]: 8.89879e-07 [add_comm_op_reuse_tag]: 9.49949e-07 [interleave_split_concat_branches]: 4.60073e-07 [interleave_parallel_branches]: 4.60073e-07 [overlap_opt_shard_in_pipeline]: 6.19795e-07 [overlap_opt_shard_grad_in_pipeline]: 1.64006e-06 [control_data_broadcast_order]: 5.69969e-07 [grouped_pairwise_exchange_alltoall]: 1.01002e-06 [offloading_packed_experts]: 5.89993e-07 [overlap_recompute_and_grad_model_parallel]: 1.91992e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.30271e-07 [overlap_recompute_allgather_and_fa_grad]: 8.30274e-07 [overlap_grad_ring_attention]: 1.36998e-06 [overlap_grad_flash_sp]: 1.29999e-05 [begin_end_overlap_inline]: 6.39819e-07 [split_matmul_comm_elemetwise]: 1.47987e-06 [split_layernorm_comm]: 1.36998e-06 [handle_group_info]: 8.69855e-07 [symbol_engine_optimizer]: 8.43601e-05, [1] [Cycle 1]: 7.98199e-05, [6] [build]: 3.56976e-06 [elim_shapecalc]: 1.192e-05 [elim_not_effective]: 1.672e-05 [opt_reshape]: 8.61986e-06 [fold_const_symbol]: 1.32802e-05 [renormalize]: 1.80211e-07 [pipeline_parallel_scheduler]: 8.60076e-07 [auto_monad_reorder]: 2.73497e-05 [get_jit_bprop_graph]: 4.20026e-07 [rewriter_after_jit_bprop_graph]: 2.29571e-07 [eliminate_special_op_node]: 0.00048143 [distribtued_split]: 3.92902e-05 [validate]: 3.38703e-05 [task_emit]: 0.0683219 [execute]: 1.28401e-05 Sums bootstrap : 0.000306s : 0.41% type_inference : 0.002418s : 3.22% auto_monad : 0.000129s : 0.17% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000027s : 0.04% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000539s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000239s : 0.32% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000430s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.07% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000146s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000466s : 0.62% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000027s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000481s : 0.64% distribtued_split : 0.000039s : 0.05% validate : 0.000034s : 0.05% task_emit : 0.068322s : 91.06% execute : 0.000013s : 0.02% Time group info: ------[substitution.] 0.000126 63 4.91% : 0.000006s : 2: substitution.depend_value_elim 2.07% : 0.000003s : 5: substitution.elim_not_effective 1.88% : 0.000002s : 5: substitution.fold_const_symbol 4.73% : 0.000006s : 6: substitution.graph_param_transform 49.56% : 0.000062s : 1: substitution.inline 4.12% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.52% : 0.000004s : 6: substitution.load_eliminater 3.01% : 0.000004s : 2: substitution.reduce_all_const_elim 6.19% : 0.000008s : 10: substitution.remove_not_recompute_node 2.47% : 0.000003s : 2: substitution.replace_old_param 8.94% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.61% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002356 2 88.89% : 0.002094s : 1: type_inference.infer 11.11% : 0.000262s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000061 1 100.00% : 0.000061s : 1: match.inline ------[predicate.] 0.000225 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.05% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.22% : 0.000005s : 25: predicate.arithmetic_simplify 0.86% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.30% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.79% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.59% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.29% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.18% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000002s : 19: predicate.environ_get_depend_swap 1.95% : 0.000004s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000000s : 6: predicate.fold_const_symbol 0.77% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.47% : 0.000012s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.43% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.00% : 0.000002s : 12: predicate.less_batch_normalization 1.78% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000005s : 38: predicate.load_eliminater 1.29% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.31% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.82% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.83% : 0.000002s : 13: predicate.minmaximum_grad 0.80% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.14% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.85% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.03% : 0.000002s : 13: predicate.reduce_eliminate 0.57% : 0.000001s : 12: predicate.remove_not_recompute_node 1.21% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.83% : 0.000002s : 13: predicate.reshape_eliminate 0.86% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 6: predicate.row_tensor_eliminate 1.08% : 0.000002s : 12: predicate.same_eliminate 0.52% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.95% : 0.000002s : 12: predicate.shard_identity_eliminate 1.29% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.06% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.71% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.33% : 0.000010s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.81% : 0.000002s : 13: predicate.transpose_eliminate 1.81% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.60% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.72% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.64% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.68% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.34% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.54% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.48% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.95% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000139 4 10.94% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.06% : 0.000124s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087735 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000065s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.16% : 0.000138s : 1: auto_monad 0.04% : 0.000034s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000328s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000013s : 1: convert_after_rewriter 0.04% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000046s : 1: distribtued_split 0.60% : 0.000528s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.58% : 0.000510s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001098s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.18% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.11% : 0.005359s : 1: opt_a 0.16% : 0.000141s : 1: opt_after_cconv 0.28% : 0.000249s : 1: opt_b 8.05% : 0.007065s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000030s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000233s : 1: renormalize.infer 0.22% : 0.000193s : 1: renormalize.specialize 0.01% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000154s : 1: rewriter_after_opt_a 0.05% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000089s : 1: symbol_engine_optimizer 77.59% : 0.068072s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.74% : 0.002402s : 1: type_inference 0.13% : 0.000116s : 1: validate Time group info: ------[substitution.] 0.000114 63 5.01% : 0.000006s : 2: substitution.depend_value_elim 2.13% : 0.000002s : 5: substitution.elim_not_effective 1.71% : 0.000002s : 5: substitution.fold_const_symbol 5.04% : 0.000006s : 6: substitution.graph_param_transform 48.88% : 0.000056s : 1: substitution.inline 4.25% : 0.000005s : 10: substitution.j_node_and_user_rematch 4.25% : 0.000005s : 6: substitution.load_eliminater 2.07% : 0.000002s : 2: substitution.reduce_all_const_elim 6.94% : 0.000008s : 10: substitution.remove_not_recompute_node 2.28% : 0.000003s : 2: substitution.replace_old_param 9.30% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.14% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002130 2 89.89% : 0.001915s : 1: type_inference.infer 10.11% : 0.000215s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000055 1 100.00% : 0.000055s : 1: match.inline ------[predicate.] 0.000228 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.15% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.82% : 0.000002s : 13: predicate.addn_zero_filter 0.79% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.07% : 0.000005s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.20% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.62% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_depend_swap 1.93% : 0.000004s : 31: predicate.environ_get_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.25% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.13% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.26% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.54% : 0.000013s : 63: predicate.inline 1.00% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.08% : 0.000002s : 12: predicate.less_batch_normalization 1.73% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.47% : 0.000006s : 38: predicate.load_eliminater 1.22% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.21% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.77% : 0.000002s : 12: predicate.merge_addn 0.90% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.78% : 0.000002s : 13: predicate.minmaximum_grad 0.64% : 0.000001s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.18% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 0.98% : 0.000002s : 13: predicate.reduce_eliminate 0.70% : 0.000002s : 12: predicate.remove_not_recompute_node 1.07% : 0.000002s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.85% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.03% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.97% : 0.000002s : 12: predicate.shard_identity_eliminate 1.35% : 0.000003s : 18: predicate.special_op_eliminate 0.90% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.02% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.34% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.89% : 0.000002s : 14: predicate.switch_defer_inline 1.69% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.35% : 0.000010s : 43: predicate.switch_simplify 0.85% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.81% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.64% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.84% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.72% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.78% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.46% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.45% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.49% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000135 4 6.37% : 0.000009s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.63% : 0.000127s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087579 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.06% : 0.000055s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.12% : 0.000109s : 1: auto_monad 0.03% : 0.000027s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000306s : 1: bootstrap 0.02% : 0.000016s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.04% : 0.000039s : 1: distribtued_split 0.57% : 0.000498s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.55% : 0.000485s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000013s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001069s : 80: opt.transform.opt_a 0.06% : 0.000049s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.03% : 0.000030s : 3: opt.transform.special_op_eliminate 0.05% : 0.000044s : 4: opt.transform.symbol_engine_opt 5.85% : 0.005125s : 1: opt_a 0.15% : 0.000130s : 1: opt_after_cconv 0.28% : 0.000243s : 1: opt_b 7.65% : 0.006700s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000016s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.01% : 0.000013s : 1: remove_dup_value 0.25% : 0.000216s : 1: renormalize.infer 0.22% : 0.000196s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000135s : 1: rewriter_after_opt_a 0.04% : 0.000034s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000009s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000086s : 1: symbol_engine_optimizer 78.89% : 0.069092s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.48% : 0.002170s : 1: type_inference 0.07% : 0.000060s : 1: validate Time group info: ------[substitution.] 0.000126 63 3.90% : 0.000005s : 2: substitution.depend_value_elim 2.22% : 0.000003s : 5: substitution.elim_not_effective 1.50% : 0.000002s : 5: substitution.fold_const_symbol 5.84% : 0.000007s : 6: substitution.graph_param_transform 52.25% : 0.000066s : 1: substitution.inline 4.14% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.26% : 0.000004s : 6: substitution.load_eliminater 1.85% : 0.000002s : 2: substitution.reduce_all_const_elim 5.54% : 0.000007s : 10: substitution.remove_not_recompute_node 2.78% : 0.000004s : 2: substitution.replace_old_param 8.73% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.00% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002390 2 88.17% : 0.002107s : 1: type_inference.infer 11.83% : 0.000283s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000227 1420 0.89% : 0.000002s : 13: predicate.accumulaten_eliminater 1.25% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.15% : 0.000005s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.87% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.41% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.89% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.81% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.59% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.11% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_depend_swap 1.99% : 0.000005s : 31: predicate.environ_get_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.26% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.95% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.75% : 0.000013s : 63: predicate.inline 0.96% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.07% : 0.000002s : 12: predicate.less_batch_normalization 1.69% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.41% : 0.000005s : 38: predicate.load_eliminater 1.31% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.91% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.82% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.82% : 0.000002s : 13: predicate.minmaximum_grad 0.86% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.20% : 0.000003s : 19: predicate.partial_eliminate 0.82% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 0.97% : 0.000002s : 13: predicate.reduce_eliminate 0.61% : 0.000001s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.28% : 0.000001s : 6: predicate.reset_defer_inline 0.77% : 0.000002s : 13: predicate.reshape_eliminate 0.76% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 1.10% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.90% : 0.000002s : 12: predicate.shard_identity_eliminate 1.37% : 0.000003s : 18: predicate.special_op_eliminate 0.98% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.93% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.85% : 0.000002s : 14: predicate.switch_defer_inline 1.80% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.32% : 0.000010s : 43: predicate.switch_simplify 0.76% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.81% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.55% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.77% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.43% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.80% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.43% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.30% : 0.000007s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.83% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000159 4 9.99% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.01% : 0.000143s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087766 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000066s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.16% : 0.000141s : 1: auto_monad 0.04% : 0.000033s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000333s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.02% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000047s : 1: distribtued_split 0.56% : 0.000495s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.01% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000475s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.27% : 0.001113s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.01% : 0.005275s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.28% : 0.000244s : 1: opt_b 7.89% : 0.006923s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.26% : 0.000229s : 1: renormalize.infer 0.22% : 0.000195s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000152s : 1: rewriter_after_opt_a 0.05% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000087s : 1: symbol_engine_optimizer 77.88% : 0.068355s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.78% : 0.002436s : 1: type_inference 0.15% : 0.000129s : 1: validate TotalTime = 0.082697, [21] [bootstrap]: 0.00030452 [type_inference]: 0.00241719 [auto_monad]: 0.00012968 [graph_reusing]: 2.48011e-06 [inline]: 1.27032e-06 [parallel-infer-symbol]: 2.31015e-06 [pre_auto_parallel]: 2.567e-05 [insert-virtual-dataset]: 3.22983e-06 [parallel-infer-symbol-second]: 3.70201e-07 [dataset_repeat_opt]: 1.11014e-06 [pipeline_split]: 1.47987e-06 [optimize]: 0.00698496, [52] [py_interpret_to_execute]: 1.54898e-05 [rewriter_before_opt_a]: 3.47099e-05 [opt_a]: 0.00529435, [2] [Cycle 1]: 0.0015306, [43] [expand_dump_flag]: 3.11015e-06 [switch_simplify]: 2.86498e-05 [loop_unroll]: 1.32201e-05 [a_1]: 0.0003344 [recompute_prepare]: 9.38028e-06 [updatestate_depend_eliminate]: 8.48016e-06 [updatestate_assign_eliminate]: 5.48968e-06 [updatestate_loads_eliminate]: 6.97002e-06 [parameter_eliminate]: 2.84007e-06 [a_2]: 0.00011906 [accelerated_algorithm]: 8.65012e-06 [shard]: 2.01026e-06 [meta_shard_fg_expand]: 1.02599e-05 [shard_inline]: 8.72975e-06 [auto_parallel]: 1.24699e-05 [parallel]: 7.49016e-06 [flash_sp]: 1.07498e-05 [merge_comm]: 7.81985e-06 [allreduce_fusion]: 5.1097e-06 [matmul_add_comm_reduction]: 1.01002e-05 [allreduce_slice_to_reducescatter]: 4.69852e-07 [virtual_shard_identity]: 9.47015e-06 [virtual_dataset]: 8.06013e-06 [get_grad_eliminate_]: 7.6401e-06 [virtual_output]: 7.56001e-06 [merge_forward]: 5.72018e-06 [cell_reuse_recompute_pass]: 2.04006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.64802e-05 [before_grad]: 1.388e-05 [inplace_validation]: 5.28013e-06 [meta_fg_expand]: 5.53019e-06 [inplace_validation_after_expand]: 6.14021e-06 [flash_sp_send_recv_attached]: 4.99003e-06 [receive_attached]: 2.56998e-06 [after_resolve]: 1.13002e-05 [a_after_grad]: 1.23801e-05 [special_op_eliminate]: 7.62008e-06 [renormalize]: 0.00044031 [add_forward_monad_depend]: 3.93018e-06 [auto_monad_grad]: 2.08011e-06 [auto_monad_eliminator]: 3.256e-05 [cse]: 3.14796e-05 [a_3]: 5.71599e-05 [Cycle 2]: 0.00077737, [43] [expand_dump_flag]: 1.13994e-06 [switch_simplify]: 9.04966e-06 [loop_unroll]: 7.79005e-06 [a_1]: 0.00020018 [recompute_prepare]: 7.35e-06 [updatestate_depend_eliminate]: 6.20028e-06 [updatestate_assign_eliminate]: 4.82006e-06 [updatestate_loads_eliminate]: 5.51017e-06 [parameter_eliminate]: 1.28988e-06 [a_2]: 0.00010588 [accelerated_algorithm]: 8.40006e-06 [shard]: 1.13994e-06 [meta_shard_fg_expand]: 2.47965e-06 [shard_inline]: 7.75e-06 [auto_parallel]: 1.14697e-05 [parallel]: 3.23029e-06 [flash_sp]: 3.3197e-06 [merge_comm]: 6.17001e-06 [allreduce_fusion]: 4.6799e-06 [matmul_add_comm_reduction]: 7.71973e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 8.77958e-06 [virtual_dataset]: 7.41007e-06 [get_grad_eliminate_]: 7.20005e-06 [virtual_output]: 6.76978e-06 [merge_forward]: 4.86011e-06 [cell_reuse_recompute_pass]: 1.9297e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.533e-05 [before_grad]: 1.255e-05 [inplace_validation]: 4.40981e-06 [meta_fg_expand]: 4.73019e-06 [inplace_validation_after_expand]: 5.22006e-06 [flash_sp_send_recv_attached]: 9.20147e-07 [receive_attached]: 7.10133e-07 [after_resolve]: 1.00499e-05 [a_after_grad]: 1.19e-05 [special_op_eliminate]: 7.16979e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.40053e-07 [auto_monad_grad]: 1.14972e-06 [auto_monad_eliminator]: 1.81203e-05 [cse]: 1.91396e-05 [a_3]: 4.84302e-05 [py_interpret_to_execute_after_opt_a]: 9.80031e-06 [slice_cell_reuse_recomputed_activation]: 2.25008e-06 [rewriter_after_opt_a]: 0.0001425 [convert_after_rewriter]: 9.98983e-06 [order_py_execute_after_rewriter]: 6.8699e-06 [opt_b]: 0.00024194, [1] [Cycle 1]: 0.0002364, [7] [b_1]: 0.0001619 [b_2]: 9.73977e-06 [updatestate_depend_eliminate]: 5.29038e-06 [updatestate_assign_eliminate]: 4.35999e-06 [updatestate_loads_eliminate]: 5.11995e-06 [renormalize]: 3.59956e-07 [cse]: 1.84202e-05 [optimize_parallel_all_gather_comm]: 8.08993e-06 [overlap_param_gather]: 1.53994e-06 [cconv]: 2.21697e-05 [loop_unroll]: 0.00049634 [opt_after_cconv]: 0.00013319, [1] [Cycle 1]: 0.00012688, [7] [c_1]: 5.19603e-05 [parameter_eliminate]: 2.55974e-06 [updatestate_depend_eliminate]: 8.41031e-06 [updatestate_assign_eliminate]: 4.84986e-06 [updatestate_loads_eliminate]: 5.39003e-06 [cse]: 2.14102e-05 [renormalize]: 5.79748e-07 [remove_dup_value]: 1.30497e-05 [tuple_transform]: 7.06101e-05, [1] [Cycle 1]: 6.58403e-05, [2] [d_1]: 5.63296e-05 [renormalize]: 2.70084e-07 [partial_unused_args_eliminate]: 1.91014e-06 [add_cache_embedding]: 1.24099e-05 [add_recomputation]: 6.22203e-05 [cse_after_recomputation]: 2.639e-05, [1] [Cycle 1]: 2.17296e-05, [1] [cse]: 1.66101e-05 [environ_conv]: 7.51019e-06 [swap_dp_allreduce_reducescatter]: 7.31042e-06 [bias_add_comm_swap]: 2.14018e-06 [label_micro_interleaved_index]: 2.04984e-06 [label_fine_grained_interleaved_index]: 2.13971e-06 [merge_cast_opt]: 1.23028e-06 [slice_recompute_activation]: 1.68011e-06 [micro_interleaved_order_control]: 1.95997e-06 [assign_add_opt]: 7.34022e-06 [ForceFp32Comm]: 8.29808e-07 [remove_cast_before_assign_add]: 7.39936e-07 [full_micro_interleaved_order_control]: 2.00002e-06 [reorder_send_recv_between_fp_bp]: 2.32039e-06 [comm_op_add_attrs]: 6.3004e-07 [add_comm_op_reuse_tag]: 1.09989e-06 [interleave_split_concat_branches]: 8.60076e-07 [interleave_parallel_branches]: 6.50063e-07 [overlap_opt_shard_in_pipeline]: 9.4017e-07 [overlap_opt_shard_grad_in_pipeline]: 2.28034e-06 [control_data_broadcast_order]: 1.21025e-06 [grouped_pairwise_exchange_alltoall]: 1.45985e-06 [offloading_packed_experts]: 9.19681e-07 [overlap_recompute_and_grad_model_parallel]: 2.10991e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.27032e-06 [overlap_recompute_allgather_and_fa_grad]: 7.70204e-07 [overlap_grad_ring_attention]: 1.38022e-06 [overlap_grad_flash_sp]: 1.39601e-05 [begin_end_overlap_inline]: 8.10251e-07 [split_matmul_comm_elemetwise]: 1.75973e-06 [split_layernorm_comm]: 1.57021e-06 [handle_group_info]: 9.39704e-07 [symbol_engine_optimizer]: 8.502e-05, [1] [Cycle 1]: 8.01701e-05, [6] [build]: 4.10015e-06 [elim_shapecalc]: 1.19698e-05 [elim_not_effective]: 1.61803e-05 [opt_reshape]: 8.40006e-06 [fold_const_symbol]: 1.34199e-05 [renormalize]: 2.70084e-07 [pipeline_parallel_scheduler]: 1.53016e-06 [auto_monad_reorder]: 2.66302e-05 [get_jit_bprop_graph]: 4.60073e-07 [rewriter_after_jit_bprop_graph]: 4.10248e-07 [eliminate_special_op_node]: 0.00050824 [distribtued_split]: 4.06899e-05 [validate]: 3.477e-05 [task_emit]: 0.0719002 [execute]: 3.89903e-05 Sums bootstrap : 0.000305s : 0.39% type_inference : 0.002417s : 3.07% auto_monad : 0.000130s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000535s : 0.68% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000225s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000013s : 0.02% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000440s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.06% optimize.opt_a.cse : 0.000051s : 0.06% optimize.opt_a.a_3 : 0.000106s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000143s : 0.18% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000496s : 0.63% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000027s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000508s : 0.65% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.04% task_emit : 0.071900s : 91.37% execute : 0.000039s : 0.05% TotalTime = 0.0826456, [21] [bootstrap]: 0.00033106 [type_inference]: 0.00263148 [auto_monad]: 0.00014588 [graph_reusing]: 2.76975e-06 [inline]: 1.62981e-06 [parallel-infer-symbol]: 2.35019e-06 [pre_auto_parallel]: 2.738e-05 [insert-virtual-dataset]: 2.95974e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 1.27032e-06 [pipeline_split]: 1.68988e-06 [optimize]: 0.00784444, [52] [py_interpret_to_execute]: 1.75503e-05 [rewriter_before_opt_a]: 4.14196e-05 [opt_a]: 0.00594747, [2] [Cycle 1]: 0.00175394, [43] [expand_dump_flag]: 4.00003e-06 [switch_simplify]: 3.40799e-05 [loop_unroll]: 1.61203e-05 [a_1]: 0.00041172 [recompute_prepare]: 1.072e-05 [updatestate_depend_eliminate]: 9.6797e-06 [updatestate_assign_eliminate]: 5.91017e-06 [updatestate_loads_eliminate]: 7.83987e-06 [parameter_eliminate]: 3.47989e-06 [a_2]: 0.00014068 [accelerated_algorithm]: 1.04299e-05 [shard]: 2.14996e-06 [meta_shard_fg_expand]: 4.38979e-06 [shard_inline]: 1.04397e-05 [auto_parallel]: 1.28802e-05 [parallel]: 8.06991e-06 [flash_sp]: 1.22502e-05 [merge_comm]: 9.22009e-06 [allreduce_fusion]: 6.55977e-06 [matmul_add_comm_reduction]: 1.10902e-05 [allreduce_slice_to_reducescatter]: 4.69852e-07 [virtual_shard_identity]: 1.169e-05 [virtual_dataset]: 9.97027e-06 [get_grad_eliminate_]: 1.00802e-05 [virtual_output]: 9.0301e-06 [merge_forward]: 7.17025e-06 [cell_reuse_recompute_pass]: 1.93017e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.15899e-05 [before_grad]: 1.708e-05 [inplace_validation]: 5.97024e-06 [meta_fg_expand]: 6.31995e-06 [inplace_validation_after_expand]: 7.66991e-06 [flash_sp_send_recv_attached]: 5.56977e-06 [receive_attached]: 2.78e-06 [after_resolve]: 1.35601e-05 [a_after_grad]: 1.58199e-05 [special_op_eliminate]: 9.52976e-06 [renormalize]: 0.00048021 [add_forward_monad_depend]: 3.76999e-06 [auto_monad_grad]: 1.7602e-06 [auto_monad_eliminator]: 3.56399e-05 [cse]: 3.63099e-05 [a_3]: 7.11903e-05 [Cycle 2]: 0.00092865, [43] [expand_dump_flag]: 1.17999e-06 [switch_simplify]: 1.12001e-05 [loop_unroll]: 9.34023e-06 [a_1]: 0.00025391 [recompute_prepare]: 9.20007e-06 [updatestate_depend_eliminate]: 6.36978e-06 [updatestate_assign_eliminate]: 5.17024e-06 [updatestate_loads_eliminate]: 6.29015e-06 [parameter_eliminate]: 1.45985e-06 [a_2]: 0.0001284 [accelerated_algorithm]: 1.057e-05 [shard]: 1.30013e-06 [meta_shard_fg_expand]: 2.95974e-06 [shard_inline]: 9.79006e-06 [auto_parallel]: 1.228e-05 [parallel]: 4.07966e-06 [flash_sp]: 3.89991e-06 [merge_comm]: 7.05011e-06 [allreduce_fusion]: 6.20028e-06 [matmul_add_comm_reduction]: 8.55979e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 1.14096e-05 [virtual_dataset]: 9.41986e-06 [get_grad_eliminate_]: 9.05013e-06 [virtual_output]: 8.73022e-06 [merge_forward]: 5.16977e-06 [cell_reuse_recompute_pass]: 1.95019e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.91899e-05 [before_grad]: 1.61198e-05 [inplace_validation]: 4.66965e-06 [meta_fg_expand]: 5.68014e-06 [inplace_validation_after_expand]: 6.05965e-06 [flash_sp_send_recv_attached]: 9.49949e-07 [receive_attached]: 7.90227e-07 [after_resolve]: 1.18399e-05 [a_after_grad]: 1.453e-05 [special_op_eliminate]: 9.09995e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.40053e-07 [auto_monad_grad]: 9.79751e-07 [auto_monad_eliminator]: 2.06297e-05 [cse]: 2.16002e-05 [a_3]: 5.943e-05 [py_interpret_to_execute_after_opt_a]: 9.59961e-06 [slice_cell_reuse_recomputed_activation]: 2.38977e-06 [rewriter_after_opt_a]: 0.00014957 [convert_after_rewriter]: 1.116e-05 [order_py_execute_after_rewriter]: 7.07014e-06 [opt_b]: 0.00028472, [1] [Cycle 1]: 0.00027885, [7] [b_1]: 0.00019599 [b_2]: 1.14101e-05 [updatestate_depend_eliminate]: 5.98002e-06 [updatestate_assign_eliminate]: 5.09992e-06 [updatestate_loads_eliminate]: 5.56977e-06 [renormalize]: 3.19909e-07 [cse]: 2.03596e-05 [optimize_parallel_all_gather_comm]: 8.81962e-06 [overlap_param_gather]: 1.72993e-06 [cconv]: 2.51699e-05 [loop_unroll]: 0.0005025 [opt_after_cconv]: 0.00015213, [1] [Cycle 1]: 0.00014587, [7] [c_1]: 6.50301e-05 [parameter_eliminate]: 2.86987e-06 [updatestate_depend_eliminate]: 8.84989e-06 [updatestate_assign_eliminate]: 5.22984e-06 [updatestate_loads_eliminate]: 5.87991e-06 [cse]: 2.35103e-05 [renormalize]: 3.39933e-07 [remove_dup_value]: 5.15101e-05 [tuple_transform]: 8.54996e-05, [1] [Cycle 1]: 8.03201e-05, [2] [d_1]: 7.011e-05 [renormalize]: 2.20258e-07 [partial_unused_args_eliminate]: 2.33995e-06 [add_cache_embedding]: 1.44402e-05 [add_recomputation]: 6.95698e-05 [cse_after_recomputation]: 2.96501e-05, [1] [Cycle 1]: 2.45799e-05, [1] [cse]: 1.90898e-05 [environ_conv]: 8.82009e-06 [swap_dp_allreduce_reducescatter]: 8.21007e-06 [bias_add_comm_swap]: 2.74042e-06 [label_micro_interleaved_index]: 2.19001e-06 [label_fine_grained_interleaved_index]: 2.48989e-06 [merge_cast_opt]: 1.41002e-06 [slice_recompute_activation]: 2.46009e-06 [micro_interleaved_order_control]: 2.30968e-06 [assign_add_opt]: 8.47969e-06 [ForceFp32Comm]: 9.00123e-07 [remove_cast_before_assign_add]: 1.22981e-06 [full_micro_interleaved_order_control]: 2.29012e-06 [reorder_send_recv_between_fp_bp]: 2.4098e-06 [comm_op_add_attrs]: 1.07009e-06 [add_comm_op_reuse_tag]: 1.39e-06 [interleave_split_concat_branches]: 9.4017e-07 [interleave_parallel_branches]: 7.5018e-07 [overlap_opt_shard_in_pipeline]: 1.24983e-06 [overlap_opt_shard_grad_in_pipeline]: 2.47033e-06 [control_data_broadcast_order]: 1.24006e-06 [grouped_pairwise_exchange_alltoall]: 1.55997e-06 [offloading_packed_experts]: 1.24006e-06 [overlap_recompute_and_grad_model_parallel]: 2.07964e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.26986e-06 [overlap_recompute_allgather_and_fa_grad]: 1.30013e-06 [overlap_grad_ring_attention]: 2.02004e-06 [overlap_grad_flash_sp]: 1.81701e-05 [begin_end_overlap_inline]: 8.40053e-07 [split_matmul_comm_elemetwise]: 2.22959e-06 [split_layernorm_comm]: 1.95019e-06 [handle_group_info]: 1.27964e-06 [symbol_engine_optimizer]: 0.00010034, [1] [Cycle 1]: 9.55202e-05, [6] [build]: 4.69992e-06 [elim_shapecalc]: 1.44099e-05 [elim_not_effective]: 2.00197e-05 [opt_reshape]: 1.05598e-05 [fold_const_symbol]: 1.73799e-05 [renormalize]: 2.90107e-07 [pipeline_parallel_scheduler]: 1.91992e-06 [auto_monad_reorder]: 3.296e-05 [get_jit_bprop_graph]: 4.49829e-07 [rewriter_after_jit_bprop_graph]: 6.99889e-07 [eliminate_special_op_node]: 0.00052123 [distribtued_split]: 4.64902e-05 [validate]: 3.932e-05 [task_emit]: 0.0707316 [execute]: 1.26897e-05 Sums bootstrap : 0.000331s : 0.42% type_inference : 0.002631s : 3.36% auto_monad : 0.000146s : 0.19% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000027s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000018s : 0.02% optimize.rewriter_before_opt_a : 0.000041s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000045s : 0.06% optimize.opt_a.loop_unroll : 0.000025s : 0.03% optimize.opt_a.a_1 : 0.000666s : 0.85% optimize.opt_a.recompute_prepare : 0.000020s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000269s : 0.34% optimize.opt_a.accelerated_algorithm : 0.000021s : 0.03% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000020s : 0.03% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000016s : 0.02% optimize.opt_a.allreduce_fusion : 0.000013s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000023s : 0.03% optimize.opt_a.virtual_dataset : 0.000019s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000019s : 0.02% optimize.opt_a.virtual_output : 0.000018s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000041s : 0.05% optimize.opt_a.before_grad : 0.000033s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000012s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000014s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000025s : 0.03% optimize.opt_a.a_after_grad : 0.000030s : 0.04% optimize.opt_a.special_op_eliminate : 0.000019s : 0.02% optimize.opt_a.renormalize : 0.000480s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000056s : 0.07% optimize.opt_a.cse : 0.000058s : 0.07% optimize.opt_a.a_3 : 0.000131s : 0.17% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000150s : 0.19% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000196s : 0.25% optimize.opt_b.b_2 : 0.000011s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000503s : 0.64% optimize.opt_after_cconv.c_1 : 0.000065s : 0.08% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000024s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000052s : 0.07% optimize.tuple_transform.d_1 : 0.000070s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000070s : 0.09% optimize.cse_after_recomputation.cse : 0.000019s : 0.02% optimize.environ_conv : 0.000009s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000018s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000020s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000033s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000521s : 0.67% distribtued_split : 0.000046s : 0.06% validate : 0.000039s : 0.05% task_emit : 0.070732s : 90.33% execute : 0.000013s : 0.02% Time group info: ------[substitution.] 0.000125 63 5.16% : 0.000006s : 2: substitution.depend_value_elim 2.15% : 0.000003s : 5: substitution.elim_not_effective 1.83% : 0.000002s : 5: substitution.fold_const_symbol 5.95% : 0.000007s : 6: substitution.graph_param_transform 48.73% : 0.000061s : 1: substitution.inline 4.31% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.18% : 0.000004s : 6: substitution.load_eliminater 2.68% : 0.000003s : 2: substitution.reduce_all_const_elim 6.03% : 0.000008s : 10: substitution.remove_not_recompute_node 2.90% : 0.000004s : 2: substitution.replace_old_param 8.96% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.10% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002388 2 87.94% : 0.002100s : 1: type_inference.infer 12.06% : 0.000288s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000060 1 100.00% : 0.000060s : 1: match.inline ------[predicate.] 0.000229 1420 0.84% : 0.000002s : 13: predicate.accumulaten_eliminater 1.09% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.18% : 0.000005s : 25: predicate.arithmetic_simplify 0.87% : 0.000002s : 13: predicate.cast_eliminate 0.76% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.21% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.59% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.19% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_depend_swap 1.95% : 0.000004s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.49% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.65% : 0.000013s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.98% : 0.000002s : 12: predicate.less_batch_normalization 1.79% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000005s : 38: predicate.load_eliminater 1.51% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.19% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.81% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.73% : 0.000002s : 6: predicate.mutable_eliminate 0.42% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.21% : 0.000003s : 19: predicate.partial_eliminate 0.82% : 0.000002s : 13: predicate.print_const_string_wrapper 0.83% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000002s : 13: predicate.reduce_eliminate 0.58% : 0.000001s : 12: predicate.remove_not_recompute_node 1.16% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.78% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.31% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 0.94% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.90% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.36% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.61% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.41% : 0.000010s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.82% : 0.000002s : 13: predicate.transpose_eliminate 1.86% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.74% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.49% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 3.04% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.50% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.67% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.37% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.50% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.54% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000161 4 9.69% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.31% : 0.000145s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091492 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000067s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.16% : 0.000142s : 1: auto_monad 0.04% : 0.000033s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000328s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.57% : 0.000523s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.05% : 0.000049s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000506s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.19% : 0.001088s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 5.79% : 0.005299s : 1: opt_a 0.15% : 0.000137s : 1: opt_after_cconv 0.27% : 0.000245s : 1: opt_b 7.64% : 0.006993s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000238s : 1: renormalize.infer 0.22% : 0.000197s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000148s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000088s : 1: symbol_engine_optimizer 78.62% : 0.071932s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.66% : 0.002436s : 1: type_inference 0.10% : 0.000095s : 1: validate TotalTime = 0.0830135, [21] [bootstrap]: 0.00030553 [type_inference]: 0.00241786 [auto_monad]: 0.00012866 [graph_reusing]: 2.2999e-06 [inline]: 1.58977e-06 [parallel-infer-symbol]: 2.06009e-06 [pre_auto_parallel]: 2.71299e-05 [insert-virtual-dataset]: 2.91038e-06 [parallel-infer-symbol-second]: 4.49829e-07 [dataset_repeat_opt]: 1.79978e-06 [pipeline_split]: 1.85007e-06 [optimize]: 0.00691431, [52] [py_interpret_to_execute]: 1.51098e-05 [rewriter_before_opt_a]: 3.55099e-05 [opt_a]: 0.00527229, [2] [Cycle 1]: 0.00152671, [43] [expand_dump_flag]: 3.03006e-06 [switch_simplify]: 2.93804e-05 [loop_unroll]: 1.33701e-05 [a_1]: 0.00033839 [recompute_prepare]: 9.12976e-06 [updatestate_depend_eliminate]: 9.32999e-06 [updatestate_assign_eliminate]: 6.36e-06 [updatestate_loads_eliminate]: 7.26013e-06 [parameter_eliminate]: 3.55998e-06 [a_2]: 0.00013188 [accelerated_algorithm]: 8.98959e-06 [shard]: 2.02982e-06 [meta_shard_fg_expand]: 3.69037e-06 [shard_inline]: 8.44989e-06 [auto_parallel]: 1.23698e-05 [parallel]: 7.01984e-06 [flash_sp]: 1.133e-05 [merge_comm]: 7.98004e-06 [allreduce_fusion]: 5.60004e-06 [matmul_add_comm_reduction]: 1.11302e-05 [allreduce_slice_to_reducescatter]: 4.99655e-07 [virtual_shard_identity]: 9.72021e-06 [virtual_dataset]: 8.17981e-06 [get_grad_eliminate_]: 7.6401e-06 [virtual_output]: 7.70995e-06 [merge_forward]: 5.51995e-06 [cell_reuse_recompute_pass]: 1.76998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.68597e-05 [before_grad]: 1.35498e-05 [inplace_validation]: 4.65009e-06 [meta_fg_expand]: 5.64987e-06 [inplace_validation_after_expand]: 6.25988e-06 [flash_sp_send_recv_attached]: 5.14975e-06 [receive_attached]: 1.77976e-06 [after_resolve]: 1.17901e-05 [a_after_grad]: 1.33999e-05 [special_op_eliminate]: 8.2301e-06 [renormalize]: 0.00042435 [add_forward_monad_depend]: 3.47989e-06 [auto_monad_grad]: 2.33017e-06 [auto_monad_eliminator]: 3.296e-05 [cse]: 3.22899e-05 [a_3]: 5.83599e-05 [Cycle 2]: 0.00077857, [43] [expand_dump_flag]: 1.13994e-06 [switch_simplify]: 9.41008e-06 [loop_unroll]: 7.64988e-06 [a_1]: 0.00020075 [recompute_prepare]: 7.18003e-06 [updatestate_depend_eliminate]: 6.18957e-06 [updatestate_assign_eliminate]: 4.82984e-06 [updatestate_loads_eliminate]: 5.59958e-06 [parameter_eliminate]: 1.64006e-06 [a_2]: 0.00010805 [accelerated_algorithm]: 8.6301e-06 [shard]: 1.13994e-06 [meta_shard_fg_expand]: 2.46987e-06 [shard_inline]: 7.68993e-06 [auto_parallel]: 1.06799e-05 [parallel]: 3.74019e-06 [flash_sp]: 2.54996e-06 [merge_comm]: 5.72018e-06 [allreduce_fusion]: 5.15999e-06 [matmul_add_comm_reduction]: 7.81007e-06 [allreduce_slice_to_reducescatter]: 2.99886e-07 [virtual_shard_identity]: 8.82987e-06 [virtual_dataset]: 7.71973e-06 [get_grad_eliminate_]: 7.56979e-06 [virtual_output]: 7.12043e-06 [merge_forward]: 4.59002e-06 [cell_reuse_recompute_pass]: 1.73971e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.52802e-05 [before_grad]: 1.22902e-05 [inplace_validation]: 4.29014e-06 [meta_fg_expand]: 4.75999e-06 [inplace_validation_after_expand]: 5.30016e-06 [flash_sp_send_recv_attached]: 9.89996e-07 [receive_attached]: 7.19912e-07 [after_resolve]: 9.56981e-06 [a_after_grad]: 1.15498e-05 [special_op_eliminate]: 7.18981e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 8.79634e-07 [auto_monad_grad]: 1.45985e-06 [auto_monad_eliminator]: 1.788e-05 [cse]: 1.98497e-05 [a_3]: 4.84101e-05 [py_interpret_to_execute_after_opt_a]: 9.39006e-06 [slice_cell_reuse_recomputed_activation]: 1.54972e-06 [rewriter_after_opt_a]: 0.00014568 [convert_after_rewriter]: 8.38982e-06 [order_py_execute_after_rewriter]: 4.97978e-06 [opt_b]: 0.00025212, [1] [Cycle 1]: 0.00024611, [7] [b_1]: 0.00016839 [b_2]: 9.89018e-06 [updatestate_depend_eliminate]: 5.57024e-06 [updatestate_assign_eliminate]: 4.59002e-06 [updatestate_loads_eliminate]: 5.45988e-06 [renormalize]: 5.09899e-07 [cse]: 1.95797e-05 [optimize_parallel_all_gather_comm]: 7.86036e-06 [overlap_param_gather]: 8.29808e-07 [cconv]: 1.49901e-05 [loop_unroll]: 0.00046995 [opt_after_cconv]: 0.00013413, [1] [Cycle 1]: 0.00012791, [7] [c_1]: 5.31799e-05 [parameter_eliminate]: 2.63005e-06 [updatestate_depend_eliminate]: 8.29017e-06 [updatestate_assign_eliminate]: 4.97978e-06 [updatestate_loads_eliminate]: 5.26989e-06 [cse]: 2.17697e-05 [renormalize]: 5.39701e-07 [remove_dup_value]: 8.66968e-06 [tuple_transform]: 6.79498e-05, [1] [Cycle 1]: 6.32699e-05, [2] [d_1]: 5.385e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 1.30013e-06 [add_cache_embedding]: 1.16099e-05 [add_recomputation]: 5.97099e-05 [cse_after_recomputation]: 2.68798e-05, [1] [Cycle 1]: 2.19997e-05, [1] [cse]: 1.70898e-05 [environ_conv]: 6.8401e-06 [swap_dp_allreduce_reducescatter]: 7.49994e-06 [bias_add_comm_swap]: 1.95019e-06 [label_micro_interleaved_index]: 2.1304e-06 [label_fine_grained_interleaved_index]: 1.79978e-06 [merge_cast_opt]: 1.12969e-06 [slice_recompute_activation]: 1.91014e-06 [micro_interleaved_order_control]: 1.56974e-06 [assign_add_opt]: 6.8401e-06 [ForceFp32Comm]: 9.00123e-07 [remove_cast_before_assign_add]: 9.09902e-07 [full_micro_interleaved_order_control]: 1.93994e-06 [reorder_send_recv_between_fp_bp]: 2.25985e-06 [comm_op_add_attrs]: 9.69972e-07 [add_comm_op_reuse_tag]: 8.89879e-07 [interleave_split_concat_branches]: 7.79983e-07 [interleave_parallel_branches]: 6.3004e-07 [overlap_opt_shard_in_pipeline]: 9.09902e-07 [overlap_opt_shard_grad_in_pipeline]: 1.66008e-06 [control_data_broadcast_order]: 9.00123e-07 [grouped_pairwise_exchange_alltoall]: 1.11014e-06 [offloading_packed_experts]: 9.60194e-07 [overlap_recompute_and_grad_model_parallel]: 1.68988e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.29691e-07 [overlap_recompute_allgather_and_fa_grad]: 1.10967e-06 [overlap_grad_ring_attention]: 1.51014e-06 [overlap_grad_flash_sp]: 1.30502e-05 [begin_end_overlap_inline]: 6.39819e-07 [split_matmul_comm_elemetwise]: 1.50967e-06 [split_layernorm_comm]: 1.46031e-06 [handle_group_info]: 8.40053e-07 [symbol_engine_optimizer]: 8.365e-05, [1] [Cycle 1]: 7.89999e-05, [6] [build]: 3.32016e-06 [elim_shapecalc]: 1.21896e-05 [elim_not_effective]: 1.58502e-05 [opt_reshape]: 8.93977e-06 [fold_const_symbol]: 1.33803e-05 [renormalize]: 1.8999e-07 [pipeline_parallel_scheduler]: 1.34995e-06 [auto_monad_reorder]: 2.81804e-05 [get_jit_bprop_graph]: 2.70084e-07 [rewriter_after_jit_bprop_graph]: 7.49715e-07 [eliminate_special_op_node]: 0.00047934 [distribtued_split]: 4.08902e-05 [validate]: 3.37898e-05 [task_emit]: 0.0723109 [execute]: 7.18003e-06 Sums bootstrap : 0.000306s : 0.39% type_inference : 0.002418s : 3.06% auto_monad : 0.000129s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000027s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000539s : 0.68% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000240s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000424s : 0.54% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.06% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000146s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000168s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000020s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000470s : 0.59% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.01% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000028s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000479s : 0.61% distribtued_split : 0.000041s : 0.05% validate : 0.000034s : 0.04% task_emit : 0.072311s : 91.52% execute : 0.000007s : 0.01% Time group info: ------[substitution.] 0.000152 63 5.14% : 0.000008s : 2: substitution.depend_value_elim 2.20% : 0.000003s : 5: substitution.elim_not_effective 2.18% : 0.000003s : 5: substitution.fold_const_symbol 6.15% : 0.000009s : 6: substitution.graph_param_transform 46.89% : 0.000071s : 1: substitution.inline 4.45% : 0.000007s : 10: substitution.j_node_and_user_rematch 3.46% : 0.000005s : 6: substitution.load_eliminater 2.71% : 0.000004s : 2: substitution.reduce_all_const_elim 7.18% : 0.000011s : 10: substitution.remove_not_recompute_node 2.59% : 0.000004s : 2: substitution.replace_old_param 9.07% : 0.000014s : 6: substitution.updatestate_pure_node_eliminater 7.98% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002600 2 88.16% : 0.002292s : 1: type_inference.infer 11.84% : 0.000308s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000070 1 100.00% : 0.000070s : 1: match.inline ------[predicate.] 0.000272 1420 0.77% : 0.000002s : 13: predicate.accumulaten_eliminater 1.14% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.82% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.27% : 0.000006s : 25: predicate.arithmetic_simplify 0.78% : 0.000002s : 13: predicate.cast_eliminate 0.75% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.37% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.79% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.79% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.50% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.07% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_depend_swap 2.00% : 0.000005s : 31: predicate.environ_get_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.24% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.17% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.72% : 0.000002s : 12: predicate.incorporate_call_switch 6.09% : 0.000017s : 63: predicate.inline 1.09% : 0.000003s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000003s : 12: predicate.less_batch_normalization 1.72% : 0.000005s : 25: predicate.list_to_tuple_eliminator_ 2.41% : 0.000007s : 38: predicate.load_eliminater 1.29% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.25% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.88% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.80% : 0.000002s : 13: predicate.minmaximum_grad 0.72% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.50% : 0.000001s : 6: predicate.parallel_virtual_node 1.09% : 0.000003s : 14: predicate.partial_defer_inline 1.34% : 0.000004s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.07% : 0.000003s : 13: predicate.reduce_eliminate 0.58% : 0.000002s : 12: predicate.remove_not_recompute_node 1.16% : 0.000003s : 25: predicate.replace_applicator 0.49% : 0.000001s : 12: predicate.replace_old_param 0.28% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 0.93% : 0.000003s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.41% : 0.000004s : 18: predicate.special_op_eliminate 1.11% : 0.000003s : 12: predicate.specialize_transform 1.15% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.93% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.41% : 0.000007s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.62% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.17% : 0.000011s : 43: predicate.switch_simplify 0.76% : 0.000002s : 13: predicate.tile_eliminate 0.84% : 0.000002s : 13: predicate.transpose_eliminate 1.66% : 0.000005s : 25: predicate.tuple_list_convert_item_index_to_positive 1.72% : 0.000005s : 25: predicate.tuple_list_get_item_const_eliminator 1.61% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.75% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.49% : 0.000007s : 37: predicate.tuple_list_set_item_eliminator 1.71% : 0.000005s : 25: predicate.tuple_to_list_eliminator_ 2.41% : 0.000007s : 38: predicate.updatestate_pure_node_eliminater 3.33% : 0.000009s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.49% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000167 4 10.75% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.25% : 0.000149s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092656 192 0.01% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000074s : 1: add_recomputation 0.01% : 0.000013s : 1: assign_add_opt 0.17% : 0.000160s : 1: auto_monad 0.04% : 0.000040s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.39% : 0.000357s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.04% : 0.000033s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000055s : 1: distribtued_split 0.58% : 0.000535s : 1: eliminate_special_op_node 0.01% : 0.000013s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000005s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000513s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.45% : 0.001342s : 80: opt.transform.opt_a 0.07% : 0.000063s : 1: opt.transform.opt_after_cconv 0.20% : 0.000183s : 27: opt.transform.opt_b 0.07% : 0.000068s : 1: opt.transform.opt_trans_graph 0.04% : 0.000039s : 3: opt.transform.special_op_eliminate 0.06% : 0.000058s : 4: opt.transform.symbol_engine_opt 6.42% : 0.005951s : 1: opt_a 0.17% : 0.000157s : 1: opt_after_cconv 0.31% : 0.000288s : 1: opt_b 8.48% : 0.007853s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000022s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.02% : 0.000023s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.06% : 0.000057s : 1: remove_dup_value 0.28% : 0.000257s : 1: renormalize.infer 0.23% : 0.000217s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000156s : 1: rewriter_after_opt_a 0.05% : 0.000046s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000006s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000104s : 1: symbol_engine_optimizer 76.38% : 0.070767s : 1: task_emit 0.10% : 0.000089s : 1: tuple_transform 2.86% : 0.002651s : 1: type_inference 0.08% : 0.000076s : 1: validate Time group info: ------[substitution.] 0.000129 63 5.45% : 0.000007s : 2: substitution.depend_value_elim 2.01% : 0.000003s : 5: substitution.elim_not_effective 1.90% : 0.000002s : 5: substitution.fold_const_symbol 4.51% : 0.000006s : 6: substitution.graph_param_transform 50.53% : 0.000065s : 1: substitution.inline 4.04% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.55% : 0.000005s : 6: substitution.load_eliminater 2.87% : 0.000004s : 2: substitution.reduce_all_const_elim 6.15% : 0.000008s : 10: substitution.remove_not_recompute_node 2.62% : 0.000003s : 2: substitution.replace_old_param 9.15% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.22% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002388 2 88.22% : 0.002107s : 1: type_inference.infer 11.78% : 0.000281s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000229 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.08% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.32% : 0.000005s : 25: predicate.arithmetic_simplify 0.83% : 0.000002s : 13: predicate.cast_eliminate 0.77% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.30% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.89% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.25% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.11% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.88% : 0.000004s : 31: predicate.environ_get_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.05% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.88% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.70% : 0.000013s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.12% : 0.000003s : 12: predicate.less_batch_normalization 1.71% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.34% : 0.000005s : 38: predicate.load_eliminater 1.31% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.23% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.77% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.80% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.78% : 0.000002s : 13: predicate.minmaximum_grad 0.80% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.44% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.32% : 0.000003s : 19: predicate.partial_eliminate 0.90% : 0.000002s : 13: predicate.print_const_string_wrapper 0.90% : 0.000002s : 12: predicate.reduce_all_const_elim 1.06% : 0.000002s : 13: predicate.reduce_eliminate 0.63% : 0.000001s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.51% : 0.000001s : 12: predicate.replace_old_param 0.30% : 0.000001s : 6: predicate.reset_defer_inline 0.86% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000002s : 12: predicate.shard_identity_eliminate 1.35% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 0.96% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.04% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.35% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.85% : 0.000002s : 14: predicate.switch_defer_inline 1.75% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.43% : 0.000010s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.83% : 0.000002s : 13: predicate.transpose_eliminate 1.88% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.65% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.49% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.85% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.60% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.48% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.41% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.47% : 0.000001s : 6: predicate.value_based_eliminate 0.86% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.56% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000143 4 6.38% : 0.000009s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.62% : 0.000134s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091749 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.15% : 0.000141s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000333s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.54% : 0.000493s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000015s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.52% : 0.000479s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001113s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000159s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 5.75% : 0.005276s : 1: opt_a 0.15% : 0.000138s : 1: opt_after_cconv 0.28% : 0.000255s : 1: opt_b 7.54% : 0.006922s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000008s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.01% : 0.000012s : 1: remove_dup_value 0.25% : 0.000229s : 1: renormalize.infer 0.21% : 0.000190s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000151s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000086s : 1: symbol_engine_optimizer 78.84% : 0.072333s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.65% : 0.002436s : 1: type_inference 0.15% : 0.000138s : 1: validate TotalTime = 0.0774403, [21] [bootstrap]: 0.00027834 [type_inference]: 0.0022299 [auto_monad]: 9.62699e-05 [graph_reusing]: 1.91014e-06 [inline]: 1.11992e-06 [parallel-infer-symbol]: 1.23028e-06 [pre_auto_parallel]: 2.11797e-05 [insert-virtual-dataset]: 1.86032e-06 [parallel-infer-symbol-second]: 3.69735e-07 [dataset_repeat_opt]: 1.35973e-06 [pipeline_split]: 1.07009e-06 [optimize]: 0.00677071, [52] [py_interpret_to_execute]: 1.26399e-05 [rewriter_before_opt_a]: 3.00701e-05 [opt_a]: 0.0051478, [2] [Cycle 1]: 0.00141861, [43] [expand_dump_flag]: 2.59001e-06 [switch_simplify]: 2.57702e-05 [loop_unroll]: 1.30902e-05 [a_1]: 0.00032287 [recompute_prepare]: 8.74e-06 [updatestate_depend_eliminate]: 7.56001e-06 [updatestate_assign_eliminate]: 5.53997e-06 [updatestate_loads_eliminate]: 5.72018e-06 [parameter_eliminate]: 2.27988e-06 [a_2]: 0.00011605 [accelerated_algorithm]: 8.47038e-06 [shard]: 1.78022e-06 [meta_shard_fg_expand]: 3.09013e-06 [shard_inline]: 8.57981e-06 [auto_parallel]: 1.152e-05 [parallel]: 6.19981e-06 [flash_sp]: 7.51019e-06 [merge_comm]: 7.89994e-06 [allreduce_fusion]: 5.25033e-06 [matmul_add_comm_reduction]: 8.85036e-06 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 9.84967e-06 [virtual_dataset]: 7.8599e-06 [get_grad_eliminate_]: 7.40029e-06 [virtual_output]: 7.43987e-06 [merge_forward]: 4.84008e-06 [cell_reuse_recompute_pass]: 1.47009e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.66502e-05 [before_grad]: 1.38003e-05 [inplace_validation]: 4.6799e-06 [meta_fg_expand]: 5.12041e-06 [inplace_validation_after_expand]: 5.4501e-06 [flash_sp_send_recv_attached]: 3.34019e-06 [receive_attached]: 1.6503e-06 [after_resolve]: 1.055e-05 [a_after_grad]: 1.25896e-05 [special_op_eliminate]: 7.85012e-06 [renormalize]: 0.00038943 [add_forward_monad_depend]: 2.42004e-06 [auto_monad_grad]: 1.30013e-06 [auto_monad_eliminator]: 2.43699e-05 [cse]: 2.643e-05 [a_3]: 5.85499e-05 [Cycle 2]: 0.00076368, [43] [expand_dump_flag]: 1.11014e-06 [switch_simplify]: 9.01986e-06 [loop_unroll]: 8.00984e-06 [a_1]: 0.00020015 [recompute_prepare]: 7.39004e-06 [updatestate_depend_eliminate]: 5.70994e-06 [updatestate_assign_eliminate]: 4.39025e-06 [updatestate_loads_eliminate]: 4.92996e-06 [parameter_eliminate]: 1.08033e-06 [a_2]: 0.00010317 [accelerated_algorithm]: 8.46991e-06 [shard]: 1.07987e-06 [meta_shard_fg_expand]: 2.42004e-06 [shard_inline]: 7.75978e-06 [auto_parallel]: 1.00001e-05 [parallel]: 3.18e-06 [flash_sp]: 2.4098e-06 [merge_comm]: 5.8501e-06 [allreduce_fusion]: 4.78001e-06 [matmul_add_comm_reduction]: 7.01007e-06 [allreduce_slice_to_reducescatter]: 2.60305e-07 [virtual_shard_identity]: 8.45967e-06 [virtual_dataset]: 7.70995e-06 [get_grad_eliminate_]: 7.41985e-06 [virtual_output]: 7.13998e-06 [merge_forward]: 4.21004e-06 [cell_reuse_recompute_pass]: 1.72015e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.533e-05 [before_grad]: 1.22003e-05 [inplace_validation]: 4.42006e-06 [meta_fg_expand]: 4.81028e-06 [inplace_validation_after_expand]: 4.84986e-06 [flash_sp_send_recv_attached]: 8.60076e-07 [receive_attached]: 7.39936e-07 [after_resolve]: 9.83011e-06 [a_after_grad]: 1.17198e-05 [special_op_eliminate]: 7.28015e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.19912e-07 [auto_monad_grad]: 9.00123e-07 [auto_monad_eliminator]: 1.52001e-05 [cse]: 1.83401e-05 [a_3]: 4.869e-05 [py_interpret_to_execute_after_opt_a]: 8.3698e-06 [slice_cell_reuse_recomputed_activation]: 1.93017e-06 [rewriter_after_opt_a]: 0.0001329 [convert_after_rewriter]: 8.47969e-06 [order_py_execute_after_rewriter]: 5.56e-06 [opt_b]: 0.00024149, [1] [Cycle 1]: 0.00023644, [7] [b_1]: 0.00016266 [b_2]: 9.77004e-06 [updatestate_depend_eliminate]: 5.16977e-06 [updatestate_assign_eliminate]: 4.38001e-06 [updatestate_loads_eliminate]: 4.98025e-06 [renormalize]: 2.00234e-07 [cse]: 1.80602e-05 [optimize_parallel_all_gather_comm]: 7.78027e-06 [overlap_param_gather]: 9.10368e-07 [cconv]: 1.53696e-05 [loop_unroll]: 0.00051543 [opt_after_cconv]: 0.00012708, [1] [Cycle 1]: 0.00012148, [7] [c_1]: 5.166e-05 [parameter_eliminate]: 1.83005e-06 [updatestate_depend_eliminate]: 7.51996e-06 [updatestate_assign_eliminate]: 4.42984e-06 [updatestate_loads_eliminate]: 4.70039e-06 [cse]: 1.98502e-05 [renormalize]: 4.50294e-07 [remove_dup_value]: 1.03898e-05 [tuple_transform]: 6.62301e-05, [1] [Cycle 1]: 6.2e-05, [2] [d_1]: 5.31399e-05 [renormalize]: 1.50409e-07 [partial_unused_args_eliminate]: 1.41002e-06 [add_cache_embedding]: 1.055e-05 [add_recomputation]: 5.225e-05 [cse_after_recomputation]: 2.56402e-05, [1] [Cycle 1]: 2.13301e-05, [1] [cse]: 1.62199e-05 [environ_conv]: 6.96024e-06 [swap_dp_allreduce_reducescatter]: 6.99004e-06 [bias_add_comm_swap]: 1.68011e-06 [label_micro_interleaved_index]: 1.19023e-06 [label_fine_grained_interleaved_index]: 1.34017e-06 [merge_cast_opt]: 6.70087e-07 [slice_recompute_activation]: 9.69972e-07 [micro_interleaved_order_control]: 1.34995e-06 [assign_add_opt]: 6.51972e-06 [ForceFp32Comm]: 6.10016e-07 [remove_cast_before_assign_add]: 6.39819e-07 [full_micro_interleaved_order_control]: 1.64984e-06 [reorder_send_recv_between_fp_bp]: 1.09989e-06 [comm_op_add_attrs]: 5.79748e-07 [add_comm_op_reuse_tag]: 5.89993e-07 [interleave_split_concat_branches]: 5.20144e-07 [interleave_parallel_branches]: 5.89993e-07 [overlap_opt_shard_in_pipeline]: 1.09011e-06 [overlap_opt_shard_grad_in_pipeline]: 1.17021e-06 [control_data_broadcast_order]: 6.49597e-07 [grouped_pairwise_exchange_alltoall]: 6.20261e-07 [offloading_packed_experts]: 6.10016e-07 [overlap_recompute_and_grad_model_parallel]: 1.07009e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.39701e-07 [overlap_recompute_allgather_and_fa_grad]: 5.89993e-07 [overlap_grad_ring_attention]: 1.15996e-06 [overlap_grad_flash_sp]: 1.16299e-05 [begin_end_overlap_inline]: 4.69852e-07 [split_matmul_comm_elemetwise]: 1.18976e-06 [split_layernorm_comm]: 1.15996e-06 [handle_group_info]: 5.29923e-07 [symbol_engine_optimizer]: 8.28402e-05, [1] [Cycle 1]: 7.89398e-05, [6] [build]: 3.8296e-06 [elim_shapecalc]: 1.173e-05 [elim_not_effective]: 1.51503e-05 [opt_reshape]: 8.74e-06 [fold_const_symbol]: 1.38301e-05 [renormalize]: 2.40281e-07 [pipeline_parallel_scheduler]: 9.39704e-07 [auto_monad_reorder]: 2.27699e-05 [get_jit_bprop_graph]: 2.89641e-07 [rewriter_after_jit_bprop_graph]: 4.60073e-07 [eliminate_special_op_node]: 0.00048679 [distribtued_split]: 3.35802e-05 [validate]: 3.07402e-05 [task_emit]: 0.0672317 [execute]: 7.64988e-06 Sums bootstrap : 0.000278s : 0.38% type_inference : 0.002230s : 3.03% auto_monad : 0.000096s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000523s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000219s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000390s : 0.53% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000045s : 0.06% optimize.opt_a.a_3 : 0.000107s : 0.15% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000133s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000515s : 0.70% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000052s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000023s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000487s : 0.66% distribtued_split : 0.000034s : 0.05% validate : 0.000031s : 0.04% task_emit : 0.067232s : 91.43% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000109 63 4.51% : 0.000005s : 2: substitution.depend_value_elim 1.94% : 0.000002s : 5: substitution.elim_not_effective 1.89% : 0.000002s : 5: substitution.fold_const_symbol 5.95% : 0.000006s : 6: substitution.graph_param_transform 48.47% : 0.000053s : 1: substitution.inline 4.73% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.54% : 0.000004s : 6: substitution.load_eliminater 2.71% : 0.000003s : 2: substitution.reduce_all_const_elim 6.80% : 0.000007s : 10: substitution.remove_not_recompute_node 2.21% : 0.000002s : 2: substitution.replace_old_param 9.21% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.05% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002206 2 90.36% : 0.001993s : 1: type_inference.infer 9.64% : 0.000213s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000052 1 100.00% : 0.000052s : 1: match.inline ------[predicate.] 0.000226 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 0.92% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.69% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.30% : 0.000005s : 25: predicate.arithmetic_simplify 0.85% : 0.000002s : 13: predicate.cast_eliminate 0.84% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.49% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.24% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.88% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.59% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.10% : 0.000002s : 19: predicate.environ_add_const_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_depend_swap 1.87% : 0.000004s : 31: predicate.environ_get_eliminate 1.20% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.24% : 0.000003s : 14: predicate.float_depend_g_call 0.81% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.78% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.61% : 0.000013s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000002s : 12: predicate.less_batch_normalization 1.70% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.43% : 0.000005s : 38: predicate.load_eliminater 1.07% : 0.000002s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.77% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.83% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.81% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.78% : 0.000002s : 13: predicate.minmaximum_grad 0.69% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.14% : 0.000003s : 14: predicate.partial_defer_inline 1.28% : 0.000003s : 19: predicate.partial_eliminate 0.76% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.05% : 0.000002s : 13: predicate.reduce_eliminate 0.56% : 0.000001s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.49% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.84% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.94% : 0.000002s : 12: predicate.shard_identity_eliminate 1.35% : 0.000003s : 18: predicate.special_op_eliminate 1.05% : 0.000002s : 12: predicate.specialize_transform 1.07% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.32% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.70% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.27% : 0.000010s : 43: predicate.switch_simplify 0.84% : 0.000002s : 13: predicate.tile_eliminate 0.87% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.78% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 3.08% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.54% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.74% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.47% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.60% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.56% : 0.000001s : 6: predicate.value_based_eliminate 0.77% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000121 4 8.42% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.58% : 0.000111s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.085950 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000057s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.13% : 0.000108s : 1: auto_monad 0.03% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000004s : 1: bias_add_comm_swap 0.35% : 0.000302s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000040s : 1: distribtued_split 0.58% : 0.000499s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000015s : 1: execute 0.01% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.61% : 0.000525s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000013s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001071s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.18% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.03% : 0.000030s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 5.99% : 0.005151s : 1: opt_a 0.15% : 0.000131s : 1: opt_after_cconv 0.28% : 0.000244s : 1: opt_b 7.89% : 0.006778s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.02% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.24% : 0.000207s : 1: renormalize.infer 0.21% : 0.000177s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000139s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000086s : 1: symbol_engine_optimizer 78.25% : 0.067256s : 1: task_emit 0.08% : 0.000069s : 1: tuple_transform 2.61% : 0.002246s : 1: type_inference 0.07% : 0.000061s : 1: validate TotalTime = 0.0779172, [21] [bootstrap]: 0.00030064 [type_inference]: 0.00239141 [auto_monad]: 0.00012179 [graph_reusing]: 2.38977e-06 [inline]: 1.64984e-06 [parallel-infer-symbol]: 1.85007e-06 [pre_auto_parallel]: 2.58302e-05 [insert-virtual-dataset]: 2.54996e-06 [parallel-infer-symbol-second]: 3.7998e-07 [dataset_repeat_opt]: 1.17021e-06 [pipeline_split]: 1.14972e-06 [optimize]: 0.0069764, [52] [py_interpret_to_execute]: 1.472e-05 [rewriter_before_opt_a]: 3.42e-05 [opt_a]: 0.00529403, [2] [Cycle 1]: 0.0015059, [43] [expand_dump_flag]: 3.36021e-06 [switch_simplify]: 2.99001e-05 [loop_unroll]: 1.31298e-05 [a_1]: 0.00034188 [recompute_prepare]: 9.06968e-06 [updatestate_depend_eliminate]: 8.17003e-06 [updatestate_assign_eliminate]: 6.84988e-06 [updatestate_loads_eliminate]: 5.96e-06 [parameter_eliminate]: 2.59001e-06 [a_2]: 0.00011772 [accelerated_algorithm]: 8.55001e-06 [shard]: 1.43005e-06 [meta_shard_fg_expand]: 3.62005e-06 [shard_inline]: 8.39029e-06 [auto_parallel]: 1.17901e-05 [parallel]: 6.62031e-06 [flash_sp]: 1.02697e-05 [merge_comm]: 8.04011e-06 [allreduce_fusion]: 5.19026e-06 [matmul_add_comm_reduction]: 1.034e-05 [allreduce_slice_to_reducescatter]: 3.60422e-07 [virtual_shard_identity]: 9.24012e-06 [virtual_dataset]: 8.5202e-06 [get_grad_eliminate_]: 7.53999e-06 [virtual_output]: 7.58003e-06 [merge_forward]: 5.91017e-06 [cell_reuse_recompute_pass]: 1.9297e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.66199e-05 [before_grad]: 1.38599e-05 [inplace_validation]: 4.59002e-06 [meta_fg_expand]: 5.43986e-06 [inplace_validation_after_expand]: 6.32042e-06 [flash_sp_send_recv_attached]: 4.19002e-06 [receive_attached]: 1.68011e-06 [after_resolve]: 1.10604e-05 [a_after_grad]: 1.28401e-05 [special_op_eliminate]: 7.93021e-06 [renormalize]: 0.00042714 [add_forward_monad_depend]: 3.51993e-06 [auto_monad_grad]: 1.17021e-06 [auto_monad_eliminator]: 3.11998e-05 [cse]: 3.14098e-05 [a_3]: 5.71599e-05 [Cycle 2]: 0.00076889, [43] [expand_dump_flag]: 1.11992e-06 [switch_simplify]: 9.36026e-06 [loop_unroll]: 7.72998e-06 [a_1]: 0.00020124 [recompute_prepare]: 7.53999e-06 [updatestate_depend_eliminate]: 6.36e-06 [updatestate_assign_eliminate]: 4.4601e-06 [updatestate_loads_eliminate]: 4.86011e-06 [parameter_eliminate]: 8.801e-07 [a_2]: 0.00010321 [accelerated_algorithm]: 8.32975e-06 [shard]: 1.03004e-06 [meta_shard_fg_expand]: 2.56998e-06 [shard_inline]: 7.89016e-06 [auto_parallel]: 1.032e-05 [parallel]: 3.87989e-06 [flash_sp]: 3.39001e-06 [merge_comm]: 5.74999e-06 [allreduce_fusion]: 4.99981e-06 [matmul_add_comm_reduction]: 8.00984e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 8.84989e-06 [virtual_dataset]: 7.64988e-06 [get_grad_eliminate_]: 7.41007e-06 [virtual_output]: 6.93975e-06 [merge_forward]: 4.63007e-06 [cell_reuse_recompute_pass]: 1.64006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.50101e-05 [before_grad]: 1.22199e-05 [inplace_validation]: 4.14019e-06 [meta_fg_expand]: 4.49969e-06 [inplace_validation_after_expand]: 4.93973e-06 [flash_sp_send_recv_attached]: 7.19912e-07 [receive_attached]: 7.39936e-07 [after_resolve]: 9.72999e-06 [a_after_grad]: 1.16401e-05 [special_op_eliminate]: 6.92019e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.70321e-07 [auto_monad_grad]: 9.29926e-07 [auto_monad_eliminator]: 1.64001e-05 [cse]: 1.76802e-05 [a_3]: 4.852e-05 [py_interpret_to_execute_after_opt_a]: 8.46991e-06 [slice_cell_reuse_recomputed_activation]: 1.87987e-06 [rewriter_after_opt_a]: 0.00014763 [convert_after_rewriter]: 7.45989e-06 [order_py_execute_after_rewriter]: 5.81983e-06 [opt_b]: 0.00029898, [1] [Cycle 1]: 0.00029327, [7] [b_1]: 0.00016097 [b_2]: 6.674e-05 [updatestate_depend_eliminate]: 5.68014e-06 [updatestate_assign_eliminate]: 4.17978e-06 [updatestate_loads_eliminate]: 5.20004e-06 [renormalize]: 3.39933e-07 [cse]: 1.88602e-05 [optimize_parallel_all_gather_comm]: 7.83987e-06 [overlap_param_gather]: 6.70087e-07 [cconv]: 2.20099e-05 [loop_unroll]: 0.00046312 [opt_after_cconv]: 0.00012893, [1] [Cycle 1]: 0.00012318, [7] [c_1]: 5.19198e-05 [parameter_eliminate]: 2.47033e-06 [updatestate_depend_eliminate]: 7.77002e-06 [updatestate_assign_eliminate]: 4.5998e-06 [updatestate_loads_eliminate]: 5.03007e-06 [cse]: 2.043e-05 [renormalize]: 3.59956e-07 [remove_dup_value]: 1.31098e-05 [tuple_transform]: 6.92699e-05, [1] [Cycle 1]: 6.45001e-05, [2] [d_1]: 5.55702e-05 [renormalize]: 2.30037e-07 [partial_unused_args_eliminate]: 1.21025e-06 [add_cache_embedding]: 1.26003e-05 [add_recomputation]: 5.85299e-05 [cse_after_recomputation]: 2.59401e-05, [1] [Cycle 1]: 2.09603e-05, [1] [cse]: 1.59703e-05 [environ_conv]: 7.05989e-06 [swap_dp_allreduce_reducescatter]: 7.41985e-06 [bias_add_comm_swap]: 1.83005e-06 [label_micro_interleaved_index]: 2.03028e-06 [label_fine_grained_interleaved_index]: 1.7602e-06 [merge_cast_opt]: 1.11014e-06 [slice_recompute_activation]: 1.60979e-06 [micro_interleaved_order_control]: 1.62981e-06 [assign_add_opt]: 7.2103e-06 [ForceFp32Comm]: 3.7998e-07 [remove_cast_before_assign_add]: 9.00123e-07 [full_micro_interleaved_order_control]: 1.91992e-06 [reorder_send_recv_between_fp_bp]: 1.93017e-06 [comm_op_add_attrs]: 9.69972e-07 [add_comm_op_reuse_tag]: 9.49949e-07 [interleave_split_concat_branches]: 9.00123e-07 [interleave_parallel_branches]: 8.60076e-07 [overlap_opt_shard_in_pipeline]: 8.99658e-07 [overlap_opt_shard_grad_in_pipeline]: 2.1304e-06 [control_data_broadcast_order]: 1.01002e-06 [grouped_pairwise_exchange_alltoall]: 1.20001e-06 [offloading_packed_experts]: 1.03004e-06 [overlap_recompute_and_grad_model_parallel]: 1.87987e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.69738e-07 [overlap_recompute_allgather_and_fa_grad]: 1.44029e-06 [overlap_grad_ring_attention]: 1.72993e-06 [overlap_grad_flash_sp]: 1.44299e-05 [begin_end_overlap_inline]: 3.20375e-07 [split_matmul_comm_elemetwise]: 1.89012e-06 [split_layernorm_comm]: 1.68011e-06 [handle_group_info]: 8.69855e-07 [symbol_engine_optimizer]: 8.29301e-05, [1] [Cycle 1]: 7.84299e-05, [6] [build]: 3.95998e-06 [elim_shapecalc]: 1.17002e-05 [elim_not_effective]: 1.592e-05 [opt_reshape]: 9.19029e-06 [fold_const_symbol]: 1.289e-05 [renormalize]: 1.99769e-07 [pipeline_parallel_scheduler]: 1.20001e-06 [auto_monad_reorder]: 2.71099e-05 [get_jit_bprop_graph]: 2.70084e-07 [rewriter_after_jit_bprop_graph]: 6.09551e-07 [eliminate_special_op_node]: 0.00047315 [distribtued_split]: 3.88296e-05 [validate]: 3.34298e-05 [task_emit]: 0.0672692 [execute]: 1.04201e-05 Sums bootstrap : 0.000301s : 0.41% type_inference : 0.002391s : 3.23% auto_monad : 0.000122s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000543s : 0.73% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000221s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000002s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000427s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000048s : 0.06% optimize.opt_a.cse : 0.000049s : 0.07% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000148s : 0.20% optimize.convert_after_rewriter : 0.000007s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.22% optimize.opt_b.b_2 : 0.000067s : 0.09% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000463s : 0.63% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.08% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000059s : 0.08% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000000s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000027s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000473s : 0.64% distribtued_split : 0.000039s : 0.05% validate : 0.000033s : 0.05% task_emit : 0.067269s : 90.98% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000125 63 4.31% : 0.000005s : 2: substitution.depend_value_elim 1.94% : 0.000002s : 5: substitution.elim_not_effective 1.38% : 0.000002s : 5: substitution.fold_const_symbol 5.43% : 0.000007s : 6: substitution.graph_param_transform 52.15% : 0.000065s : 1: substitution.inline 4.15% : 0.000005s : 10: substitution.j_node_and_user_rematch 2.98% : 0.000004s : 6: substitution.load_eliminater 2.85% : 0.000004s : 2: substitution.reduce_all_const_elim 6.39% : 0.000008s : 10: substitution.remove_not_recompute_node 2.65% : 0.000003s : 2: substitution.replace_old_param 8.00% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 7.76% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002364 2 89.09% : 0.002106s : 1: type_inference.infer 10.91% : 0.000258s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000231 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 0.96% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.18% : 0.000005s : 25: predicate.arithmetic_simplify 0.76% : 0.000002s : 13: predicate.cast_eliminate 0.77% : 0.000002s : 12: predicate.check_bprop_eliminate 0.70% : 0.000002s : 12: predicate.compare_switch_simplify 0.26% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.14% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.62% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_depend_swap 1.87% : 0.000004s : 31: predicate.environ_get_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.80% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.29% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.05% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.67% : 0.000013s : 63: predicate.inline 1.13% : 0.000003s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.05% : 0.000002s : 12: predicate.less_batch_normalization 1.64% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.29% : 0.000005s : 38: predicate.load_eliminater 1.27% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.23% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.87% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.75% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.13% : 0.000003s : 14: predicate.partial_defer_inline 1.27% : 0.000003s : 19: predicate.partial_eliminate 0.78% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.13% : 0.000003s : 13: predicate.reduce_eliminate 0.62% : 0.000001s : 12: predicate.remove_not_recompute_node 1.20% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.28% : 0.000001s : 6: predicate.reset_defer_inline 0.78% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.49% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.95% : 0.000002s : 12: predicate.shard_identity_eliminate 1.30% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 1.05% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.23% : 0.000005s : 38: predicate.stopgrad_eliminater 0.49% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.37% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.76% : 0.000002s : 13: predicate.transpose_eliminate 1.70% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.72% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.69% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.77% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.35% : 0.000005s : 37: predicate.tuple_list_set_item_eliminator 1.68% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 5.20% : 0.000012s : 50: predicate.updatestate_useless_node_eliminater 0.44% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.85% : 0.000002s : 12: predicate.virtual_output_eliminate 0.48% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000147 4 10.59% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.41% : 0.000132s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.086755 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000063s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000134s : 1: auto_monad 0.04% : 0.000033s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000324s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000011s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000046s : 1: distribtued_split 0.56% : 0.000486s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000472s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.26% : 0.001096s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.24% : 0.000208s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.11% : 0.005298s : 1: opt_a 0.15% : 0.000133s : 1: opt_after_cconv 0.35% : 0.000302s : 1: opt_b 8.05% : 0.006984s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000235s : 1: renormalize.infer 0.22% : 0.000188s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000153s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000086s : 1: symbol_engine_optimizer 77.57% : 0.067300s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.78% : 0.002410s : 1: type_inference 0.08% : 0.000066s : 1: validate TotalTime = 0.0785902, [21] [bootstrap]: 0.00030059 [type_inference]: 0.0023914 [auto_monad]: 0.0001218 [graph_reusing]: 2.40002e-06 [inline]: 1.28988e-06 [parallel-infer-symbol]: 1.71969e-06 [pre_auto_parallel]: 2.607e-05 [insert-virtual-dataset]: 2.52994e-06 [parallel-infer-symbol-second]: 3.70201e-07 [dataset_repeat_opt]: 1.17999e-06 [pipeline_split]: 1.45007e-06 [optimize]: 0.00697622, [52] [py_interpret_to_execute]: 1.46599e-05 [rewriter_before_opt_a]: 3.38997e-05 [opt_a]: 0.00529305, [2] [Cycle 1]: 0.00150653, [43] [expand_dump_flag]: 3.43006e-06 [switch_simplify]: 2.95602e-05 [loop_unroll]: 1.31298e-05 [a_1]: 0.00034042 [recompute_prepare]: 9.05013e-06 [updatestate_depend_eliminate]: 9.41986e-06 [updatestate_assign_eliminate]: 5.91995e-06 [updatestate_loads_eliminate]: 7.13998e-06 [parameter_eliminate]: 3.18e-06 [a_2]: 0.00011733 [accelerated_algorithm]: 8.6301e-06 [shard]: 1.97999e-06 [meta_shard_fg_expand]: 3.70992e-06 [shard_inline]: 8.50018e-06 [auto_parallel]: 1.17803e-05 [parallel]: 6.92997e-06 [flash_sp]: 1.07498e-05 [merge_comm]: 7.98004e-06 [allreduce_fusion]: 5.48968e-06 [matmul_add_comm_reduction]: 1.05202e-05 [allreduce_slice_to_reducescatter]: 5.09899e-07 [virtual_shard_identity]: 9.74024e-06 [virtual_dataset]: 8.27014e-06 [get_grad_eliminate_]: 7.96001e-06 [virtual_output]: 7.66013e-06 [merge_forward]: 5.11995e-06 [cell_reuse_recompute_pass]: 1.79e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.63498e-05 [before_grad]: 1.35698e-05 [inplace_validation]: 5.01005e-06 [meta_fg_expand]: 5.85988e-06 [inplace_validation_after_expand]: 5.9898e-06 [flash_sp_send_recv_attached]: 4.33996e-06 [receive_attached]: 2.40002e-06 [after_resolve]: 1.13198e-05 [a_after_grad]: 1.24197e-05 [special_op_eliminate]: 7.70995e-06 [renormalize]: 0.00042794 [add_forward_monad_depend]: 3.45986e-06 [auto_monad_grad]: 2.44007e-06 [auto_monad_eliminator]: 3.12501e-05 [cse]: 3.19304e-05 [a_3]: 5.84098e-05 [Cycle 2]: 0.0007675, [43] [expand_dump_flag]: 8.89879e-07 [switch_simplify]: 9.18005e-06 [loop_unroll]: 7.85012e-06 [a_1]: 0.00020085 [recompute_prepare]: 7.68993e-06 [updatestate_depend_eliminate]: 6.00005e-06 [updatestate_assign_eliminate]: 5.03007e-06 [updatestate_loads_eliminate]: 5.30994e-06 [parameter_eliminate]: 1.03004e-06 [a_2]: 0.00010371 [accelerated_algorithm]: 8.36002e-06 [shard]: 1.00024e-06 [meta_shard_fg_expand]: 2.40002e-06 [shard_inline]: 7.68015e-06 [auto_parallel]: 1.06497e-05 [parallel]: 3.64985e-06 [flash_sp]: 3.49991e-06 [merge_comm]: 5.9302e-06 [allreduce_fusion]: 5.32996e-06 [matmul_add_comm_reduction]: 7.56001e-06 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 8.21007e-06 [virtual_dataset]: 7.36024e-06 [get_grad_eliminate_]: 7.25966e-06 [virtual_output]: 7.30017e-06 [merge_forward]: 4.50015e-06 [cell_reuse_recompute_pass]: 1.8198e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.49999e-05 [before_grad]: 1.24201e-05 [inplace_validation]: 4.35999e-06 [meta_fg_expand]: 4.50015e-06 [inplace_validation_after_expand]: 5.28991e-06 [flash_sp_send_recv_attached]: 9.00123e-07 [receive_attached]: 8.00006e-07 [after_resolve]: 1.00201e-05 [a_after_grad]: 1.16699e-05 [special_op_eliminate]: 7.2401e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 7.20378e-07 [auto_monad_grad]: 1.23028e-06 [auto_monad_eliminator]: 1.79401e-05 [cse]: 1.89301e-05 [a_3]: 4.84199e-05 [py_interpret_to_execute_after_opt_a]: 9.14e-06 [slice_cell_reuse_recomputed_activation]: 2.00002e-06 [rewriter_after_opt_a]: 0.00014642 [convert_after_rewriter]: 8.8797e-06 [order_py_execute_after_rewriter]: 5.96e-06 [opt_b]: 0.00029823, [1] [Cycle 1]: 0.00029307, [7] [b_1]: 0.00016118 [b_2]: 6.71498e-05 [updatestate_depend_eliminate]: 5.45988e-06 [updatestate_assign_eliminate]: 4.38001e-06 [updatestate_loads_eliminate]: 5.11995e-06 [renormalize]: 2.10013e-07 [cse]: 1.904e-05 [optimize_parallel_all_gather_comm]: 8.00006e-06 [overlap_param_gather]: 1.34017e-06 [cconv]: 2.25902e-05 [loop_unroll]: 0.00046281 [opt_after_cconv]: 0.00012889, [1] [Cycle 1]: 0.00012322, [7] [c_1]: 5.17801e-05 [parameter_eliminate]: 1.98046e-06 [updatestate_depend_eliminate]: 8.2599e-06 [updatestate_assign_eliminate]: 4.39957e-06 [updatestate_loads_eliminate]: 5.04963e-06 [cse]: 2.09599e-05 [renormalize]: 3.39933e-07 [remove_dup_value]: 1.28997e-05 [tuple_transform]: 6.75898e-05, [1] [Cycle 1]: 6.30403e-05, [2] [d_1]: 5.36996e-05 [renormalize]: 1.59722e-07 [partial_unused_args_eliminate]: 1.85985e-06 [add_cache_embedding]: 1.23698e-05 [add_recomputation]: 5.85802e-05 [cse_after_recomputation]: 2.58498e-05, [1] [Cycle 1]: 2.167e-05, [1] [cse]: 1.66101e-05 [environ_conv]: 6.65011e-06 [swap_dp_allreduce_reducescatter]: 7.49994e-06 [bias_add_comm_swap]: 2.19001e-06 [label_micro_interleaved_index]: 1.82027e-06 [label_fine_grained_interleaved_index]: 1.79978e-06 [merge_cast_opt]: 9.49949e-07 [slice_recompute_activation]: 1.39978e-06 [micro_interleaved_order_control]: 1.28988e-06 [assign_add_opt]: 7.51019e-06 [ForceFp32Comm]: 8.00006e-07 [remove_cast_before_assign_add]: 1.01002e-06 [full_micro_interleaved_order_control]: 1.90036e-06 [reorder_send_recv_between_fp_bp]: 1.82027e-06 [comm_op_add_attrs]: 5.40167e-07 [add_comm_op_reuse_tag]: 9.19681e-07 [interleave_split_concat_branches]: 4.49829e-07 [interleave_parallel_branches]: 5.09899e-07 [overlap_opt_shard_in_pipeline]: 8.99658e-07 [overlap_opt_shard_grad_in_pipeline]: 1.93017e-06 [control_data_broadcast_order]: 1.17999e-06 [grouped_pairwise_exchange_alltoall]: 1.30013e-06 [offloading_packed_experts]: 9.30391e-07 [overlap_recompute_and_grad_model_parallel]: 1.6503e-06 [overlap_grad_matmul_and_grad_allreduce]: 6.79865e-07 [overlap_recompute_allgather_and_fa_grad]: 1.36998e-06 [overlap_grad_ring_attention]: 1.4496e-06 [overlap_grad_flash_sp]: 1.37398e-05 [begin_end_overlap_inline]: 7.19912e-07 [split_matmul_comm_elemetwise]: 1.89012e-06 [split_layernorm_comm]: 1.64984e-06 [handle_group_info]: 7.10133e-07 [symbol_engine_optimizer]: 8.207e-05, [1] [Cycle 1]: 7.78199e-05, [6] [build]: 4.19002e-06 [elim_shapecalc]: 1.13198e-05 [elim_not_effective]: 1.55498e-05 [opt_reshape]: 8.59983e-06 [fold_const_symbol]: 1.34199e-05 [renormalize]: 1.60187e-07 [pipeline_parallel_scheduler]: 1.20979e-06 [auto_monad_reorder]: 2.69101e-05 [get_jit_bprop_graph]: 4.00003e-07 [rewriter_after_jit_bprop_graph]: 3.09665e-07 [eliminate_special_op_node]: 0.0004734 [distribtued_split]: 3.90899e-05 [validate]: 3.32599e-05 [task_emit]: 0.0679535 [execute]: 7.13021e-06 Sums bootstrap : 0.000301s : 0.40% type_inference : 0.002391s : 3.20% auto_monad : 0.000122s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000541s : 0.73% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000221s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000428s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.07% optimize.opt_a.cse : 0.000051s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000146s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.22% optimize.opt_b.b_2 : 0.000067s : 0.09% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000463s : 0.62% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000059s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000027s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000473s : 0.63% distribtued_split : 0.000039s : 0.05% validate : 0.000033s : 0.04% task_emit : 0.067954s : 91.05% execute : 0.000007s : 0.01% TotalTime = 0.0789369, [21] [bootstrap]: 0.00028764 [type_inference]: 0.00226433 [auto_monad]: 0.00010374 [graph_reusing]: 2.19001e-06 [inline]: 1.39e-06 [parallel-infer-symbol]: 1.22003e-06 [pre_auto_parallel]: 2.08397e-05 [insert-virtual-dataset]: 1.83983e-06 [parallel-infer-symbol-second]: 3.40398e-07 [dataset_repeat_opt]: 9.60194e-07 [pipeline_split]: 1.05007e-06 [optimize]: 0.0067659, [52] [py_interpret_to_execute]: 1.19703e-05 [rewriter_before_opt_a]: 3.03797e-05 [opt_a]: 0.00512876, [2] [Cycle 1]: 0.00141249, [43] [expand_dump_flag]: 2.4396e-06 [switch_simplify]: 2.62996e-05 [loop_unroll]: 1.31801e-05 [a_1]: 0.0003255 [recompute_prepare]: 8.71019e-06 [updatestate_depend_eliminate]: 7.47992e-06 [updatestate_assign_eliminate]: 5.69969e-06 [updatestate_loads_eliminate]: 6.40005e-06 [parameter_eliminate]: 2.18023e-06 [a_2]: 0.00011101 [accelerated_algorithm]: 7.98004e-06 [shard]: 2.12016e-06 [meta_shard_fg_expand]: 3.05986e-06 [shard_inline]: 7.94977e-06 [auto_parallel]: 1.04201e-05 [parallel]: 5.24009e-06 [flash_sp]: 8.69995e-06 [merge_comm]: 7.18981e-06 [allreduce_fusion]: 4.82984e-06 [matmul_add_comm_reduction]: 9.49996e-06 [allreduce_slice_to_reducescatter]: 6.10016e-07 [virtual_shard_identity]: 9.37004e-06 [virtual_dataset]: 7.56979e-06 [get_grad_eliminate_]: 7.31973e-06 [virtual_output]: 7.83987e-06 [merge_forward]: 4.78001e-06 [cell_reuse_recompute_pass]: 1.57999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.60201e-05 [before_grad]: 1.29798e-05 [inplace_validation]: 4.27011e-06 [meta_fg_expand]: 5.01005e-06 [inplace_validation_after_expand]: 5.51995e-06 [flash_sp_send_recv_attached]: 3.90038e-06 [receive_attached]: 2.33995e-06 [after_resolve]: 1.03e-05 [a_after_grad]: 1.196e-05 [special_op_eliminate]: 7.44034e-06 [renormalize]: 0.00039912 [add_forward_monad_depend]: 2.8098e-06 [auto_monad_grad]: 1.61026e-06 [auto_monad_eliminator]: 2.40896e-05 [cse]: 2.60998e-05 [a_3]: 5.63101e-05 [Cycle 2]: 0.00076478, [43] [expand_dump_flag]: 8.2003e-07 [switch_simplify]: 9.2797e-06 [loop_unroll]: 7.91019e-06 [a_1]: 0.00020133 [recompute_prepare]: 7.52043e-06 [updatestate_depend_eliminate]: 5.56e-06 [updatestate_assign_eliminate]: 4.50015e-06 [updatestate_loads_eliminate]: 4.97e-06 [parameter_eliminate]: 1.30991e-06 [a_2]: 0.00010333 [accelerated_algorithm]: 8.41031e-06 [shard]: 1.06031e-06 [meta_shard_fg_expand]: 2.89036e-06 [shard_inline]: 7.95024e-06 [auto_parallel]: 9.68017e-06 [parallel]: 3.17022e-06 [flash_sp]: 3.01981e-06 [merge_comm]: 5.92042e-06 [allreduce_fusion]: 5.11995e-06 [matmul_add_comm_reduction]: 7.39982e-06 [allreduce_slice_to_reducescatter]: 2.49594e-07 [virtual_shard_identity]: 8.61008e-06 [virtual_dataset]: 7.53021e-06 [get_grad_eliminate_]: 7.28015e-06 [virtual_output]: 6.88015e-06 [merge_forward]: 4.65987e-06 [cell_reuse_recompute_pass]: 1.88965e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.49603e-05 [before_grad]: 1.29701e-05 [inplace_validation]: 4.19002e-06 [meta_fg_expand]: 4.71994e-06 [inplace_validation_after_expand]: 4.90015e-06 [flash_sp_send_recv_attached]: 8.50298e-07 [receive_attached]: 6.80331e-07 [after_resolve]: 9.1698e-06 [a_after_grad]: 1.15903e-05 [special_op_eliminate]: 7.13998e-06 [renormalize]: 8.98726e-08 [add_forward_monad_depend]: 8.10251e-07 [auto_monad_grad]: 1.03982e-06 [auto_monad_eliminator]: 1.56397e-05 [cse]: 1.80602e-05 [a_3]: 4.88297e-05 [py_interpret_to_execute_after_opt_a]: 9.28994e-06 [slice_cell_reuse_recomputed_activation]: 1.99024e-06 [rewriter_after_opt_a]: 0.00013047 [convert_after_rewriter]: 8.69995e-06 [order_py_execute_after_rewriter]: 5.52973e-06 [opt_b]: 0.00023692, [1] [Cycle 1]: 0.00023174, [7] [b_1]: 0.00016088 [b_2]: 9.49018e-06 [updatestate_depend_eliminate]: 4.99003e-06 [updatestate_assign_eliminate]: 4.12995e-06 [updatestate_loads_eliminate]: 4.57978e-06 [renormalize]: 2.70084e-07 [cse]: 1.72597e-05 [optimize_parallel_all_gather_comm]: 7.4897e-06 [overlap_param_gather]: 1.24006e-06 [cconv]: 1.53696e-05 [loop_unroll]: 0.00052986 [opt_after_cconv]: 0.00012508, [1] [Cycle 1]: 0.00011967, [7] [c_1]: 5.13103e-05 [parameter_eliminate]: 1.60979e-06 [updatestate_depend_eliminate]: 7.76025e-06 [updatestate_assign_eliminate]: 4.48013e-06 [updatestate_loads_eliminate]: 4.63007e-06 [cse]: 1.89799e-05 [renormalize]: 2.79862e-07 [remove_dup_value]: 1.03503e-05 [tuple_transform]: 6.78003e-05, [1] [Cycle 1]: 6.33001e-05, [2] [d_1]: 5.41201e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.36998e-06 [add_cache_embedding]: 1.16597e-05 [add_recomputation]: 5.41997e-05 [cse_after_recomputation]: 2.47499e-05, [1] [Cycle 1]: 2.049e-05, [1] [cse]: 1.57603e-05 [environ_conv]: 6.29993e-06 [swap_dp_allreduce_reducescatter]: 7.32997e-06 [bias_add_comm_swap]: 1.64984e-06 [label_micro_interleaved_index]: 1.72993e-06 [label_fine_grained_interleaved_index]: 1.48034e-06 [merge_cast_opt]: 8.00006e-07 [slice_recompute_activation]: 1.02026e-06 [micro_interleaved_order_control]: 1.26986e-06 [assign_add_opt]: 6.93975e-06 [ForceFp32Comm]: 6.10016e-07 [remove_cast_before_assign_add]: 6.39819e-07 [full_micro_interleaved_order_control]: 1.45985e-06 [reorder_send_recv_between_fp_bp]: 1.39978e-06 [comm_op_add_attrs]: 6.50063e-07 [add_comm_op_reuse_tag]: 6.50063e-07 [interleave_split_concat_branches]: 5.59725e-07 [interleave_parallel_branches]: 5.59725e-07 [overlap_opt_shard_in_pipeline]: 5.99772e-07 [overlap_opt_shard_grad_in_pipeline]: 1.2801e-06 [control_data_broadcast_order]: 6.79865e-07 [grouped_pairwise_exchange_alltoall]: 7.10133e-07 [offloading_packed_experts]: 7.69738e-07 [overlap_recompute_and_grad_model_parallel]: 1.36998e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.09899e-07 [overlap_recompute_allgather_and_fa_grad]: 7.39936e-07 [overlap_grad_ring_attention]: 1.78022e-06 [overlap_grad_flash_sp]: 1.32802e-05 [begin_end_overlap_inline]: 4.80097e-07 [split_matmul_comm_elemetwise]: 1.20979e-06 [split_layernorm_comm]: 1.1404e-06 [handle_group_info]: 5.89993e-07 [symbol_engine_optimizer]: 8.32798e-05, [1] [Cycle 1]: 7.87904e-05, [6] [build]: 3.50969e-06 [elim_shapecalc]: 1.18599e-05 [elim_not_effective]: 1.58902e-05 [opt_reshape]: 9.06968e-06 [fold_const_symbol]: 1.291e-05 [renormalize]: 1.99769e-07 [pipeline_parallel_scheduler]: 1.07009e-06 [auto_monad_reorder]: 2.245e-05 [get_jit_bprop_graph]: 5.49946e-07 [rewriter_after_jit_bprop_graph]: 2.99886e-07 [eliminate_special_op_node]: 0.00048601 [distribtued_split]: 3.39504e-05 [validate]: 3.03499e-05 [task_emit]: 0.0686758 [execute]: 9.66014e-06 Sums bootstrap : 0.000288s : 0.38% type_inference : 0.002264s : 3.02% au Time group info: ------[substitution.] 0.000128 63 4.97% : 0.000006s : 2: substitution.depend_value_elim 2.02% : 0.000003s : 5: substitution.elim_not_effective 1.92% : 0.000002s : 5: substitution.fold_const_symbol 5.27% : 0.000007s : 6: substitution.graph_param_transform 50.90% : 0.000065s : 1: substitution.inline 4.13% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.02% : 0.000004s : 6: substitution.load_eliminater 2.60% : 0.000003s : 2: substitution.reduce_all_const_elim 6.06% : 0.000008s : 10: substitution.remove_not_recompute_node 2.64% : 0.000003s : 2: substitution.replace_old_param 8.53% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.93% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002364 2 89.09% : 0.002106s : 1: type_inference.infer 10.91% : 0.000258s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000224 1420 0.82% : 0.000002s : 13: predicate.accumulaten_eliminater 1.03% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.82% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.11% : 0.000005s : 25: predicate.arithmetic_simplify 0.85% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.35% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.88% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.22% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.93% : 0.000004s : 31: predicate.environ_get_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.21% : 0.000003s : 14: predicate.float_depend_g_call 0.80% : 0.000002s : 12: predicate.float_environ_get_switch 1.13% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.63% : 0.000013s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000002s : 12: predicate.less_batch_normalization 1.76% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.41% : 0.000005s : 38: predicate.load_eliminater 1.19% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.78% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicatto_monad : 0.000104s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000036s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000527s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.02% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000214s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000020s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000008s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000019s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000399s : 0.53% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000130s : 0.17% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.21% optimize.opt_b.b_2 e.mini_step_allgather_replace 0.81% : 0.000002s : 13: predicate.minmaximum_grad 0.86% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.20% : 0.000003s : 14: predicate.partial_defer_inline 1.27% : 0.000003s : 19: predicate.partial_eliminate 0.86% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.04% : 0.000002s : 13: predicate.reduce_eliminate 0.58% : 0.000001s : 12: predicate.remove_not_recompute_node 1.17% : 0.000003s : 25: predicate.replace_applicator 0.43% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.85% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.09% : 0.000002s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.90% : 0.000002s : 12: predicate.shard_identity_eliminate 1.32% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 1.04% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.08% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.35% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.65% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.51% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.78% : 0.000002s : 13: predicate.transpose_eliminate 1.87% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.71% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.52% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.73% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.50% : 0.000003s : 25: predicate.tuple_list_get_set_item_eliminator 2.65% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.63% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.37% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.41% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.48% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000146 4 10.54% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.46% : 0.000131s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087424 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000063s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000134s : 1: auto_monad 0.04% : 0.000033s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.37% : 0.000324s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.00004 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000017s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000007s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000530s : 0.71% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000019s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000054s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engin6s : 1: distribtued_split 0.56% : 0.000486s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000015s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000472s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001094s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.24% : 0.000209s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 6.06% : 0.005297s : 1: opt_a 0.15% : 0.000133s : 1: opt_after_cconv 0.34% : 0.000301s : 1: opt_b 7.99% : 0.006985s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000233s : 1: renormalize.infer 0.22% : 0.000189s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000152s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000085s : 1: symbol_engine_optimizer 77.75% : 0.067976s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.76% : 0.002410s : 1: type_inference 0.08% : 0.000067s : 1: validate e_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000022s : 0.03% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000486s : 0.65% distribtued_split : 0.000034s : 0.05% validate : 0.000030s : 0.04% task_emit : 0.068676s : 91.51% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000111 63 4.28% : 0.000005s : 2: substitution.depend_value_elim 2.29% : 0.000003s : 5: substitution.elim_not_effective 1.76% : 0.000002s : 5: substitution.fold_const_symbol 5.51% : 0.000006s : 6: substitution.graph_param_transform 49.49% : 0.000055s : 1: substitution.inline 4.53% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.71% : 0.000004s : 6: substitution.load_eliminater 2.10% : 0.000002s : 2: substitution.reduce_all_const_elim 6.88% : 0.000008s : 10: substitution.remove_not_recompute_node 2.34% : 0.000003s : 2: substitution.replace_old_param 9.25% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 7.87% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002240 2 90.07% : 0.002017s : 1: type_inference.infer 9.93% : 0.000222s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000054 1 100.00% : 0.000054s : 1: match.inline ------[predicate.] 0.000225 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.07% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.89% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.27% : 0.000005s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.42% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.30% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.86% : 0.000002s : 12: predicate.depend_value_elim 0.89% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.94% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.57% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.29% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_depend_swap 1.97% : 0.000004s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.32% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.10% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.31% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.64% : 0.000013s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.01% : 0.000002s : 12: predicate.less_batch_normalization 1.67% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.43% : 0.000005s : 38: predicate.load_eliminater 1.19% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.80% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.75% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.69% : 0.000002s : 6: predicate.mutable_eliminate 0.50% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.13% : 0.000003s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.89% : 0.000002s : 13: predicate.print_const_string_wrapper 0.90% : 0.000002s : 12: predicate.reduce_all_const_elim 1.11% : 0.000002s : 13: predicate.reduce_eliminate 0.48% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.42% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.77% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000002s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.90% : 0.000002s : 12: predicate.shard_identity_eliminate 1.29% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.06% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.06% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.37% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.61% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.33% : 0.000010s : 43: predicate.switch_simplify 0.85% : 0.000002s : 13: predicate.tile_eliminate 0.75% : 0.000002s : 13: predicate.transpose_eliminate 1.81% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 3.04% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.52% : 0.000003s : 25: predicate.tuple_list_get_set_item_eliminator 2.74% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.73% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.45% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.45% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.48% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.84% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000125 4 8.40% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.60% : 0.000115s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087441 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000058s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.13% : 0.000115s : 1: auto_monad 0.03% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000311s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000042s : 1: distribtued_split 0.57% : 0.000499s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000004s : 1: label_micro_interleaved_index 0.62% : 0.000539s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001065s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.03% : 0.000030s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 5.87% : 0.005133s : 1: opt_a 0.15% : 0.000129s : 1: opt_after_cconv 0.27% : 0.000240s : 1: opt_b 7.75% : 0.006774s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000026s : 1: pre_auto_parallel 0.02% : 0.000016s : 1: py_interpret_to_execute 0.02% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.25% : 0.000216s : 1: renormalize.infer 0.20% : 0.000178s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000136s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000086s : 1: symbol_engine_optimizer 78.57% : 0.068701s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.61% : 0.002280s : 1: type_inference 0.07% : 0.000062s : 1: validate TotalTime = 0.0796402, [21] [bootstrap]: 0.00030063 [type_inference]: 0.00245675 [auto_monad]: 0.0001208 [graph_reusing]: 2.11969e-06 [inline]: 1.21025e-06 [parallel-infer-symbol]: 1.66008e-06 [pre_auto_parallel]: 2.43396e-05 [insert-virtual-dataset]: 2.44007e-06 [parallel-infer-symbol-second]: 3.69735e-07 [dataset_repeat_opt]: 1.20979e-06 [pipeline_split]: 1.19023e-06 [optimize]: 0.00708598, [52] [py_interpret_to_execute]: 1.40201e-05 [rewriter_before_opt_a]: 3.60301e-05 [opt_a]: 0.00536232, [2] [Cycle 1]: 0.0015004, [43] [expand_dump_flag]: 3.06033e-06 [switch_simplify]: 2.82601e-05 [loop_unroll]: 1.34702e-05 [a_1]: 0.00033816 [recompute_prepare]: 8.54023e-06 [updatestate_depend_eliminate]: 9.09995e-06 [updatestate_assign_eliminate]: 5.68992e-06 [updatestate_loads_eliminate]: 7.49994e-06 [parameter_eliminate]: 3.05008e-06 [a_2]: 0.00011543 [accelerated_algorithm]: 8.2301e-06 [shard]: 2.48989e-06 [meta_shard_fg_expand]: 4.09968e-06 [shard_inline]: 7.88039e-06 [auto_parallel]: 1.14199e-05 [parallel]: 7.91019e-06 [flash_sp]: 1.10301e-05 [merge_comm]: 7.34022e-06 [allreduce_fusion]: 5.15999e-06 [matmul_add_comm_reduction]: 9.98983e-06 [allreduce_slice_to_reducescatter]: 4.69852e-07 [virtual_shard_identity]: 9.35979e-06 [virtual_dataset]: 7.79005e-06 [get_grad_eliminate_]: 7.72998e-06 [virtual_output]: 7.8897e-06 [merge_forward]: 6.15977e-06 [cell_reuse_recompute_pass]: 1.79e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.67103e-05 [before_grad]: 1.35e-05 [inplace_validation]: 5.10039e-06 [meta_fg_expand]: 5.11995e-06 [inplace_validation_after_expand]: 5.9302e-06 [flash_sp_send_recv_attached]: 4.95976e-06 [receive_attached]: 2.78e-06 [after_resolve]: 1.14697e-05 [a_after_grad]: 1.29999e-05 [special_op_eliminate]: 7.68993e-06 [renormalize]: 0.00042371 [add_forward_monad_depend]: 3.60981e-06 [auto_monad_grad]: 1.86963e-06 [auto_monad_eliminator]: 3.22498e-05 [cse]: 2.93003e-05 [a_3]: 5.766e-05 [Cycle 2]: 0.0007749, [43] [expand_dump_flag]: 1.09989e-06 [switch_simplify]: 9.58005e-06 [loop_unroll]: 7.57957e-06 [a_1]: 0.00020303 [recompute_prepare]: 7.47992e-06 [updatestate_depend_eliminate]: 5.61029e-06 [updatestate_assign_eliminate]: 5.09014e-06 [updatestate_loads_eliminate]: 5.3402e-06 [parameter_eliminate]: 1.36998e-06 [a_2]: 0.00010445 [accelerated_algorithm]: 8.15e-06 [shard]: 1.24983e-06 [meta_shard_fg_expand]: 2.40002e-06 [shard_inline]: 7.53999e-06 [auto_parallel]: 1.091e-05 [parallel]: 3.51015e-06 [flash_sp]: 3.32994e-06 [merge_comm]: 5.69038e-06 [allreduce_fusion]: 4.66965e-06 [matmul_add_comm_reduction]: 8.33021e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 8.57003e-06 [virtual_dataset]: 7.70018e-06 [get_grad_eliminate_]: 7.52974e-06 [virtual_output]: 7.19028e-06 [merge_forward]: 4.69014e-06 [cell_reuse_recompute_pass]: 2.21981e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.47601e-05 [before_grad]: 1.23498e-05 [inplace_validation]: 3.93996e-06 [meta_fg_expand]: 4.94998e-06 [inplace_validation_after_expand]: 5.02961e-06 [flash_sp_send_recv_attached]: 9.69972e-07 [receive_attached]: 7.10133e-07 [after_resolve]: 9.54e-06 [a_after_grad]: 1.17896e-05 [special_op_eliminate]: 7.20005e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.801e-07 [auto_monad_grad]: 1.18976e-06 [auto_monad_eliminator]: 1.81501e-05 [cse]: 1.87e-05 [a_3]: 4.88204e-05 [py_interpret_to_execute_after_opt_a]: 8.92021e-06 [slice_cell_reuse_recomputed_activation]: 2.30037e-06 [rewriter_after_opt_a]: 0.00013769 [convert_after_rewriter]: 9.26992e-06 [order_py_execute_after_rewriter]: 6.08992e-06 [opt_b]: 0.00029284, [1] [Cycle 1]: 0.00028719, [7] [b_1]: 0.00021286 [b_2]: 9.89996e-06 [updatestate_depend_eliminate]: 4.88013e-06 [updatestate_assign_eliminate]: 4.42984e-06 [updatestate_loads_eliminate]: 5.11017e-06 [renormalize]: 3.90224e-07 [cse]: 1.90502e-05 [optimize_parallel_all_gather_comm]: 7.72998e-06 [overlap_param_gather]: 1.0496e-06 [cconv]: 2.245e-05 [loop_unroll]: 0.00049517 [opt_after_cconv]: 0.00013252, [1] [Cycle 1]: 0.00012654, [7] [c_1]: 5.26402e-05 [parameter_eliminate]: 2.6701e-06 [updatestate_depend_eliminate]: 7.93021e-06 [updatestate_assign_eliminate]: 4.50993e-06 [updatestate_loads_eliminate]: 5.38025e-06 [cse]: 2.15396e-05 [renormalize]: 3.1013e-07 [remove_dup_value]: 1.375e-05 [tuple_transform]: 6.87898e-05, [1] [Cycle 1]: 6.42897e-05, [2] [d_1]: 5.51501e-05 [renormalize]: 2.19792e-07 [partial_unused_args_eliminate]: 2.35997e-06 [add_cache_embedding]: 1.22902e-05 [add_recomputation]: 6.29802e-05 [cse_after_recomputation]: 2.63201e-05, [1] [Cycle 1]: 2.127e-05, [1] [cse]: 1.632e-05 [environ_conv]: 7.40029e-06 [swap_dp_allreduce_reducescatter]: 7.75e-06 [bias_add_comm_swap]: 2.26032e-06 [label_micro_interleaved_index]: 1.85985e-06 [label_fine_grained_interleaved_index]: 1.88034e-06 [merge_cast_opt]: 1.31968e-06 [slice_recompute_activation]: 1.38022e-06 [micro_interleaved_order_control]: 1.47987e-06 [assign_add_opt]: 7.03987e-06 [ForceFp32Comm]: 7.89762e-07 [remove_cast_before_assign_add]: 7.90227e-07 [full_micro_interleaved_order_control]: 1.95019e-06 [reorder_send_recv_between_fp_bp]: 2.19001e-06 [comm_op_add_attrs]: 7.80448e-07 [add_comm_op_reuse_tag]: 8.801e-07 [interleave_split_concat_branches]: 8.09785e-07 [interleave_parallel_branches]: 6.69621e-07 [overlap_opt_shard_in_pipeline]: 1.15996e-06 [overlap_opt_shard_grad_in_pipeline]: 2.46987e-06 [control_data_broadcast_order]: 1.08965e-06 [grouped_pairwise_exchange_alltoall]: 1.20979e-06 [offloading_packed_experts]: 1.22003e-06 [overlap_recompute_and_grad_model_parallel]: 2.00979e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.39e-06 [overlap_recompute_allgather_and_fa_grad]: 9.49949e-07 [overlap_grad_ring_attention]: 1.4198e-06 [overlap_grad_flash_sp]: 1.506e-05 [begin_end_overlap_inline]: 6.99889e-07 [split_matmul_comm_elemetwise]: 1.74996e-06 [split_layernorm_comm]: 2.33017e-06 [handle_group_info]: 8.09785e-07 [symbol_engine_optimizer]: 8.468e-05, [1] [Cycle 1]: 8.024e-05, [6] [build]: 3.76999e-06 [elim_shapecalc]: 1.14902e-05 [elim_not_effective]: 1.63e-05 [opt_reshape]: 8.97003e-06 [fold_const_symbol]: 1.38897e-05 [renormalize]: 2.5006e-07 [pipeline_parallel_scheduler]: 1.60001e-06 [auto_monad_reorder]: 2.84603e-05 [get_jit_bprop_graph]: 4.49829e-07 [rewriter_after_jit_bprop_graph]: 5.0012e-07 [eliminate_special_op_node]: 0.00051229 [distribtued_split]: 4.10196e-05 [validate]: 3.557e-05 [task_emit]: 0.0687487 [execute]: 1.02296e-05 Sums bootstrap : 0.000301s : 0.40% type_inference : 0.002457s : 3.25% auto_monad : 0.000121s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000541s : 0.72% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000015s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.02% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000424s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.07% optimize.opt_a.cse : 0.000048s : 0.06% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000138s : 0.18% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000213s : 0.28% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000495s : 0.66% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000028s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000512s : 0.68% distribtued_split : 0.000041s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.068749s : 90.98% execute : 0.000010s : 0.01% TotalTime = 0.0799472, [21] [bootstrap]: 0.00031182 [type_inference]: 0.00247687 [auto_monad]: 0.00012276 [graph_reusing]: 1.95997e-06 [inline]: 1.2503e-06 [parallel-infer-symbol]: 1.53016e-06 [pre_auto_parallel]: 2.445e-05 [insert-virtual-dataset]: 3.20002e-06 [parallel-infer-symbol-second]: 3.69735e-07 [dataset_repeat_opt]: 1.41002e-06 [pipeline_split]: 1.61026e-06 [optimize]: 0.00709997, [52] [py_interpret_to_execute]: 1.45398e-05 [rewriter_before_opt_a]: 3.54298e-05 [opt_a]: 0.00539272, [2] [Cycle 1]: 0.00150277, [43] [expand_dump_flag]: 2.90992e-06 [switch_simplify]: 2.73599e-05 [loop_unroll]: 1.354e-05 [a_1]: 0.00033649 [recompute_prepare]: 9.37004e-06 [updatestate_depend_eliminate]: 8.6599e-06 [updatestate_assign_eliminate]: 5.59026e-06 [updatestate_loads_eliminate]: 6.54999e-06 [parameter_eliminate]: 3.39979e-06 [a_2]: 0.0001155 [accelerated_algorithm]: 8.36002e-06 [shard]: 1.97021e-06 [meta_shard_fg_expand]: 3.6899e-06 [shard_inline]: 8.46991e-06 [auto_parallel]: 1.194e-05 [parallel]: 6.21006e-06 [flash_sp]: 9.01008e-06 [merge_comm]: 7.69971e-06 [allreduce_fusion]: 5.30016e-06 [matmul_add_comm_reduction]: 1.04499e-05 [allreduce_slice_to_reducescatter]: 3.90224e-07 [virtual_shard_identity]: 9.58005e-06 [virtual_dataset]: 7.89994e-06 [get_grad_eliminate_]: 7.79005e-06 [virtual_output]: 7.42963e-06 [merge_forward]: 5.63962e-06 [cell_reuse_recompute_pass]: 1.94041e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.649e-05 [before_grad]: 1.27801e-05 [inplace_validation]: 5.12041e-06 [meta_fg_expand]: 5.41005e-06 [inplace_validation_after_expand]: 5.73974e-06 [flash_sp_send_recv_attached]: 4.02005e-06 [receive_attached]: 2.14018e-06 [after_resolve]: 1.10799e-05 [a_after_grad]: 1.22902e-05 [special_op_eliminate]: 7.39004e-06 [renormalize]: 0.00042979 [add_forward_monad_depend]: 3.20002e-06 [auto_monad_grad]: 1.51992e-06 [auto_monad_eliminator]: 2.87304e-05 [cse]: 2.79499e-05 [a_3]: 5.81103e-05 [Cycle 2]: 0.00079319, [43] [expand_dump_flag]: 1.09011e-06 [switch_simplify]: 9.22009e-06 [loop_unroll]: 8.17003e-06 [a_1]: 0.00020152 [recompute_prepare]: 7.37002e-06 [updatestate_depend_eliminate]: 6.52997e-06 [updatestate_assign_eliminate]: 4.78001e-06 [updatestate_loads_eliminate]: 5.32018e-06 [parameter_eliminate]: 1.29966e-06 [a_2]: 0.00010497 [accelerated_algorithm]: 8.17003e-06 [shard]: 1.34995e-06 [meta_shard_fg_expand]: 2.84985e-06 [shard_inline]: 7.6401e-06 [auto_parallel]: 1.097e-05 [parallel]: 4.08012e-06 [flash_sp]: 3.2098e-06 [merge_comm]: 5.92973e-06 [allreduce_fusion]: 5.08968e-06 [matmul_add_comm_reduction]: 7.85012e-06 [allreduce_slice_to_reducescatter]: 2.19792e-07 [virtual_shard_identity]: 8.69995e-06 [virtual_dataset]: 7.73976e-06 [get_grad_eliminate_]: 7.30995e-06 [virtual_output]: 7.18003e-06 [merge_forward]: 4.90015e-06 [cell_reuse_recompute_pass]: 2.04984e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.52197e-05 [before_grad]: 1.25202e-05 [inplace_validation]: 4.22029e-06 [meta_fg_expand]: 4.99003e-06 [inplace_validation_after_expand]: 5.22984e-06 [flash_sp_send_recv_attached]: 8.801e-07 [receive_attached]: 7.19912e-07 [after_resolve]: 1.01998e-05 [a_after_grad]: 1.20699e-05 [special_op_eliminate]: 7.83987e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.69855e-07 [auto_monad_grad]: 1.15018e-06 [auto_monad_eliminator]: 1.85897e-05 [cse]: 1.89696e-05 [a_3]: 4.945e-05 [py_interpret_to_execute_after_opt_a]: 9.36026e-06 [slice_cell_reuse_recomputed_activation]: 2.30968e-06 [rewriter_after_opt_a]: 0.00014088 [convert_after_rewriter]: 8.84989e-06 [order_py_execute_after_rewriter]: 6.13974e-06 [opt_b]: 0.00026738, [1] [Cycle 1]: 0.00026087, [7] [b_1]: 0.00018201 [b_2]: 9.79006e-06 [updatestate_depend_eliminate]: 5.66989e-06 [updatestate_assign_eliminate]: 4.57e-06 [updatestate_loads_eliminate]: 5.28013e-06 [renormalize]: 2.90107e-07 [cse]: 1.91801e-05 [optimize_parallel_all_gather_comm]: 8.6301e-06 [overlap_param_gather]: 1.09011e-06 [cconv]: 1.908e-05 [loop_unroll]: 0.00049864 [opt_after_cconv]: 0.00013304, [1] [Cycle 1]: 0.00012627, [7] [c_1]: 5.223e-05 [parameter_eliminate]: 2.50991e-06 [updatestate_depend_eliminate]: 8.02008e-06 [updatestate_assign_eliminate]: 5.24987e-06 [updatestate_loads_eliminate]: 5.15999e-06 [cse]: 2.03098e-05 [renormalize]: 3.89758e-07 [remove_dup_value]: 1.16401e-05 [tuple_transform]: 6.88103e-05, [1] [Cycle 1]: 6.38003e-05, [2] [d_1]: 5.427e-05 [renormalize]: 2.10013e-07 [partial_unused_args_eliminate]: 1.77976e-06 [add_cache_embedding]: 1.27498e-05 [add_recomputation]: 5.83502e-05 [cse_after_recomputation]: 2.81199e-05, [1] [Cycle 1]: 2.28602e-05, [1] [cse]: 1.729e-05 [environ_conv]: 6.50017e-06 [swap_dp_allreduce_reducescatter]: 7.3798e-06 [bias_add_comm_swap]: 2.35997e-06 [label_micro_interleaved_index]: 1.59023e-06 [label_fine_grained_interleaved_index]: 1.95997e-06 [merge_cast_opt]: 1.11014e-06 [slice_recompute_activation]: 1.66008e-06 [micro_interleaved_order_control]: 1.5297e-06 [assign_add_opt]: 6.78003e-06 [ForceFp32Comm]: 7.69738e-07 [remove_cast_before_assign_add]: 5.89993e-07 [full_micro_interleaved_order_control]: 1.84029e-06 [reorder_send_recv_between_fp_bp]: 1.65962e-06 [comm_op_add_attrs]: 9.89996e-07 [add_comm_op_reuse_tag]: 9.60194e-07 [interleave_split_concat_branches]: 7.19912e-07 [interleave_parallel_branches]: 7.10133e-07 [overlap_opt_shard_in_pipeline]: 1.08033e-06 [overlap_opt_shard_grad_in_pipeline]: 1.8198e-06 [control_data_broadcast_order]: 9.99775e-07 [grouped_pairwise_exchange_alltoall]: 8.2003e-07 [offloading_packed_experts]: 7.09668e-07 [overlap_recompute_and_grad_model_parallel]: 1.45007e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.00006e-07 [overlap_recompute_allgather_and_fa_grad]: 7.19912e-07 [overlap_grad_ring_attention]: 1.72015e-06 [overlap_grad_flash_sp]: 1.333e-05 [begin_end_overlap_inline]: 5.50412e-07 [split_matmul_comm_elemetwise]: 1.87987e-06 [split_layernorm_comm]: 1.68988e-06 [handle_group_info]: 9.50415e-07 [symbol_engine_optimizer]: 8.67103e-05, [1] [Cycle 1]: 8.17301e-05, [6] [build]: 3.27965e-06 [elim_shapecalc]: 1.21999e-05 [elim_not_effective]: 1.60099e-05 [opt_reshape]: 8.59005e-06 [fold_const_symbol]: 1.38702e-05 [renormalize]: 2.49594e-07 [pipeline_parallel_scheduler]: 1.43982e-06 [auto_monad_reorder]: 2.68002e-05 [get_jit_bprop_graph]: 4.00003e-07 [rewriter_after_jit_bprop_graph]: 4.10248e-07 [eliminate_special_op_node]: 0.0005154 [distribtued_split]: 3.93e-05 [validate]: 3.60799e-05 [task_emit]: 0.0690235 [execute]: 1.09198e-05 Sums bootstrap : 0.000312s : 0.41% type_inference : 0.002477s : 3.27% auto_monad : 0.000123s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000037s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000538s : 0.71% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000430s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000047s : 0.06% optimize.opt_a.cse : 0.000047s : 0.06% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000141s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000182s : 0.24% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000019s : 0.03% optimize.loop_unroll : 0.000499s : 0.66% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000058s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000027s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000515s : 0.68% distribtued_split : 0.000039s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.069023s : 91.03% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000129 63 4.96% : 0.000006s : 2: substitution.depend_value_elim 1.88% : 0.000002s : 5: substitution.elim_not_effective 2.00% : 0.000003s : 5: substitution.fold_const_symbol 5.31% : 0.000007s : 6: substitution.graph_param_transform 50.43% : 0.000065s : 1: substitution.inline 4.14% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.29% : 0.000004s : 6: substitution.load_eliminater 2.54% : 0.000003s : 2: substitution.reduce_all_const_elim 6.03% : 0.000008s : 10: substitution.remove_not_recompute_node 2.34% : 0.000003s : 2: substitution.replace_old_param 9.37% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.70% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002426 2 87.96% : 0.002134s : 1: type_inference.infer 12.04% : 0.000292s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000229 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.16% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.23% : 0.000005s : 25: predicate.arithmetic_simplify 0.85% : 0.000002s : 13: predicate.cast_eliminate 0.77% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.54% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.92% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.85% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.32% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_depend_swap 1.96% : 0.000004s : 31: predicate.environ_get_eliminate 1.21% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.78% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.26% : 0.000001s : 6: predicate.fold_const_symbol 0.86% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.64% : 0.000013s : 63: predicate.inline 0.93% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.09% : 0.000003s : 12: predicate.less_batch_normalization 1.84% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.42% : 0.000006s : 38: predicate.load_eliminater 1.14% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.74% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.56% : 0.000001s : 6: predicate.parallel_virtual_node 1.09% : 0.000002s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.82% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000003s : 13: predicate.reduce_eliminate 0.54% : 0.000001s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.49% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.78% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.13% : 0.000003s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000002s : 12: predicate.shard_identity_eliminate 1.33% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 0.98% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.02% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.28% : 0.000005s : 38: predicate.stopgrad_eliminater 0.46% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.54% : 0.000010s : 43: predicate.switch_simplify 0.86% : 0.000002s : 13: predicate.tile_eliminate 0.77% : 0.000002s : 13: predicate.transpose_eliminate 1.75% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.78% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.65% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.75% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.72% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.51% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.36% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.48% : 0.000001s : 6: predicate.value_based_eliminate 0.85% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.92% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000171 4 9.59% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.41% : 0.000154s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088570 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000067s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.15% : 0.000133s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.37% : 0.000324s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.08% : 0.000072s : 1: distribtued_split 0.59% : 0.000526s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000505s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001089s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.23% : 0.000202s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.06% : 0.005366s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.33% : 0.000296s : 1: opt_b 8.01% : 0.007094s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000030s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.26% : 0.000233s : 1: renormalize.infer 0.21% : 0.000186s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000143s : 1: rewriter_after_opt_a 0.05% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000087s : 1: symbol_engine_optimizer 77.65% : 0.068777s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.79% : 0.002475s : 1: type_inference 0.08% : 0.000071s : 1: validate Time group info: ------[substitution.] 0.000126 63 4.96% : 0.000006s : 2: substitution.depend_value_elim 1.91% : 0.000002s : 5: substitution.elim_not_effective 1.66% : 0.000002s : 5: substitution.fold_const_symbol 5.22% : 0.000007s : 6: substitution.graph_param_transform 49.61% : 0.000062s : 1: substitution.inline 3.78% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.52% : 0.000004s : 6: substitution.load_eliminater 2.59% : 0.000003s : 2: substitution.reduce_all_const_elim 6.01% : 0.000008s : 10: substitution.remove_not_recompute_node 2.60% : 0.000003s : 2: substitution.replace_old_param 9.76% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.38% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002447 2 88.96% : 0.002176s : 1: type_inference.infer 11.04% : 0.000270s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000061 1 100.00% : 0.000061s : 1: match.inline ------[predicate.] 0.000225 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.11% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.77% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.22% : 0.000005s : 25: predicate.arithmetic_simplify 0.86% : 0.000002s : 13: predicate.cast_eliminate 0.86% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.23% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.88% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.98% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.59% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000004s : 31: predicate.environ_get_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.88% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.31% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.14% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.86% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.67% : 0.000013s : 63: predicate.inline 0.99% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.01% : 0.000002s : 12: predicate.less_batch_normalization 1.68% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.45% : 0.000006s : 38: predicate.load_eliminater 1.36% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.31% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.76% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.79% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.78% : 0.000002s : 13: predicate.minmaximum_grad 0.73% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.17% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.06% : 0.000002s : 13: predicate.reduce_eliminate 0.54% : 0.000001s : 12: predicate.remove_not_recompute_node 1.17% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.85% : 0.000002s : 13: predicate.reshape_eliminate 0.84% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.02% : 0.000002s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.30% : 0.000003s : 18: predicate.special_op_eliminate 0.91% : 0.000002s : 12: predicate.specialize_transform 1.00% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.24% : 0.000005s : 38: predicate.stopgrad_eliminater 0.46% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.23% : 0.000010s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.71% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.64% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.70% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.60% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.72% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.40% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.60% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.85% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000143 4 8.97% : 0.000013s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.03% : 0.000130s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088868 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000063s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.15% : 0.000136s : 1: auto_monad 0.04% : 0.000034s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000336s : 1: bootstrap 0.03% : 0.000023s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.04% : 0.000032s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.60% : 0.000530s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.57% : 0.000508s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001089s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.19% : 0.000172s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.07% : 0.005397s : 1: opt_a 0.15% : 0.000137s : 1: opt_after_cconv 0.31% : 0.000271s : 1: opt_b 8.00% : 0.007108s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.26% : 0.000232s : 1: renormalize.infer 0.22% : 0.000192s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000147s : 1: rewriter_after_opt_a 0.05% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000090s : 1: symbol_engine_optimizer 77.70% : 0.069053s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.81% : 0.002494s : 1: type_inference 0.08% : 0.000070s : 1: validate TotalTime = 0.081573, [21] [bootstrap]: 0.00032133 [type_inference]: 0.00258634 [auto_monad]: 0.00013562 [graph_reusing]: 2.10013e-06 [inline]: 1.40024e-06 [parallel-infer-symbol]: 2.34973e-06 [pre_auto_parallel]: 2.61301e-05 [insert-virtual-dataset]: 2.90014e-06 [parallel-infer-symbol-second]: 3.7998e-07 [dataset_repeat_opt]: 1.26008e-06 [pipeline_split]: 1.65962e-06 [optimize]: 0.00722837, [52] [py_interpret_to_execute]: 1.52998e-05 [rewriter_before_opt_a]: 3.68804e-05 [opt_a]: 0.00553012, [2] [Cycle 1]: 0.00154455, [43] [expand_dump_flag]: 4.11039e-06 [switch_simplify]: 3.16501e-05 [loop_unroll]: 1.29198e-05 [a_1]: 0.00034125 [recompute_prepare]: 9.25967e-06 [updatestate_depend_eliminate]: 8.86992e-06 [updatestate_assign_eliminate]: 5.91995e-06 [updatestate_loads_eliminate]: 7.54977e-06 [parameter_eliminate]: 3.49991e-06 [a_2]: 0.00011762 [accelerated_algorithm]: 8.25012e-06 [shard]: 2.28034e-06 [meta_shard_fg_expand]: 3.78024e-06 [shard_inline]: 8.27992e-06 [auto_parallel]: 1.21701e-05 [parallel]: 8.30973e-06 [flash_sp]: 1.25403e-05 [merge_comm]: 9.49018e-06 [allreduce_fusion]: 5.56e-06 [matmul_add_comm_reduction]: 1.08499e-05 [allreduce_slice_to_reducescatter]: 4.70318e-07 [virtual_shard_identity]: 9.75002e-06 [virtual_dataset]: 8.28039e-06 [get_grad_eliminate_]: 7.77002e-06 [virtual_output]: 7.66991e-06 [merge_forward]: 5.74999e-06 [cell_reuse_recompute_pass]: 1.71037e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.71997e-05 [before_grad]: 1.39098e-05 [inplace_validation]: 5.22006e-06 [meta_fg_expand]: 5.59026e-06 [inplace_validation_after_expand]: 6.82985e-06 [flash_sp_send_recv_attached]: 5.26989e-06 [receive_attached]: 2.57976e-06 [after_resolve]: 1.12099e-05 [a_after_grad]: 1.24904e-05 [special_op_eliminate]: 7.45011e-06 [renormalize]: 0.00043982 [add_forward_monad_depend]: 3.53996e-06 [auto_monad_grad]: 1.93017e-06 [auto_monad_eliminator]: 3.35197e-05 [cse]: 3.58704e-05 [a_3]: 5.76903e-05 [Cycle 2]: 0.00080353, [43] [expand_dump_flag]: 1.15996e-06 [switch_simplify]: 9.07015e-06 [loop_unroll]: 7.79005e-06 [a_1]: 0.00020057 [recompute_prepare]: 7.33975e-06 [updatestate_depend_eliminate]: 6.00982e-06 [updatestate_assign_eliminate]: 4.99003e-06 [updatestate_loads_eliminate]: 5.45988e-06 [parameter_eliminate]: 1.42958e-06 [a_2]: 0.00010503 [accelerated_algorithm]: 8.51974e-06 [shard]: 1.43005e-06 [meta_shard_fg_expand]: 2.61981e-06 [shard_inline]: 7.53999e-06 [auto_parallel]: 1.112e-05 [parallel]: 3.8296e-06 [flash_sp]: 3.74997e-06 [merge_comm]: 5.91995e-06 [allreduce_fusion]: 4.99003e-06 [matmul_add_comm_reduction]: 8.00984e-06 [allreduce_slice_to_reducescatter]: 3.1013e-07 [virtual_shard_identity]: 8.50018e-06 [virtual_dataset]: 7.58003e-06 [get_grad_eliminate_]: 7.39004e-06 [virtual_output]: 7.05011e-06 [merge_forward]: 4.78001e-06 [cell_reuse_recompute_pass]: 1.72993e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.55e-05 [before_grad]: 1.25398e-05 [inplace_validation]: 4.57e-06 [meta_fg_expand]: 4.80004e-06 [inplace_validation_after_expand]: 5.37001e-06 [flash_sp_send_recv_attached]: 9.69972e-07 [receive_attached]: 7.19912e-07 [after_resolve]: 2.96501e-05 [a_after_grad]: 1.26301e-05 [special_op_eliminate]: 7.43009e-06 [renormalize]: 8.98726e-08 [add_forward_monad_depend]: 9.89996e-07 [auto_monad_grad]: 1.20979e-06 [auto_monad_eliminator]: 1.944e-05 [cse]: 1.944e-05 [a_3]: 4.928e-05 [py_interpret_to_execute_after_opt_a]: 9.6499e-06 [slice_cell_reuse_recomputed_activation]: 2.80002e-06 [rewriter_after_opt_a]: 0.00014692 [convert_after_rewriter]: 9.28016e-06 [order_py_execute_after_rewriter]: 5.58002e-06 [opt_b]: 0.00024237, [1] [Cycle 1]: 0.00023647, [7] [b_1]: 0.0001609 [b_2]: 9.86969e-06 [updatestate_depend_eliminate]: 5.60004e-06 [updatestate_assign_eliminate]: 4.33996e-06 [updatestate_loads_eliminate]: 5.11995e-06 [renormalize]: 2.60305e-07 [cse]: 1.92402e-05 [optimize_parallel_all_gather_comm]: 8.58027e-06 [overlap_param_gather]: 1.54041e-06 [cconv]: 2.35504e-05 [loop_unroll]: 0.00049826 [opt_after_cconv]: 0.00013216, [1] [Cycle 1]: 0.00012608, [7] [c_1]: 5.223e-05 [parameter_eliminate]: 2.46987e-06 [updatestate_depend_eliminate]: 8.17981e-06 [updatestate_assign_eliminate]: 4.66034e-06 [updatestate_loads_eliminate]: 5.4501e-06 [cse]: 2.064e-05 [renormalize]: 4.39584e-07 [remove_dup_value]: 1.432e-05 [tuple_transform]: 6.75102e-05, [1] [Cycle 1]: 6.32503e-05, [2] [d_1]: 5.42901e-05 [renormalize]: 1.60187e-07 [partial_unused_args_eliminate]: 1.93017e-06 [add_cache_embedding]: 1.411e-05 [add_recomputation]: 6.21197e-05 [cse_after_recomputation]: 2.49902e-05, [1] [Cycle 1]: 2.06898e-05, [1] [cse]: 1.586e-05 [environ_conv]: 7.04033e-06 [swap_dp_allreduce_reducescatter]: 7.68015e-06 [bias_add_comm_swap]: 1.94972e-06 [label_micro_interleaved_index]: 1.95019e-06 [label_fine_grained_interleaved_index]: 2.41026e-06 [merge_cast_opt]: 1.44029e-06 [slice_recompute_activation]: 2.04984e-06 [micro_interleaved_order_control]: 1.93017e-06 [assign_add_opt]: 7.15023e-06 [ForceFp32Comm]: 8.801e-07 [remove_cast_before_assign_add]: 8.70321e-07 [full_micro_interleaved_order_control]: 2.61003e-06 [reorder_send_recv_between_fp_bp]: 2.06986e-06 [comm_op_add_attrs]: 1.19023e-06 [add_comm_op_reuse_tag]: 1.47009e-06 [interleave_split_concat_branches]: 8.40053e-07 [interleave_parallel_branches]: 8.801e-07 [overlap_opt_shard_in_pipeline]: 1.34995e-06 [overlap_opt_shard_grad_in_pipeline]: 2.99001e-06 [control_data_broadcast_order]: 9.4017e-07 [grouped_pairwise_exchange_alltoall]: 1.37975e-06 [offloading_packed_experts]: 1.10036e-06 [overlap_recompute_and_grad_model_parallel]: 2.19001e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.89879e-07 [overlap_recompute_allgather_and_fa_grad]: 8.09785e-07 [overlap_grad_ring_attention]: 2.31992e-06 [overlap_grad_flash_sp]: 1.46399e-05 [begin_end_overlap_inline]: 7.89762e-07 [split_matmul_comm_elemetwise]: 2.46009e-06 [split_layernorm_comm]: 1.97021e-06 [handle_group_info]: 1.05985e-06 [symbol_engine_optimizer]: 8.306e-05, [1] [Cycle 1]: 7.847e-05, [6] [build]: 3.83984e-06 [elim_shapecalc]: 1.12499e-05 [elim_not_effective]: 1.60304e-05 [opt_reshape]: 8.59005e-06 [fold_const_symbol]: 1.35498e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 1.43005e-06 [auto_monad_reorder]: 2.94e-05 [get_jit_bprop_graph]: 4.4005e-07 [rewriter_after_jit_bprop_graph]: 4.70318e-07 [eliminate_special_op_node]: 0.00051053 [distribtued_split]: 4.16301e-05 [validate]: 3.58098e-05 [task_emit]: 0.0703898 [execute]: 1.19801e-05 Sums bootstrap : 0.000321s : 0.42% type_inference : 0.002586s : 3.34% auto_monad : 0.000136s : 0.18% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000041s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000542s : 0.70% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000223s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000041s : 0.05% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000440s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000053s : 0.07% optimize.opt_a.cse : 0.000055s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000147s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000498s : 0.64% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000511s : 0.66% distribtued_split : 0.000042s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.070390s : 90.93% execute : 0.000012s : 0.02% Time group info: ------[substitution.] 0.000133 63 4.94% : 0.000007s : 2: substitution.depend_value_elim 1.85% : 0.000002s : 5: substitution.elim_not_effective 2.02% : 0.000003s : 5: substitution.fold_const_symbol 5.31% : 0.000007s : 6: substitution.graph_param_transform 50.77% : 0.000067s : 1: substitution.inline 4.11% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.20% : 0.000004s : 6: substitution.load_eliminater 2.60% : 0.000003s : 2: substitution.reduce_all_const_elim 6.06% : 0.000008s : 10: substitution.remove_not_recompute_node 2.11% : 0.000003s : 2: substitution.replace_old_param 8.87% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.17% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002555 2 89.07% : 0.002276s : 1: type_inference.infer 10.93% : 0.000279s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000066 1 100.00% : 0.000066s : 1: match.inline ------[predicate.] 0.000228 1420 0.75% : 0.000002s : 13: predicate.accumulaten_eliminater 1.21% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.16% : 0.000005s : 25: predicate.arithmetic_simplify 0.81% : 0.000002s : 13: predicate.cast_eliminate 0.84% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.45% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.79% : 0.000002s : 12: predicate.depend_value_elim 0.87% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.85% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.52% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000004s : 31: predicate.environ_get_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.59% : 0.000013s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.09% : 0.000002s : 12: predicate.less_batch_normalization 1.68% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.37% : 0.000005s : 38: predicate.load_eliminater 1.45% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.78% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.83% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.21% : 0.000003s : 19: predicate.partial_eliminate 0.83% : 0.000002s : 13: predicate.print_const_string_wrapper 0.84% : 0.000002s : 12: predicate.reduce_all_const_elim 1.22% : 0.000003s : 13: predicate.reduce_eliminate 0.54% : 0.000001s : 12: predicate.remove_not_recompute_node 1.19% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.79% : 0.000002s : 13: predicate.reshape_eliminate 0.78% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.87% : 0.000002s : 12: predicate.shard_identity_eliminate 1.37% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 1.04% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.27% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.65% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.68% : 0.000011s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.82% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.77% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.93% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.59% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.53% : 0.000003s : 25: predicate.tuple_to_list_eliminator_ 2.31% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.57% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000154 4 10.70% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.30% : 0.000138s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090638 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.16% : 0.000148s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000346s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.58% : 0.000526s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000509s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001118s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.11% : 0.005534s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.27% : 0.000245s : 1: opt_b 7.98% : 0.007236s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.27% : 0.000244s : 1: renormalize.infer 0.21% : 0.000190s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000153s : 1: rewriter_after_opt_a 0.05% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000086s : 1: symbol_engine_optimizer 77.69% : 0.070416s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.87% : 0.002605s : 1: type_inference 0.08% : 0.000071s : 1: validate TotalTime = 0.0826275, [21] [bootstrap]: 0.00031897 [type_inference]: 0.00267434 [auto_monad]: 0.00014081 [graph_reusing]: 2.88012e-06 [inline]: 1.39e-06 [parallel-infer-symbol]: 2.10013e-06 [pre_auto_parallel]: 2.79099e-05 [insert-virtual-dataset]: 3.24985e-06 [parallel-infer-symbol-second]: 3.90224e-07 [dataset_repeat_opt]: 1.59023e-06 [pipeline_split]: 1.68011e-06 [optimize]: 0.00777928, [52] [py_interpret_to_execute]: 1.66898e-05 [rewriter_before_opt_a]: 4.02601e-05 [opt_a]: 0.00594085, [2] [Cycle 1]: 0.00173629, [43] [expand_dump_flag]: 4.51971e-06 [switch_simplify]: 3.43202e-05 [loop_unroll]: 1.61501e-05 [a_1]: 0.0004062 [recompute_prepare]: 1.08103e-05 [updatestate_depend_eliminate]: 9.35979e-06 [updatestate_assign_eliminate]: 6.4401e-06 [updatestate_loads_eliminate]: 8.49972e-06 [parameter_eliminate]: 3.87989e-06 [a_2]: 0.00014293 [accelerated_algorithm]: 1.04397e-05 [shard]: 2.23983e-06 [meta_shard_fg_expand]: 4.30038e-06 [shard_inline]: 1.02301e-05 [auto_parallel]: 1.23098e-05 [parallel]: 8.60961e-06 [flash_sp]: 1.18003e-05 [merge_comm]: 9.69041e-06 [allreduce_fusion]: 6.88015e-06 [matmul_add_comm_reduction]: 1.20499e-05 [allreduce_slice_to_reducescatter]: 4.50294e-07 [virtual_shard_identity]: 1.18399e-05 [virtual_dataset]: 9.87994e-06 [get_grad_eliminate_]: 9.83989e-06 [virtual_output]: 9.61032e-06 [merge_forward]: 6.77025e-06 [cell_reuse_recompute_pass]: 1.89012e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.12695e-05 [before_grad]: 1.729e-05 [inplace_validation]: 5.53997e-06 [meta_fg_expand]: 6.78003e-06 [inplace_validation_after_expand]: 7.56001e-06 [flash_sp_send_recv_attached]: 5.37001e-06 [receive_attached]: 2.68035e-06 [after_resolve]: 1.34599e-05 [a_after_grad]: 1.54302e-05 [special_op_eliminate]: 9.34023e-06 [renormalize]: 0.0004687 [add_forward_monad_depend]: 3.81004e-06 [auto_monad_grad]: 2.12016e-06 [auto_monad_eliminator]: 3.51002e-05 [cse]: 3.89898e-05 [a_3]: 6.78096e-05 [Cycle 2]: 0.00094482, [43] [expand_dump_flag]: 1.16974e-06 [switch_simplify]: 1.11503e-05 [loop_unroll]: 9.52976e-06 [a_1]: 0.00027658 [recompute_prepare]: 9.50973e-06 [updatestate_depend_eliminate]: 6.4699e-06 [updatestate_assign_eliminate]: 5.24009e-06 [updatestate_loads_eliminate]: 5.96e-06 [parameter_eliminate]: 1.55997e-06 [a_2]: 0.00012644 [accelerated_algorithm]: 1.01198e-05 [shard]: 1.24006e-06 [meta_shard_fg_expand]: 2.94019e-06 [shard_inline]: 9.57027e-06 [auto_parallel]: 1.152e-05 [parallel]: 3.90038e-06 [flash_sp]: 3.38024e-06 [merge_comm]: 6.97002e-06 [allreduce_fusion]: 5.49993e-06 [matmul_add_comm_reduction]: 8.84989e-06 [allreduce_slice_to_reducescatter]: 2.60305e-07 [virtual_shard_identity]: 1.04401e-05 [virtual_dataset]: 9.1698e-06 [get_grad_eliminate_]: 8.86014e-06 [virtual_output]: 8.83033e-06 [merge_forward]: 5.11017e-06 [cell_reuse_recompute_pass]: 2.00979e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.96602e-05 [before_grad]: 1.59997e-05 [inplace_validation]: 4.75021e-06 [meta_fg_expand]: 5.60982e-06 [inplace_validation_after_expand]: 5.80028e-06 [flash_sp_send_recv_attached]: 9.89996e-07 [receive_attached]: 9.19681e-07 [after_resolve]: 1.16099e-05 [a_after_grad]: 1.42599e-05 [special_op_eliminate]: 8.76002e-06 [renormalize]: 1.00117e-07 [add_forward_monad_depend]: 1.00024e-06 [auto_monad_grad]: 1.32993e-06 [auto_monad_eliminator]: 2.05203e-05 [cse]: 2.22297e-05 [a_3]: 5.89602e-05 [py_interpret_to_execute_after_opt_a]: 9.81987e-06 [slice_cell_reuse_recomputed_activation]: 2.29012e-06 [rewriter_after_opt_a]: 0.00014952 [convert_after_rewriter]: 1.03503e-05 [order_py_execute_after_rewriter]: 6.67991e-06 [opt_b]: 0.00028419, [1] [Cycle 1]: 0.00027816, [7] [b_1]: 0.0001949 [b_2]: 1.21798e-05 [updatestate_depend_eliminate]: 6.06989e-06 [updatestate_assign_eliminate]: 4.85033e-06 [updatestate_loads_eliminate]: 5.81983e-06 [renormalize]: 2.40281e-07 [cse]: 1.988e-05 [optimize_parallel_all_gather_comm]: 9.37004e-06 [overlap_param_gather]: 1.27964e-06 [cconv]: 2.559e-05 [loop_unroll]: 0.00049645 [opt_after_cconv]: 0.00014934, [1] [Cycle 1]: 0.00014307, [7] [c_1]: 6.34301e-05 [parameter_eliminate]: 2.54018e-06 [updatestate_depend_eliminate]: 8.88994e-06 [updatestate_assign_eliminate]: 5.0501e-06 [updatestate_loads_eliminate]: 5.83008e-06 [cse]: 2.25799e-05 [renormalize]: 4.49829e-07 [remove_dup_value]: 1.651e-05 [tuple_transform]: 8.16998e-05, [1] [Cycle 1]: 7.687e-05, [2] [d_1]: 6.70501e-05 [renormalize]: 2.29571e-07 [partial_unused_args_eliminate]: 2.21003e-06 [add_cache_embedding]: 1.36099e-05 [add_recomputation]: 6.73602e-05 [cse_after_recomputation]: 2.75299e-05, [1] [Cycle 1]: 2.27899e-05, [1] [cse]: 1.765e-05 [environ_conv]: 8.61986e-06 [swap_dp_allreduce_reducescatter]: 8.36002e-06 [bias_add_comm_swap]: 2.42004e-06 [label_micro_interleaved_index]: 2.48989e-06 [label_fine_grained_interleaved_index]: 2.10013e-06 [merge_cast_opt]: 1.79e-06 [slice_recompute_activation]: 2.10013e-06 [micro_interleaved_order_control]: 2.27988e-06 [assign_add_opt]: 7.88039e-06 [ForceFp32Comm]: 8.40053e-07 [remove_cast_before_assign_add]: 1.07009e-06 [full_micro_interleaved_order_control]: 2.52016e-06 [reorder_send_recv_between_fp_bp]: 2.65008e-06 [comm_op_add_attrs]: 1.05007e-06 [add_comm_op_reuse_tag]: 1.20979e-06 [interleave_split_concat_branches]: 9.19681e-07 [interleave_parallel_branches]: 9.20147e-07 [overlap_opt_shard_in_pipeline]: 1.45985e-06 [overlap_opt_shard_grad_in_pipeline]: 2.39024e-06 [control_data_broadcast_order]: 1.19023e-06 [grouped_pairwise_exchange_alltoall]: 1.35973e-06 [offloading_packed_experts]: 1.11014e-06 [overlap_recompute_and_grad_model_parallel]: 2.35997e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.01002e-06 [overlap_recompute_allgather_and_fa_grad]: 1.26986e-06 [overlap_grad_ring_attention]: 2.12993e-06 [overlap_grad_flash_sp]: 1.81701e-05 [begin_end_overlap_inline]: 7.90227e-07 [split_matmul_comm_elemetwise]: 1.97999e-06 [split_layernorm_comm]: 2.12993e-06 [handle_group_info]: 1.24006e-06 [symbol_engine_optimizer]: 0.00010055, [1] [Cycle 1]: 9.55001e-05, [6] [build]: 4.12995e-06 [elim_shapecalc]: 1.40499e-05 [elim_not_effective]: 1.94297e-05 [opt_reshape]: 1.06301e-05 [fold_const_symbol]: 1.90199e-05 [renormalize]: 3.49712e-07 [pipeline_parallel_scheduler]: 1.56043e-06 [auto_monad_reorder]: 3.15597e-05 [get_jit_bprop_graph]: 4.89876e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00051803 [distribtued_split]: 4.63603e-05 [validate]: 3.76399e-05 [task_emit]: 0.0707647 [execute]: 1.27601e-05 Sums bootstrap : 0.000319s : 0.41% type_inference : 0.002674s : 3.42% auto_monad : 0.000141s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000028s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000040s : 0.05% optimize.opt_a.expand_dump_flag : 0.000006s : 0.01% optimize.opt_a.switch_simplify : 0.000045s : 0.06% optimize.opt_a.loop_unroll : 0.000026s : 0.03% optimize.opt_a.a_1 : 0.000683s : 0.87% optimize.opt_a.recompute_prepare : 0.000020s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000269s : 0.34% optimize.opt_a.accelerated_algorithm : 0.000021s : 0.03% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000020s : 0.03% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000013s : 0.02% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000017s : 0.02% optimize.opt_a.allreduce_fusion : 0.000012s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000021s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000022s : 0.03% optimize.opt_a.virtual_dataset : 0.000019s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000019s : 0.02% optimize.opt_a.virtual_output : 0.000018s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000041s : 0.05% optimize.opt_a.before_grad : 0.000033s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000012s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000025s : 0.03% optimize.opt_a.a_after_grad : 0.000030s : 0.04% optimize.opt_a.special_op_eliminate : 0.000018s : 0.02% optimize.opt_a.renormalize : 0.000469s : 0.60% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000056s : 0.07% optimize.opt_a.cse : 0.000061s : 0.08% optimize.opt_a.a_3 : 0.000127s : 0.16% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000150s : 0.19% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000195s : 0.25% optimize.opt_b.b_2 : 0.000012s : 0.02% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000026s : 0.03% optimize.loop_unroll : 0.000496s : 0.63% optimize.opt_after_cconv.c_1 : 0.000063s : 0.08% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000017s : 0.02% optimize.tuple_transform.d_1 : 0.000067s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000067s : 0.09% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000009s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000018s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000019s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000019s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000518s : 0.66% distribtued_split : 0.000046s : 0.06% validate : 0.000038s : 0.05% task_emit : 0.070765s : 90.37% execute : 0.000013s : 0.02% Time group info: ------[substitution.] 0.000149 63 4.74% : 0.000007s : 2: substitution.depend_value_elim 2.19% : 0.000003s : 5: substitution.elim_not_effective 2.21% : 0.000003s : 5: substitution.fold_const_symbol 5.93% : 0.000009s : 6: substitution.graph_param_transform 47.53% : 0.000071s : 1: substitution.inline 4.62% : 0.000007s : 10: substitution.j_node_and_user_rematch 3.66% : 0.000005s : 6: substitution.load_eliminater 2.81% : 0.000004s : 2: substitution.reduce_all_const_elim 6.84% : 0.000010s : 10: substitution.remove_not_recompute_node 2.59% : 0.000004s : 2: substitution.replace_old_param 8.85% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 8.03% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002643 2 88.45% : 0.002337s : 1: type_inference.infer 11.55% : 0.000305s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000070 1 100.00% : 0.000070s : 1: match.inline ------[predicate.] 0.000270 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.19% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.25% : 0.000006s : 25: predicate.arithmetic_simplify 0.76% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.50% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.88% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.20% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.06% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000005s : 31: predicate.environ_get_eliminate 1.19% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.35% : 0.000004s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.05% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.78% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.82% : 0.000002s : 12: predicate.incorporate_call 0.73% : 0.000002s : 12: predicate.incorporate_call_switch 6.10% : 0.000016s : 63: predicate.inline 1.18% : 0.000003s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.99% : 0.000003s : 12: predicate.less_batch_normalization 1.71% : 0.000005s : 25: predicate.list_to_tuple_eliminator_ 2.33% : 0.000006s : 38: predicate.load_eliminater 1.32% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.19% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.80% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.81% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.67% : 0.000002s : 6: predicate.mutable_eliminate 0.42% : 0.000001s : 6: predicate.opt_reshape 0.52% : 0.000001s : 6: predicate.parallel_virtual_node 1.04% : 0.000003s : 14: predicate.partial_defer_inline 1.33% : 0.000004s : 19: predicate.partial_eliminate 0.85% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.05% : 0.000003s : 13: predicate.reduce_eliminate 0.65% : 0.000002s : 12: predicate.remove_not_recompute_node 1.16% : 0.000003s : 25: predicate.replace_applicator 0.49% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.80% : 0.000002s : 13: predicate.reshape_eliminate 0.76% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.56% : 0.000002s : 6: predicate.row_tensor_eliminate 0.96% : 0.000003s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.94% : 0.000003s : 12: predicate.shard_identity_eliminate 1.41% : 0.000004s : 18: predicate.special_op_eliminate 1.05% : 0.000003s : 12: predicate.specialize_transform 1.14% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.93% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000006s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.85% : 0.000002s : 14: predicate.switch_defer_inline 1.68% : 0.000005s : 26: predicate.switch_layer_defer_inline 4.18% : 0.000011s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.77% : 0.000002s : 13: predicate.transpose_eliminate 1.67% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.51% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.77% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.70% : 0.000005s : 25: predicate.tuple_list_get_set_item_eliminator 2.55% : 0.000007s : 37: predicate.tuple_list_set_item_eliminator 1.68% : 0.000005s : 25: predicate.tuple_to_list_eliminator_ 2.38% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.42% : 0.000009s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.88% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.85% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000166 4 10.86% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.14% : 0.000148s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092571 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000072s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.17% : 0.000154s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.37% : 0.000343s : 1: bootstrap 0.03% : 0.000030s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000007s : 1: dataset_repeat_opt 0.06% : 0.000054s : 1: distribtued_split 0.57% : 0.000532s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000007s : 1: get_jit_bprop_graph 0.01% : 0.000010s : 1: graph_reusing 0.01% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000005s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000506s : 1: loop_unroll 0.01% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000018s : 1: opt.transform.loop_unroll_optimizer 1.46% : 0.001353s : 80: opt.transform.opt_a 0.07% : 0.000061s : 1: opt.transform.opt_after_cconv 0.20% : 0.000184s : 27: opt.transform.opt_b 0.07% : 0.000065s : 1: opt.transform.opt_trans_graph 0.04% : 0.000038s : 3: opt.transform.special_op_eliminate 0.06% : 0.000059s : 4: opt.transform.symbol_engine_opt 6.42% : 0.005945s : 1: opt_a 0.17% : 0.000154s : 1: opt_after_cconv 0.31% : 0.000288s : 1: opt_b 8.41% : 0.007787s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000022s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000034s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000021s : 1: remove_dup_value 0.27% : 0.000254s : 1: renormalize.infer 0.23% : 0.000209s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000156s : 1: rewriter_after_opt_a 0.05% : 0.000045s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000010s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000104s : 1: symbol_engine_optimizer 76.48% : 0.070800s : 1: task_emit 0.09% : 0.000085s : 1: tuple_transform 2.91% : 0.002692s : 1: type_inference 0.08% : 0.000074s : 1: validate TotalTime = 0.0785675, [21] [bootstrap]: 0.00028283 [type_inference]: 0.00221914 [auto_monad]: 9.759e-05 [graph_reusing]: 1.33971e-06 [inline]: 1.22003e-06 [parallel-infer-symbol]: 1.20979e-06 [pre_auto_parallel]: 2.01897e-05 [insert-virtual-dataset]: 2.00002e-06 [parallel-infer-symbol-second]: 3.30154e-07 [dataset_repeat_opt]: 8.2003e-07 [pipeline_split]: 1.13994e-06 [optimize]: 0.00665571, [52] [py_interpret_to_execute]: 1.20201e-05 [rewriter_before_opt_a]: 2.99299e-05 [opt_a]: 0.00504066, [2] [Cycle 1]: 0.0013762, [43] [expand_dump_flag]: 2.37999e-06 [switch_simplify]: 2.46e-05 [loop_unroll]: 1.29896e-05 [a_1]: 0.00031866 [recompute_prepare]: 8.58027e-06 [updatestate_depend_eliminate]: 7.12974e-06 [updatestate_assign_eliminate]: 5.26989e-06 [updatestate_loads_eliminate]: 5.41983e-06 [parameter_eliminate]: 2.42004e-06 [a_2]: 0.00010991 [accelerated_algorithm]: 8.79029e-06 [shard]: 1.6503e-06 [meta_shard_fg_expand]: 2.56998e-06 [shard_inline]: 7.85012e-06 [auto_parallel]: 1.09803e-05 [parallel]: 5.4501e-06 [flash_sp]: 7.28993e-06 [merge_comm]: 6.58026e-06 [allreduce_fusion]: 4.82984e-06 [matmul_add_comm_reduction]: 8.08993e-06 [allreduce_slice_to_reducescatter]: 2.80328e-07 [virtual_shard_identity]: 9.70997e-06 [virtual_dataset]: 8.27992e-06 [get_grad_eliminate_]: 7.24988e-06 [virtual_output]: 7.45989e-06 [merge_forward]: 4.94998e-06 [cell_reuse_recompute_pass]: 1.39e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.588e-05 [before_grad]: 1.30897e-05 [inplace_validation]: 4.48991e-06 [meta_fg_expand]: 5.10039e-06 [inplace_validation_after_expand]: 5.03007e-06 [flash_sp_send_recv_attached]: 2.2999e-06 [receive_attached]: 1.47009e-06 [after_resolve]: 9.83011e-06 [a_after_grad]: 1.23102e-05 [special_op_eliminate]: 7.94977e-06 [renormalize]: 0.00037727 [add_forward_monad_depend]: 2.59001e-06 [auto_monad_grad]: 1.74996e-06 [auto_monad_eliminator]: 2.41799e-05 [cse]: 2.46498e-05 [a_3]: 5.61201e-05 [Cycle 2]: 0.000763, [43] [expand_dump_flag]: 1.03982e-06 [switch_simplify]: 9.49018e-06 [loop_unroll]: 7.77002e-06 [a_1]: 0.00019828 [recompute_prepare]: 7.24988e-06 [updatestate_depend_eliminate]: 5.64987e-06 [updatestate_assign_eliminate]: 4.68036e-06 [updatestate_loads_eliminate]: 4.93042e-06 [parameter_eliminate]: 1.08965e-06 [a_2]: 0.00010364 [accelerated_algorithm]: 8.20961e-06 [shard]: 1.14972e-06 [meta_shard_fg_expand]: 2.50991e-06 [shard_inline]: 7.7202e-06 [auto_parallel]: 1.03302e-05 [parallel]: 3.05008e-06 [flash_sp]: 2.61003e-06 [merge_comm]: 5.77979e-06 [allreduce_fusion]: 5.11995e-06 [matmul_add_comm_reduction]: 7.30017e-06 [allreduce_slice_to_reducescatter]: 2.69618e-07 [virtual_shard_identity]: 8.92999e-06 [virtual_dataset]: 7.59028e-06 [get_grad_eliminate_]: 7.35e-06 [virtual_output]: 7.21961e-06 [merge_forward]: 4.35999e-06 [cell_reuse_recompute_pass]: 1.51992e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.48099e-05 [before_grad]: 1.23503e-05 [inplace_validation]: 3.99025e-06 [meta_fg_expand]: 4.73997e-06 [inplace_validation_after_expand]: 4.92996e-06 [flash_sp_send_recv_attached]: 8.69855e-07 [receive_attached]: 6.39819e-07 [after_resolve]: 9.60985e-06 [a_after_grad]: 1.17701e-05 [special_op_eliminate]: 7.2699e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 6.59842e-07 [auto_monad_grad]: 9.49949e-07 [auto_monad_eliminator]: 1.55498e-05 [cse]: 1.887e-05 [a_3]: 4.83501e-05 [py_interpret_to_execute_after_opt_a]: 8.40006e-06 [slice_cell_reuse_recomputed_activation]: 1.74996e-06 [rewriter_after_opt_a]: 0.00013115 [convert_after_rewriter]: 8.34977e-06 [order_py_execute_after_rewriter]: 5.58002e-06 [opt_b]: 0.0002399, [1] [Cycle 1]: 0.00023481, [7] [b_1]: 0.00015957 [b_2]: 9.72999e-06 [updatestate_depend_eliminate]: 5.0501e-06 [updatestate_assign_eliminate]: 4.47966e-06 [updatestate_loads_eliminate]: 4.89037e-06 [renormalize]: 3.30154e-07 [cse]: 1.88299e-05 [optimize_parallel_all_gather_comm]: 7.7798e-06 [overlap_param_gather]: 1.08965e-06 [cconv]: 1.43498e-05 [loop_unroll]: 0.00051063 [opt_after_cconv]: 0.0001284, [1] [Cycle 1]: 0.00012288, [7] [c_1]: 5.09601e-05 [parameter_eliminate]: 2.15974e-06 [updatestate_depend_eliminate]: 6.88015e-06 [updatestate_assign_eliminate]: 4.89037e-06 [updatestate_loads_eliminate]: 4.92996e-06 [cse]: 2.03201e-05 [renormalize]: 4.20026e-07 [remove_dup_value]: 1.12001e-05 [tuple_transform]: 6.65598e-05, [1] [Cycle 1]: 6.21201e-05, [2] [d_1]: 5.29699e-05 [renormalize]: 2.20258e-07 [partial_unused_args_eliminate]: 1.66008e-06 [add_cache_embedding]: 1.14599e-05 [add_recomputation]: 5.27599e-05 [cse_after_recomputation]: 2.62801e-05, [1] [Cycle 1]: 2.20099e-05, [1] [cse]: 1.66702e-05 [environ_conv]: 6.08992e-06 [swap_dp_allreduce_reducescatter]: 7.53999e-06 [bias_add_comm_swap]: 1.60979e-06 [label_micro_interleaved_index]: 1.70013e-06 [label_fine_grained_interleaved_index]: 1.20979e-06 [merge_cast_opt]: 7.79983e-07 [slice_recompute_activation]: 1.20001e-06 [micro_interleaved_order_control]: 1.3602e-06 [assign_add_opt]: 6.55977e-06 [ForceFp32Comm]: 5.60191e-07 [remove_cast_before_assign_add]: 5.59725e-07 [full_micro_interleaved_order_control]: 1.34995e-06 [reorder_send_recv_between_fp_bp]: 1.66986e-06 [comm_op_add_attrs]: 7.59959e-07 [add_comm_op_reuse_tag]: 7.59959e-07 [interleave_split_concat_branches]: 4.80097e-07 [interleave_parallel_branches]: 5.60191e-07 [overlap_opt_shard_in_pipeline]: 9.89996e-07 [overlap_opt_shard_grad_in_pipeline]: 1.28988e-06 [control_data_broadcast_order]: 6.80331e-07 [grouped_pairwise_exchange_alltoall]: 6.79865e-07 [offloading_packed_experts]: 6.19795e-07 [overlap_recompute_and_grad_model_parallel]: 1.26008e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.5018e-07 [overlap_recompute_allgather_and_fa_grad]: 8.60076e-07 [overlap_grad_ring_attention]: 1.32015e-06 [overlap_grad_flash_sp]: 1.192e-05 [begin_end_overlap_inline]: 3.39933e-07 [split_matmul_comm_elemetwise]: 1.19023e-06 [split_layernorm_comm]: 1.11992e-06 [handle_group_info]: 6.20261e-07 [symbol_engine_optimizer]: 7.93799e-05, [1] [Cycle 1]: 7.56802e-05, [6] [build]: 3.45008e-06 [elim_shapecalc]: 1.11503e-05 [elim_not_effective]: 1.51903e-05 [opt_reshape]: 8.40984e-06 [fold_const_symbol]: 1.32499e-05 [renormalize]: 2.39816e-07 [pipeline_parallel_scheduler]: 9.49949e-07 [auto_monad_reorder]: 2.226e-05 [get_jit_bprop_graph]: 2.59839e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.00049389 [distribtued_split]: 3.04799e-05 [validate]: 2.92999e-05 [task_emit]: 0.0684778 [execute]: 8.19005e-06 Sums bootstrap : 0.000283s : 0.38% type_inference : 0.002219s : 2.97% auto_monad : 0.000098s : 0.13% graph_reusing : 0.000001s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000020s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000034s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000517s : 0.69% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000010s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000214s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000012s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000015s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000008s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000019s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000377s : 0.50% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000104s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000131s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000014s : 0.02% optimize.loop_unroll : 0.000511s : 0.68% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.01% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.02% optimize.add_recomputation : 0.000053s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000022s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000494s : 0.66% distribtued_split : 0.000030s : 0.04% validate : 0.000029s : 0.04% task_emit : 0.068478s : 91.64% execute : 0.000008s : 0.01% TotalTime = 0.0786979, [21] [bootstrap]: 0.00028335 [type_inference]: 0.00221918 [auto_monad]: 9.69898e-05 [graph_reusing]: 2.18023e-06 [inline]: 1.03982e-06 [parallel-infer-symbol]: 1.23959e-06 [pre_auto_parallel]: 2.12803e-05 [insert-virtual-dataset]: 2.00002e-06 [parallel-infer-symbol-second]: 3.30154e-07 [dataset_repeat_opt]: 8.79634e-07 [pipeline_split]: 1.35042e-06 [optimize]: 0.00665669, [52] [py_interpret_to_execute]: 1.20499e-05 [rewriter_before_opt_a]: 2.93502e-05 [opt_a]: 0.00504009, [2] [Cycle 1]: 0.00139629, [43] [expand_dump_flag]: 2.92994e-06 [switch_simplify]: 2.65199e-05 [loop_unroll]: 1.31298e-05 [a_1]: 0.00031789 [recompute_prepare]: 8.56025e-06 [updatestate_depend_eliminate]: 7.35e-06 [updatestate_assign_eliminate]: 5.60004e-06 [updatestate_loads_eliminate]: 5.81006e-06 [parameter_eliminate]: 2.27988e-06 [a_2]: 0.00011423 [accelerated_algorithm]: 8.10018e-06 [shard]: 2.42982e-06 [meta_shard_fg_expand]: 3.20002e-06 [shard_inline]: 8.05967e-06 [auto_parallel]: 1.02599e-05 [parallel]: 5.78025e-06 [flash_sp]: 9.05013e-06 [merge_comm]: 7.2401e-06 [allreduce_fusion]: 5.06034e-06 [matmul_add_comm_reduction]: 9.94001e-06 [allreduce_slice_to_reducescatter]: 3.50177e-07 [virtual_shard_identity]: 8.89972e-06 [virtual_dataset]: 7.74022e-06 [get_grad_eliminate_]: 7.70018e-06 [virtual_output]: 7.16001e-06 [merge_forward]: 5.38025e-06 [cell_reuse_recompute_pass]: 1.33971e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.571e-05 [before_grad]: 1.30697e-05 [inplace_validation]: 4.52995e-06 [meta_fg_expand]: 5.26011e-06 [inplace_validation_after_expand]: 5.22006e-06 [flash_sp_send_recv_attached]: 3.53018e-06 [receive_attached]: 2.34041e-06 [after_resolve]: 1.013e-05 [a_after_grad]: 1.21798e-05 [special_op_eliminate]: 7.83009e-06 [renormalize]: 0.00039069 [add_forward_monad_depend]: 2.37022e-06 [auto_monad_grad]: 1.45985e-06 [auto_monad_eliminator]: 2.36901e-05 [cse]: 2.426e-05 [a_3]: 5.61699e-05 [Cycle 2]: 0.00076319, [43] [expand_dump_flag]: 1.15018e-06 [switch_simplify]: 8.44011e-06 [loop_unroll]: 7.87014e-06 [a_1]: 0.00020037 [recompute_prepare]: 7.41985e-06 [updatestate_depend_eliminate]: 5.78025e-06 [updatestate_assign_eliminate]: 4.5402e-06 [updatestate_loads_eliminate]: 4.7097e-06 [parameter_eliminate]: 1.11992e-06 [a_2]: 0.00010253 [accelerated_algorithm]: 8.06013e-06 [shard]: 1.01002e-06 [meta_shard_fg_expand]: 2.59001e-06 [shard_inline]: 7.53021e-06 [auto_parallel]: 1.02301e-05 [parallel]: 3.15998e-06 [flash_sp]: 2.97977e-06 [merge_comm]: 5.84964e-06 [allreduce_fusion]: 5.0501e-06 [matmul_add_comm_reduction]: 7.7202e-06 [allreduce_slice_to_reducescatter]: 2.59839e-07 [virtual_shard_identity]: 8.83033e-06 [virtual_dataset]: 7.4897e-06 [get_grad_eliminate_]: 7.45989e-06 [virtual_output]: 7.02031e-06 [merge_forward]: 4.58024e-06 [cell_reuse_recompute_pass]: 1.50967e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.56602e-05 [before_grad]: 1.22497e-05 [inplace_validation]: 4.04008e-06 [meta_fg_expand]: 4.60027e-06 [inplace_validation_after_expand]: 4.90015e-06 [flash_sp_send_recv_attached]: 8.29808e-07 [receive_attached]: 6.9011e-07 [after_resolve]: 9.61963e-06 [a_after_grad]: 1.15498e-05 [special_op_eliminate]: 7.03987e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.30157e-07 [auto_monad_grad]: 8.40053e-07 [auto_monad_eliminator]: 1.56197e-05 [cse]: 1.90497e-05 [a_3]: 4.87603e-05 [py_interpret_to_execute_after_opt_a]: 8.62963e-06 [slice_cell_reuse_recomputed_activation]: 1.70991e-06 [rewriter_after_opt_a]: 0.00013138 [convert_after_rewriter]: 8.1202e-06 [order_py_execute_after_rewriter]: 5.22006e-06 [opt_b]: 0.00023719, [1] [Cycle 1]: 0.00023183, [7] [b_1]: 0.00015967 [b_2]: 9.76026e-06 [updatestate_depend_eliminate]: 4.99003e-06 [updatestate_assign_eliminate]: 4.18024e-06 [updatestate_loads_eliminate]: 4.65009e-06 [renormalize]: 3.30154e-07 [cse]: 1.79703e-05 [optimize_parallel_all_gather_comm]: 7.64988e-06 [overlap_param_gather]: 1.0198e-06 [cconv]: 1.66502e-05 [loop_unroll]: 0.00052436 [opt_after_cconv]: 0.00012512, [1] [Cycle 1]: 0.00011961, [7] [c_1]: 5.01801e-05 [parameter_eliminate]: 1.77976e-06 [updatestate_depend_eliminate]: 7.00029e-06 [updatestate_assign_eliminate]: 4.33996e-06 [updatestate_loads_eliminate]: 5.88037e-06 [cse]: 1.96099e-05 [renormalize]: 4.00003e-07 [remove_dup_value]: 8.53976e-06 [tuple_transform]: 7.11596e-05, [1] [Cycle 1]: 6.70198e-05, [2] [d_1]: 5.77602e-05 [renormalize]: 1.49943e-07 [partial_unused_args_eliminate]: 1.39e-06 [add_cache_embedding]: 1.06301e-05 [add_recomputation]: 5.00698e-05 [cse_after_recomputation]: 2.54102e-05, [1] [Cycle 1]: 2.11e-05, [1] [cse]: 1.61603e-05 [environ_conv]: 5.81983e-06 [swap_dp_allreduce_reducescatter]: 6.52997e-06 [bias_add_comm_swap]: 1.43005e-06 [label_micro_interleaved_index]: 1.47987e-06 [label_fine_grained_interleaved_index]: 1.01002e-06 [merge_cast_opt]: 6.59842e-07 [slice_recompute_activation]: 8.29808e-07 [micro_interleaved_order_control]: 1.0198e-06 [assign_add_opt]: 6.14021e-06 [ForceFp32Comm]: 4.60073e-07 [remove_cast_before_assign_add]: 5.20144e-07 [full_micro_interleaved_order_control]: 1.16043e-06 [reorder_send_recv_between_fp_bp]: 1.05985e-06 [comm_op_add_attrs]: 4.60073e-07 [add_comm_op_reuse_tag]: 5.29923e-07 [interleave_split_concat_branches]: 4.80097e-07 [interleave_parallel_branches]: 4.39584e-07 [overlap_opt_shard_in_pipeline]: 9.60194e-07 [overlap_opt_shard_grad_in_pipeline]: 1.24983e-06 [control_data_broadcast_order]: 5.39701e-07 [grouped_pairwise_exchange_alltoall]: 5.09899e-07 [offloading_packed_experts]: 5.89993e-07 [overlap_recompute_and_grad_model_parallel]: 1.09011e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.09899e-07 [overlap_recompute_allgather_and_fa_grad]: 4.89876e-07 [overlap_grad_ring_attention]: 1.2801e-06 [overlap_grad_flash_sp]: 1.12499e-05 [begin_end_overlap_inline]: 4.60073e-07 [split_matmul_comm_elemetwise]: 1.26986e-06 [split_layernorm_comm]: 1.22003e-06 [handle_group_info]: 5.19678e-07 [symbol_engine_optimizer]: 8.007e-05, [1] [Cycle 1]: 7.59098e-05, [6] [build]: 3.26009e-06 [elim_shapecalc]: 1.116e-05 [elim_not_effective]: 1.533e-05 [opt_reshape]: 8.34977e-06 [fold_const_symbol]: 1.26599e-05 [renormalize]: 2.19792e-07 [pipeline_parallel_scheduler]: 8.00006e-07 [auto_monad_reorder]: 2.16099e-05 [get_jit_bprop_graph]: 3.39933e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00048725 [distribtued_split]: 3.50596e-05 [validate]: 2.957e-05 [task_emit]: 0.0686045 [execute]: 9.63034e-06 Sums bootstrap : 0.000283s : 0.38% type_inference : 0.002219s : 2.96% auto_monad : 0.000097s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000029s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000518s : 0.69% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000217s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000020s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000391s : 0.52% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000039s : 0.05% optimize.opt_a.cse : 0.000043s : 0.06% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000131s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000017s : 0.02% optimize.loop_unroll : 0.000524s : 0.70% optimize.opt_after_cconv.c_1 : 0.000050s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000058s : 0.08% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000050s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000006s : 0.01% optimize.ForceFp32Comm : 0.000000s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000000s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000000s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000011s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000022s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000487s : 0.65% distribtued_split : 0.000035s : 0.05% validate : 0.000030s : 0.04% task_emit : 0.068604s : 91.61% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000108 63 3.86% : 0.000004s : 2: substitution.depend_value_elim 2.15% : 0.000002s : 5: substitution.elim_not_effective 2.20% : 0.000002s : 5: substitution.fold_const_symbol 5.46% : 0.000006s : 6: substitution.graph_param_transform 49.71% : 0.000053s : 1: substitution.inline 4.42% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.38% : 0.000004s : 6: substitution.load_eliminater 2.15% : 0.000002s : 2: substitution.reduce_all_const_elim 6.67% : 0.000007s : 10: substitution.remove_not_recompute_node 2.15% : 0.000002s : 2: substitution.replace_old_param 9.41% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.46% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002197 2 90.36% : 0.001985s : 1: type_inference.infer 9.64% : 0.000212s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000052 1 100.00% : 0.000052s : 1: match.inline ------[predicate.] 0.000224 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 0.98% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.28% : 0.000005s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.76% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.21% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.79% : 0.000002s : 12: predicate.depend_value_elim 0.89% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 1.04% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.25% : 0.000001s : 6: predicate.elim_not_effective 0.61% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_depend_swap 1.99% : 0.000004s : 31: predicate.environ_get_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.28% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.71% : 0.000002s : 12: predicate.incorporate_call_switch 5.54% : 0.000012s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.43% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.14% : 0.000003s : 12: predicate.less_batch_normalization 1.68% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.41% : 0.000005s : 38: predicate.load_eliminater 1.21% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.17% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.81% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.74% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.71% : 0.000002s : 6: predicate.mutable_eliminate 0.51% : 0.000001s : 6: predicate.opt_reshape 0.43% : 0.000001s : 6: predicate.parallel_virtual_node 1.14% : 0.000003s : 14: predicate.partial_defer_inline 1.28% : 0.000003s : 19: predicate.partial_eliminate 0.83% : 0.000002s : 13: predicate.print_const_string_wrapper 0.93% : 0.000002s : 12: predicate.reduce_all_const_elim 1.01% : 0.000002s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.50% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.76% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.02% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.99% : 0.000002s : 12: predicate.shard_identity_eliminate 1.37% : 0.000003s : 18: predicate.special_op_eliminate 0.90% : 0.000002s : 12: predicate.specialize_transform 1.02% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.37% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.89% : 0.000002s : 14: predicate.switch_defer_inline 1.69% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.20% : 0.000009s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.82% : 0.000002s : 13: predicate.transpose_eliminate 1.75% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.63% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.97% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.63% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.43% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.50% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.54% : 0.000001s : 6: predicate.value_based_eliminate 0.87% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.84% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000116 4 8.60% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.40% : 0.000106s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.086930 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000057s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.13% : 0.000110s : 1: auto_monad 0.03% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000306s : 1: bootstrap 0.02% : 0.000018s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.04% : 0.000038s : 1: distribtued_split 0.58% : 0.000506s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000004s : 1: label_micro_interleaved_index 0.60% : 0.000520s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001054s : 80: opt.transform.opt_a 0.06% : 0.000049s : 1: opt.transform.opt_after_cconv 0.17% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000051s : 1: opt.transform.opt_trans_graph 0.03% : 0.000029s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 5.80% : 0.005044s : 1: opt_a 0.15% : 0.000133s : 1: opt_after_cconv 0.28% : 0.000243s : 1: opt_b 7.67% : 0.006663s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000026s : 1: pre_auto_parallel 0.02% : 0.000016s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.23% : 0.000203s : 1: renormalize.infer 0.20% : 0.000170s : 1: renormalize.specialize 0.01% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000137s : 1: rewriter_after_opt_a 0.04% : 0.000034s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000082s : 1: symbol_engine_optimizer 78.80% : 0.068502s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.57% : 0.002236s : 1: type_inference 0.07% : 0.000059s : 1: validate Time group info: ------[substitution.] 0.000116 63 4.49% : 0.000005s : 2: substitution.depend_value_elim 1.90% : 0.000002s : 5: substitution.elim_not_effective 1.63% : 0.000002s : 5: substitution.fold_const_symbol 8.99% : 0.000010s : 6: substitution.graph_param_transform 46.99% : 0.000054s : 1: substitution.inline 4.29% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.45% : 0.000004s : 6: substitution.load_eliminater 2.18% : 0.000003s : 2: substitution.reduce_all_const_elim 6.88% : 0.000008s : 10: substitution.remove_not_recompute_node 2.43% : 0.000003s : 2: substitution.replace_old_param 8.66% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.10% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002196 2 90.35% : 0.001984s : 1: type_inference.infer 9.65% : 0.000212s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000053 1 100.00% : 0.000053s : 1: match.inline ------[predicate.] 0.000224 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.23% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.83% : 0.000002s : 13: predicate.addn_zero_filter 0.81% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.24% : 0.000005s : 25: predicate.arithmetic_simplify 0.88% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.78% : 0.000002s : 12: predicate.compare_switch_simplify 0.25% : 0.000001s : 6: predicate.const_output_eliminate 0.54% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.27% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.91% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.26% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_depend_swap 1.87% : 0.000004s : 31: predicate.environ_get_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.18% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.13% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000000s : 6: predicate.fold_const_symbol 0.89% : 0.000002s : 12: predicate.get_grad_eliminate 0.31% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.51% : 0.000012s : 63: predicate.inline 1.08% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.01% : 0.000002s : 12: predicate.less_batch_normalization 1.71% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.48% : 0.000006s : 38: predicate.load_eliminater 1.28% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.36% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.72% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.82% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.78% : 0.000002s : 13: predicate.minmaximum_grad 0.72% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.07% : 0.000002s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.82% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.03% : 0.000002s : 13: predicate.reduce_eliminate 0.54% : 0.000001s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.79% : 0.000002s : 13: predicate.reshape_eliminate 0.78% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.47% : 0.000001s : 6: predicate.row_tensor_eliminate 0.99% : 0.000002s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.38% : 0.000003s : 18: predicate.special_op_eliminate 0.99% : 0.000002s : 12: predicate.specialize_transform 1.03% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.35% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.24% : 0.000009s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.86% : 0.000002s : 13: predicate.transpose_eliminate 1.86% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.72% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.67% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.56% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.73% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.37% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.61% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.48% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.48% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000120 4 9.40% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.60% : 0.000109s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087079 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.06% : 0.000054s : 1: add_recomputation 0.01% : 0.000009s : 1: assign_add_opt 0.12% : 0.000108s : 1: auto_monad 0.03% : 0.000028s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000307s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000042s : 1: distribtued_split 0.57% : 0.000500s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.61% : 0.000533s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001057s : 80: opt.transform.opt_a 0.06% : 0.000048s : 1: opt.transform.opt_after_cconv 0.17% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.03% : 0.000030s : 3: opt.transform.special_op_eliminate 0.05% : 0.000044s : 4: opt.transform.symbol_engine_opt 5.79% : 0.005044s : 1: opt_a 0.15% : 0.000129s : 1: opt_after_cconv 0.28% : 0.000240s : 1: opt_b 7.65% : 0.006664s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000014s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000016s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.01% : 0.000012s : 1: remove_dup_value 0.24% : 0.000210s : 1: renormalize.infer 0.20% : 0.000175s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000137s : 1: rewriter_after_opt_a 0.04% : 0.000034s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000083s : 1: symbol_engine_optimizer 78.81% : 0.068630s : 1: task_emit 0.09% : 0.000074s : 1: tuple_transform 2.57% : 0.002235s : 1: type_inference 0.07% : 0.000061s : 1: validate TotalTime = 0.0791354, [21] [bootstrap]: 0.00030468 [type_inference]: 0.00253325 [auto_monad]: 0.00012929 [graph_reusing]: 2.3297e-06 [inline]: 1.54041e-06 [parallel-infer-symbol]: 2.03028e-06 [pre_auto_parallel]: 2.50302e-05 [insert-virtual-dataset]: 2.7502e-06 [parallel-infer-symbol-second]: 4.30271e-07 [dataset_repeat_opt]: 9.4017e-07 [pipeline_split]: 1.53016e-06 [optimize]: 0.00695586, [52] [py_interpret_to_execute]: 1.39303e-05 [rewriter_before_opt_a]: 3.54899e-05 [opt_a]: 0.00523536, [2] [Cycle 1]: 0.00150186, [43] [expand_dump_flag]: 3.39979e-06 [switch_simplify]: 2.85702e-05 [loop_unroll]: 1.32299e-05 [a_1]: 0.00033754 [recompute_prepare]: 9.02032e-06 [updatestate_depend_eliminate]: 7.96979e-06 [updatestate_assign_eliminate]: 5.73974e-06 [updatestate_loads_eliminate]: 7.05989e-06 [parameter_eliminate]: 3.53018e-06 [a_2]: 0.00011571 [accelerated_algorithm]: 8.52998e-06 [shard]: 1.8999e-06 [meta_shard_fg_expand]: 3.93996e-06 [shard_inline]: 8.12998e-06 [auto_parallel]: 1.18399e-05 [parallel]: 7.35978e-06 [flash_sp]: 1.13798e-05 [merge_comm]: 7.7202e-06 [allreduce_fusion]: 5.85988e-06 [matmul_add_comm_reduction]: 1.10003e-05 [allreduce_slice_to_reducescatter]: 5.10365e-07 [virtual_shard_identity]: 1.10799e-05 [virtual_dataset]: 7.83987e-06 [get_grad_eliminate_]: 7.29039e-06 [virtual_output]: 7.66013e-06 [merge_forward]: 5.22984e-06 [cell_reuse_recompute_pass]: 1.72993e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.75401e-05 [before_grad]: 1.31899e-05 [inplace_validation]: 5.01983e-06 [meta_fg_expand]: 5.13997e-06 [inplace_validation_after_expand]: 6.04009e-06 [flash_sp_send_recv_attached]: 4.38979e-06 [receive_attached]: 2.42982e-06 [after_resolve]: 1.24602e-05 [a_after_grad]: 1.215e-05 [special_op_eliminate]: 7.57026e-06 [renormalize]: 0.00042363 [add_forward_monad_depend]: 2.92016e-06 [auto_monad_grad]: 1.32993e-06 [auto_monad_eliminator]: 3.06698e-05 [cse]: 3.32501e-05 [a_3]: 5.77499e-05 [Cycle 2]: 0.00076462, [43] [expand_dump_flag]: 1.05007e-06 [switch_simplify]: 9.35001e-06 [loop_unroll]: 7.83009e-06 [a_1]: 0.00019952 [recompute_prepare]: 7.29971e-06 [updatestate_depend_eliminate]: 5.68992e-06 [updatestate_assign_eliminate]: 4.92996e-06 [updatestate_loads_eliminate]: 5.34998e-06 [parameter_eliminate]: 1.23028e-06 [a_2]: 0.00010257 [accelerated_algorithm]: 8.01962e-06 [shard]: 1.15996e-06 [meta_shard_fg_expand]: 2.7502e-06 [shard_inline]: 7.7798e-06 [auto_parallel]: 1.06599e-05 [parallel]: 3.69037e-06 [flash_sp]: 2.96999e-06 [merge_comm]: 6.05965e-06 [allreduce_fusion]: 5.0799e-06 [matmul_add_comm_reduction]: 8.04011e-06 [allreduce_slice_to_reducescatter]: 3.19909e-07 [virtual_shard_identity]: 8.48016e-06 [virtual_dataset]: 7.3202e-06 [get_grad_eliminate_]: 7.07014e-06 [virtual_output]: 7.09016e-06 [merge_forward]: 4.55976e-06 [cell_reuse_recompute_pass]: 1.79e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.491e-05 [before_grad]: 1.22003e-05 [inplace_validation]: 4.36977e-06 [meta_fg_expand]: 4.74043e-06 [inplace_validation_after_expand]: 5.34998e-06 [flash_sp_send_recv_attached]: 8.801e-07 [receive_attached]: 7.19912e-07 [after_resolve]: 9.25967e-06 [a_after_grad]: 1.13598e-05 [special_op_eliminate]: 7.27968e-06 [renormalize]: 7.03149e-08 [add_forward_monad_depend]: 7.69738e-07 [auto_monad_grad]: 1.11992e-06 [auto_monad_eliminator]: 1.74502e-05 [cse]: 1.984e-05 [a_3]: 4.77401e-05 [py_interpret_to_execute_after_opt_a]: 9.35979e-06 [slice_cell_reuse_recomputed_activation]: 2.21003e-06 [rewriter_after_opt_a]: 0.00021162 [convert_after_rewriter]: 1.02799e-05 [order_py_execute_after_rewriter]: 5.57024e-06 [opt_b]: 0.00024375, [1] [Cycle 1]: 0.00023748, [7] [b_1]: 0.0001605 [b_2]: 1.00099e-05 [updatestate_depend_eliminate]: 5.46966e-06 [updatestate_assign_eliminate]: 4.40003e-06 [updatestate_loads_eliminate]: 5.26989e-06 [renormalize]: 3.1013e-07 [cse]: 2.03201e-05 [optimize_parallel_all_gather_comm]: 7.75e-06 [overlap_param_gather]: 7.30157e-07 [cconv]: 1.66604e-05 [loop_unroll]: 0.00047306 [opt_after_cconv]: 0.00013234, [1] [Cycle 1]: 0.00012613, [7] [c_1]: 5.14202e-05 [parameter_eliminate]: 2.60957e-06 [updatestate_depend_eliminate]: 7.77002e-06 [updatestate_assign_eliminate]: 4.71016e-06 [updatestate_loads_eliminate]: 5.41983e-06 [cse]: 2.21701e-05 [renormalize]: 4.10248e-07 [remove_dup_value]: 9.55956e-06 [tuple_transform]: 6.74701e-05, [1] [Cycle 1]: 6.29602e-05, [2] [d_1]: 5.404e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.43005e-06 [add_cache_embedding]: 1.14599e-05 [add_recomputation]: 6.07898e-05 [cse_after_recomputation]: 2.71597e-05, [1] [Cycle 1]: 2.21599e-05, [1] [cse]: 1.689e-05 [environ_conv]: 7.76025e-06 [swap_dp_allreduce_reducescatter]: 1.10599e-05 [bias_add_comm_swap]: 2.31015e-06 [label_micro_interleaved_index]: 2.35019e-06 [label_fine_grained_interleaved_index]: 2.04984e-06 [merge_cast_opt]: 1.14972e-06 [slice_recompute_activation]: 1.76998e-06 [micro_interleaved_order_control]: 1.89012e-06 [assign_add_opt]: 7.64988e-06 [ForceFp32Comm]: 8.49832e-07 [remove_cast_before_assign_add]: 9.49949e-07 [full_micro_interleaved_order_control]: 2.00979e-06 [reorder_send_recv_between_fp_bp]: 2.12993e-06 [comm_op_add_attrs]: 9.99775e-07 [add_comm_op_reuse_tag]: 1.10967e-06 [interleave_split_concat_branches]: 8.79634e-07 [interleave_parallel_branches]: 8.30274e-07 [overlap_opt_shard_in_pipeline]: 1.0198e-06 [overlap_opt_shard_grad_in_pipeline]: 3.05008e-06 [control_data_broadcast_order]: 1.08033e-06 [grouped_pairwise_exchange_alltoall]: 1.38022e-06 [offloading_packed_experts]: 1.32015e-06 [overlap_recompute_and_grad_model_parallel]: 2.17976e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.29691e-07 [overlap_recompute_allgather_and_fa_grad]: 1.10036e-06 [overlap_grad_ring_attention]: 1.86963e-06 [overlap_grad_flash_sp]: 1.44201e-05 [begin_end_overlap_inline]: 7.59959e-07 [split_matmul_comm_elemetwise]: 2.31015e-06 [split_layernorm_comm]: 1.64006e-06 [handle_group_info]: 9.70438e-07 [symbol_engine_optimizer]: 8.49999e-05, [1] [Cycle 1]: 8.03401e-05, [6] [build]: 4.34043e-06 [elim_shapecalc]: 1.12299e-05 [elim_not_effective]: 1.61002e-05 [opt_reshape]: 8.89041e-06 [fold_const_symbol]: 1.375e-05 [renormalize]: 3.1013e-07 [pipeline_parallel_scheduler]: 1.86032e-06 [auto_monad_reorder]: 3.02996e-05 [get_jit_bprop_graph]: 5.09899e-07 [rewriter_after_jit_bprop_graph]: 2.39816e-07 [eliminate_special_op_node]: 0.00048564 [distribtued_split]: 3.56701e-05 [validate]: 3.28198e-05 [task_emit]: 0.0683345 [execute]: 1.08699e-05 Sums bootstrap : 0.000305s : 0.41% type_inference : 0.002533s : 3.37% auto_monad : 0.000129s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000537s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000218s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.03% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000014s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000424s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000048s : 0.06% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000212s : 0.28% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000017s : 0.02% optimize.loop_unroll : 0.000473s : 0.63% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000011s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000486s : 0.65% distribtued_split : 0.000036s : 0.05% validate : 0.000033s : 0.04% task_emit : 0.068335s : 90.87% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000127 63 3.85% : 0.000005s : 2: substitution.depend_value_elim 1.98% : 0.000003s : 5: substitution.elim_not_effective 2.16% : 0.000003s : 5: substitution.fold_const_symbol 5.02% : 0.000006s : 6: substitution.graph_param_transform 52.11% : 0.000066s : 1: substitution.inline 3.92% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.38% : 0.000004s : 6: substitution.load_eliminater 1.89% : 0.000002s : 2: substitution.reduce_all_const_elim 7.18% : 0.000009s : 10: substitution.remove_not_recompute_node 2.10% : 0.000003s : 2: substitution.replace_old_param 8.70% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.71% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002504 2 89.26% : 0.002236s : 1: type_inference.infer 10.74% : 0.000269s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000231 1420 0.90% : 0.000002s : 13: predicate.accumulaten_eliminater 1.03% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.74% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.16% : 0.000005s : 25: predicate.arithmetic_simplify 0.80% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.36% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.99% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.50% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.09% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_depend_swap 1.83% : 0.000004s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.78% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.34% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.10% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.28% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.72% : 0.000013s : 63: predicate.inline 1.07% : 0.000002s : 12: predicate.inline_without_move 0.37% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.11% : 0.000003s : 12: predicate.less_batch_normalization 1.67% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.33% : 0.000005s : 38: predicate.load_eliminater 1.37% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.33% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.91% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.74% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.71% : 0.000002s : 13: predicate.minmaximum_grad 0.76% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.43% : 0.000001s : 6: predicate.parallel_virtual_node 1.20% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.10% : 0.000003s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.29% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.83% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.36% : 0.000003s : 18: predicate.special_op_eliminate 0.90% : 0.000002s : 12: predicate.specialize_transform 1.24% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 1.02% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.18% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.48% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.87% : 0.000002s : 13: predicate.transpose_eliminate 1.94% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.83% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.61% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.85% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.61% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.34% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.39% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.56% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.99% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000143 4 7.58% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.42% : 0.000132s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087882 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000065s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.16% : 0.000142s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000332s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000044s : 1: distribtued_split 0.57% : 0.000499s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000482s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001086s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000036s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 5.96% : 0.005239s : 1: opt_a 0.16% : 0.000137s : 1: opt_after_cconv 0.28% : 0.000247s : 1: opt_b 7.92% : 0.006963s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.02% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.26% : 0.000229s : 1: renormalize.infer 0.22% : 0.000190s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.25% : 0.000218s : 1: rewriter_after_opt_a 0.05% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.02% : 0.000014s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000088s : 1: symbol_engine_optimizer 77.79% : 0.068365s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.90% : 0.002552s : 1: type_inference 0.08% : 0.000066s : 1: validate TotalTime = 0.0799141, [21] [bootstrap]: 0.00030448 [type_inference]: 0.00253329 [auto_monad]: 0.00013081 [graph_reusing]: 2.10013e-06 [inline]: 1.32993e-06 [parallel-infer-symbol]: 2.30968e-06 [pre_auto_parallel]: 2.58302e-05 [insert-virtual-dataset]: 3.2899e-06 [parallel-infer-symbol-second]: 3.60422e-07 [dataset_repeat_opt]: 1.40024e-06 [pipeline_split]: 1.16043e-06 [optimize]: 0.00696015, [52] [py_interpret_to_execute]: 1.407e-05 [rewriter_before_opt_a]: 3.58401e-05 [opt_a]: 0.00524021, [2] [Cycle 1]: 0.00150297, [43] [expand_dump_flag]: 3.64007e-06 [switch_simplify]: 2.86601e-05 [loop_unroll]: 1.32397e-05 [a_1]: 0.00033639 [recompute_prepare]: 8.38982e-06 [updatestate_depend_eliminate]: 8.80007e-06 [updatestate_assign_eliminate]: 6.55977e-06 [updatestate_loads_eliminate]: 6.59004e-06 [parameter_eliminate]: 3.40026e-06 [a_2]: 0.00011495 [accelerated_algorithm]: 8.67993e-06 [shard]: 1.91014e-06 [meta_shard_fg_expand]: 3.49991e-06 [shard_inline]: 8.27992e-06 [auto_parallel]: 1.19898e-05 [parallel]: 7.58003e-06 [flash_sp]: 1.16299e-05 [merge_comm]: 7.32997e-06 [allreduce_fusion]: 5.5898e-06 [matmul_add_comm_reduction]: 1.10101e-05 [allreduce_slice_to_reducescatter]: 5.80214e-07 [virtual_shard_identity]: 9.6797e-06 [virtual_dataset]: 7.83987e-06 [get_grad_eliminate_]: 8.11974e-06 [virtual_output]: 7.49994e-06 [merge_forward]: 5.81983e-06 [cell_reuse_recompute_pass]: 1.90036e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.653e-05 [before_grad]: 1.371e-05 [inplace_validation]: 5.41005e-06 [meta_fg_expand]: 5.3402e-06 [inplace_validation_after_expand]: 6.29993e-06 [flash_sp_send_recv_attached]: 4.86011e-06 [receive_attached]: 2.88989e-06 [after_resolve]: 1.09603e-05 [a_after_grad]: 1.24103e-05 [special_op_eliminate]: 7.45011e-06 [renormalize]: 0.00042362 [add_forward_monad_depend]: 3.2899e-06 [auto_monad_grad]: 2.1602e-06 [auto_monad_eliminator]: 3.22904e-05 [cse]: 3.30401e-05 [a_3]: 5.94598e-05 [Cycle 2]: 0.00076885, [43] [expand_dump_flag]: 1.22981e-06 [switch_simplify]: 9.31975e-06 [loop_unroll]: 7.53999e-06 [a_1]: 0.0001992 [recompute_prepare]: 7.30995e-06 [updatestate_depend_eliminate]: 6.04009e-06 [updatestate_assign_eliminate]: 4.71994e-06 [updatestate_loads_eliminate]: 5.30994e-06 [parameter_eliminate]: 1.17021e-06 [a_2]: 0.00010257 [accelerated_algorithm]: 8.08015e-06 [shard]: 1.13016e-06 [meta_shard_fg_expand]: 2.34973e-06 [shard_inline]: 7.60984e-06 [auto_parallel]: 1.10799e-05 [parallel]: 3.62005e-06 [flash_sp]: 2.59979e-06 [merge_comm]: 5.9302e-06 [allreduce_fusion]: 5.57024e-06 [matmul_add_comm_reduction]: 8.11042e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 8.5202e-06 [virtual_dataset]: 7.3202e-06 [get_grad_eliminate_]: 7.47014e-06 [virtual_output]: 7.31973e-06 [merge_forward]: 4.80004e-06 [cell_reuse_recompute_pass]: 1.72015e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.508e-05 [before_grad]: 1.24197e-05 [inplace_validation]: 4.19002e-06 [meta_fg_expand]: 4.71994e-06 [inplace_validation_after_expand]: 5.03007e-06 [flash_sp_send_recv_attached]: 9.19681e-07 [receive_attached]: 7.70204e-07 [after_resolve]: 9.77982e-06 TotalTime = 0.0799458, [21] [bootstrap]: 0.00033483 [type_inference]: 0.00246593 [auto_monad]: 0.00011877 [graph_reusing]: 2.33995e-06 [inline]: 1.34995e-06 [parallel-infer-symbol]: 1.8198e-06 [pre_auto_parallel]: 2.53501e-05 [insert-virtual-dataset]: 2.49036e-06 [parallel-infer-symbol-second]: 3.59956e-07 [dataset_repeat_opt]: 1.64006e-06 [pipeline_split]: 1.51014e-06 [optimize]: 0.0070512, [52] [py_interpret_to_execute]: 1.35698e-05 [rewriter_before_opt_a]: 3.25399e-05 [opt_a]: 0.00532063, [2] [Cycle 1]: 0.00150472, [43] [expand_dump_flag]: 3.23029e-06 [switch_simplify]: 2.902e-05 [loop_unroll]: 1.29901e-05 [a_1]: 0.00035605 [recompute_prepare]: 8.57003e-06 [updatestate_depend_eliminate]: 8.31997e-06 [updatestate_assign_eliminate]: 6.04987e-06 [updatestate_loads_eliminate]: 6.52997e-06 [parameter_eliminate]: 3.2098e-06 [a_2]: 0.00011825 [accelerated_algorithm]: 8.25012e-06 [shard]: 2.08011e-06 [meta_shard_fg_expand]: 3.75975e-06 [shard_inline]: 8.44011e-06 [auto_parallel]: 1.20401e-05 [parallel]: 6.98026e-06 [flash_sp]: 1.04704e-05 [merge_comm]: 7.68015e-06 [allreduce_fusion]: 5.24987e-06 [matmul_add_comm_reduction]: 1.03801e-05 [allreduce_slice_to_reducescatter]: 4.49829e-07 [virtual_shard_identity]: 9.2499e-06 [virtual_dataset]: 8.33999e-06 [get_grad_eliminate_]: 7.42031e-06 [virtual_output]: 7.41007e-06 [merge_forward]: 5.91995e-06 [cell_reuse_recompute_pass]: 1.64984e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.66004e-05 [before_grad]: 1.34902e-05 [inplace_validation]: 4.81028e-06 [meta_fg_expand]: 5.1898e-06 [inplace_validation_after_expand]: 5.32996e-06 [flash_sp_send_recv_attached]: 4.11971e-06 [receive_attached]: 2.27988e-06 [after_resolve]: 1.11801e-05 [a_after_grad]: 1.22101e-05 [special_op_eliminate]: 8.32975e-06 [renormalize]: 0.00042227 [add_forward_monad_depend]: 3.40026e-06 [auto_monad_grad]: 1.58977e-06 [auto_monad_eliminator]: 2.88403e-05 [cse]: 2.76398e-05 [a_3]: 5.762e-05 [Cycle 2]: 0.00077058, [43] [expand_dump_flag]: 1.09989e-06 [switch_simplify]: 9.13022e-06 [loop_unroll]: 7.7798e-06 [a_1]: 0.00020056 [recompute_prepare]: 7.46967e-06 [updatestate_depend_eliminate]: 6.27991e-06 [updatestate_assign_eliminate]: 5.03985e-06 [updatestate_loads_eliminate]: 5.44032e-06 [parameter_eliminate]: 1.2801e-06 [a_2]: 0.00010385 [accelerated_algorithm]: 8.29017e-06 [shard]: 1.30991e-06 [meta_shard_fg_expand]: 2.58023e-06 [shard_inline]: 8.00006e-06 [auto_parallel]: 1.08904e-05 [parallel]: 3.70014e-06 [flash_sp]: 3.34019e-06 [merge_comm]: 5.8501e-06 [allreduce_fusion]: 5.4203e-06 [matmul_add_comm_reduction]: 7.96979e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 8.40006e-06 [virtual_dataset]: 7.38027e-06 [get_grad_eliminate_]: 7.43009e-06 [virtual_output]: 7.11018e-06 [merge_forward]: 4.61005e-06 [cell_reuse_recompute_pass]: 1.95997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.55801e-05 [before_grad]: 1.19796e-05 [inplace_validation]: 4.21982e-06 [meta_fg_expand]: 4.79026e-06 [inplace_validation_after_expand]: 5.1097e-06 [flash_sp_send_recv_attached]: 9.49949e-07 [receive_attached]: 7.39936e-07 [after_resolve]: 9.43989e-06 [a_after_grad]: 1.17598e-05 [special_op_eliminate]: 7.28993e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 1.11014e-06 [auto_monad_grad]: 1.18976e-06 [auto_monad_eliminator]: 1.85599e-05 [cse]: 1.94199e-05 [a_3]: 4.86602e-05 [py_interpret_to_execute_after_opt_a]: 8.80985e-06 [slice_cell_reuse_recomputed_activation]: 1.18976e-06 [rewriter_after_opt_a]: 0.00020653 [convert_after_rewriter]: 9.75002e-06 [order_py_execute_after_rewriter]: 6.23008e-06 [opt_b]: 0.00023968, [1] [Cycle 1]: 0.00023434, [7] [b_1]: 0.00015996 [b_2]: 9.54978e-06 [updatestate_depend_eliminate]: 5.81006e-06 [updatestate_assign_eliminate]: 4.54998e-06 [updatestate_loads_eliminate]: 5.20004e-06 [renormalize]: 2.10013e-07 [cse]: 1.89799e-05 [optimize_parallel_all_gather_comm]: 8.40984e-06 [overlap_param_gather]: 1.53994e-06 [cconv]: 2.30698e-05 [loop_unroll]: 0.00046774 [opt_after_cconv]: 0.0001323, [1] [Cycle 1]: 0.00012635, [7] [c_1]: 5.27203e-05 [parameter_eliminate]: 2.20025e-06 [updatestate_depend_eliminate]: 7.85012e-06 [updatestate_assign_eliminate]: 4.45032e-06 [updatestate_loads_eliminate]: 5.56e-06 [cse]: 2.13203e-05 [renormalize]: 4.29805e-07 [remove_dup_value]: 1.21598e-05 [tuple_transform]: 6.81398e-05, [1] [Cycle 1]: 6.34799e-05, [2] [d_1]: 5.42304e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 1.79978e-06 [add_cache_embedding]: 1.33803e-05 [add_recomputation]: 6.12098e-05 [cse_after_recomputation]: 4.71403e-05, [1] [Cycle 1]: 4.25e-05, [1] [cse]: 3.63002e-05 [environ_conv]: 6.4699e-06 [swap_dp_allreduce_reducescatter]: 7.37002e-06 [bias_add_comm_swap]: 1.30991e-06 [label_micro_interleaved_index]: 1.26008e-06 [label_fine_grained_interleaved_index]: 1.04029e-06 [merge_cast_opt]: 5.69969e-07 [slice_recompute_activation]: 4.88991e-06 [micro_interleaved_order_control]: 8.69855e-07 [assign_add_opt]: 6.42985e-06 [ForceFp32Comm]: 5.09899e-07 [remove_cast_before_assign_add]: 4.29805e-07 [full_micro_interleaved_order_control]: 9.19681e-07 [reorder_send_recv_between_fp_bp]: 8.09785e-07 [comm_op_add_attrs]: 4.80097e-07 [add_comm_op_reuse_tag]: 5.40167e-07 [interleave_split_concat_branches]: 4.49829e-07 [interleave_parallel_branches]: 4.80097e-07 [overlap_opt_shard_in_pipeline]: 6.10016e-07 [overlap_opt_shard_grad_in_pipeline]: 8.40053e-07 [control_data_broadcast_order]: 5.29923e-07 [grouped_pairwise_exchange_alltoall]: 5.49946e-07 [offloading_packed_experts]: 4.90341e-07 [overlap_recompute_and_grad_model_parallel]: 8.09785e-07 [overlap_grad_matmul_and_grad_allreduce]: 4.30271e-07 [overlap_recompute_allgather_and_fa_grad]: 4.4005e-07 [overlap_grad_ring_attention]: 9.49949e-07 [overlap_grad_flash_sp]: 1.15e-05 [begin_end_overlap_inline]: 3.7998e-07 [split_matmul_comm_elemetwise]: 9.29926e-07 [split_layernorm_comm]: 7.70204e-07 [handle_group_info]: 4.20026e-07 [symbol_engine_optimizer]: 8.944e-05, [1] [Cycle 1]: 8.45902e-05, [6] [build]: 4.05032e-06 [elim_shapecalc]: 1.22203e-05 [elim_not_effective]: 2.09403e-05 [opt_reshape]: 8.71019e-06 [fold_const_symbol]: 1.31503e-05 [renormalize]: 2.59839e-07 [pipeline_parallel_scheduler]: 9.09902e-07 [auto_monad_reorder]: 2.39797e-05 [get_jit_bprop_graph]: 4.49829e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.00048381 [distribtued_split]: 3.85703e-05 [validate]: 3.296e-05 [task_emit]: 0.0691273 [execute]: 7.27037e-06 Sums bootstrap : 0.000304s : 0.40% type_inference : 0.002533s : 3.33% auto_mon [a_after_grad]: 1.18702e-05 [special_op_eliminate]: 7.28993e-06 [renormalize]: 1.09896e-07 [add_forward_monad_depend]: 8.00006e-07 [auto_monad_grad]: 1.17021e-06 [auto_monad_eliminator]: 1.81501e-05 [cse]: 1.81901e-05 [a_3]: 4.67999e-05 [py_interpret_to_execute_after_opt_a]: 8.74978e-06 [slice_cell_reuse_recomputed_activation]: 2.63983e-06 [rewriter_after_opt_a]: 0.00020617 [convert_after_rewriter]: 9.83989e-06 [order_py_execute_after_rewriter]: 6.23986e-06 [opt_b]: 0.00024446, [1] [Cycle 1]: 0.00023849, [7] [b_1]: 0.00016281 [b_2]: 1.00397e-05 [updatestate_depend_eliminate]: 5.34998e-06 [updatestate_assign_eliminate]: 4.40003e-06 [updatestate_loads_eliminate]: 5.37001e-06 [renormalize]: 3.19909e-07 [cse]: 1.89599e-05 [optimize_parallel_all_gather_comm]: 8.6599e-06 [overlap_param_gather]: 1.09011e-06 [cconv]: 2.209e-05 [loop_unroll]: 0.00049393 [opt_after_cconv]: 0.00013186, [1] [Cycle 1]: 0.00012588, [7] [c_1]: 5.18099e-05 [parameter_eliminate]: 2.49036e-06 [updatestate_depend_eliminate]: 8.32975e-06 [updatestate_assign_eliminate]: 4.65009e-06 [updatestate_loads_eliminate]: 5.34998e-06 [cse]: 2.12099e-05 [renormalize]: 3.50177e-07 [remove_dup_value]: 1.28802e-05 [tuple_transform]: 6.73598e-05, [1] [Cycle 1]: 6.28401e-05, [2] [d_1]: 5.38998e-05 [renormalize]: 1.60187e-07 [partial_unused_args_eliminate]: 2.41958e-06 [add_cache_embedding]: 1.331e-05 [add_recomputation]: 6.15898e-05 [cse_after_recomputation]: 2.43201e-05, [1] [Cycle 1]: 1.98102e-05, [1] [cse]: 1.53002e-05 [environ_conv]: 7.58003e-06 [swap_dp_allreduce_reducescatter]: 7.83009e-06 [bias_add_comm_swap]: 2.19001e-06 [label_micro_interleaved_index]: 1.60979e-06 [label_fine_grained_interleaved_index]: 1.74996e-06 [merge_cast_opt]: 1.29035e-06 [slice_recompute_activation]: 1.62981e-06 [micro_interleaved_order_control]: 1.5297e-06 [assign_add_opt]: 8.04011e-06 [ForceFp32Comm]: 7.70204e-07 [remove_cast_before_assign_add]: 6.79865e-07 [full_micro_interleaved_order_control]: 1.81003e-06 [reorder_send_recv_between_fp_bp]: 2.12993e-06 [comm_op_add_attrs]: 1.15996e-06 [add_comm_op_reuse_tag]: 1.12969e-06 [interleave_split_concat_branches]: 7.69738e-07 [interleave_parallel_branches]: 6.10016e-07 [overlap_opt_shard_in_pipeline]: 1.17999e-06 [overlap_opt_shard_grad_in_pipeline]: 2.00002e-06 [control_data_broadcast_order]: 1.09989e-06 [grouped_pairwise_exchange_alltoall]: 1.53016e-06 [offloading_packed_experts]: 1.05985e-06 [overlap_recompute_and_grad_model_parallel]: 1.77976e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.30391e-07 [overlap_recompute_allgather_and_fa_grad]: 8.09785e-07 [overlap_grad_ring_attention]: 1.68011e-06 [overlap_grad_flash_sp]: 1.48104e-05 [begin_end_overlap_inline]: 7.10133e-07 [split_matmul_comm_elemetwise]: 1.72015e-06 [split_layernorm_comm]: 1.61957e-06 [handle_group_info]: 8.801e-07 [symbol_engine_optimizer]: 8.23201e-05, [1] [Cycle 1]: 7.78199e-05, [6] [build]: 3.49013e-06 [elim_shapecalc]: 1.16997e-05 [elim_not_effective]: 1.53799e-05 [opt_reshape]: 8.61986e-06 [fold_const_symbol]: 1.38599e-05 [renormalize]: 1.69966e-07 [pipeline_parallel_scheduler]: 1.92039e-06 [auto_monad_reorder]: 2.86899e-05 [get_jit_bprop_graph]: 5.0012e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.00053528 [distribtued_split]: 4.03998e-05 [validate]: 3.54601e-05 [task_emit]: 0.069048 [execute]: 1.11996e-05 Sums bootstrap : 0.000335s : 0.44% type_inference : 0.002466s : 3.25% ad : 0.000131s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000536s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000218s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000424s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000207s : 0.27% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.21% optimize.opt_b.b_2 : auto_monad : 0.000119s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000033s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000557s : 0.73% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000222s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000422s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000047s : 0.06% optimize.opt_a.cse : 0.000046s : 0.06% optimize.opt_a.a_3 : 0.000104s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000206s : 0.27% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000468s : 0.62% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000036s : 0.05% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000005s : 0.01% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000006s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000000s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000000s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000000s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000000s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000011s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000000s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000021s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_opti : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000494s : 0.65% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000015s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbolmizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000484s : 0.64% distribtued_split : 0.000039s : 0.05% validate : 0.000033s : 0.04% task_emit : 0.069127s : 90.97% execute : 0.000007s : 0.01% _engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000535s : 0.70% distribtued_split : 0.000040s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.069048s : 90.92% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000129 63 5.10% : 0.000007s : 2: substitution.depend_value_elim 1.93% : 0.000002s : 5: substitution.elim_not_effective 1.93% : 0.000002s : 5: substitution.fold_const_symbol 5.40% : 0.000007s : 6: substitution.graph_param_transform 49.52% : 0.000064s : 1: substitution.inline 4.09% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.38% : 0.000004s : 6: substitution.load_eliminater 2.67% : 0.000003s : 2: substitution.reduce_all_const_elim 6.24% : 0.000008s : 10: substitution.remove_not_recompute_node 2.36% : 0.000003s : 2: substitution.replace_old_param 9.31% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.06% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002437 2 89.39% : 0.002179s : 1: type_inference.infer 10.61% : 0.000259s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000063 1 100.00% : 0.000063s : 1: match.inline ------[predicate.] 0.000228 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.26% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.69% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.24% : 0.000005s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.77% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.60% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.93% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.57% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.13% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_depend_swap 1.91% : 0.000004s : 31: predicate.environ_get_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.36% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.25% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.73% : 0.000013s : 63: predicate.inline 0.99% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.13% : 0.000003s : 12: predicate.less_batch_normalization 1.81% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.57% : 0.000006s : 38: predicate.load_eliminater 1.26% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.83% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.71% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.28% : 0.000003s : 19: predicate.partial_eliminate 0.81% : 0.000002s : 13: predicate.print_const_string_wrapper 0.83% : 0.000002s : 12: predicate.reduce_all_const_elim 1.08% : 0.000002s : 13: predicate.reduce_eliminate 0.54% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.79% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 6: predicate.row_tensor_eliminate 0.99% : 0.000002s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.41% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 0.97% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.94% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.27% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.89% : 0.000002s : 14: predicate.switch_defer_inline 1.71% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.30% : 0.000010s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.83% : 0.000002s : 13: predicate.transpose_eliminate 1.89% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.59% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.63% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.78% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.45% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.75% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.32% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.43% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.95% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.76% : 0.000002s : 12: predicate.virtual_output_eliminate 0.56% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000143 4 10.62% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.38% : 0.000128s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088806 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000132s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.40% : 0.000358s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000027s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.62% : 0.000549s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000504s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001105s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.00% : 0.005324s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.28% : 0.000247s : 1: opt_b 7.95% : 0.007059s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000231s : 1: renormalize.infer 0.21% : 0.000186s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.24% : 0.000212s : 1: rewriter_after_opt_a 0.04% : 0.000037s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000085s : 1: symbol_engine_optimizer 77.78% : 0.069074s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.80% : 0.002485s : 1: type_inference 0.08% : 0.000070s : 1: validate Time group info: ------[substitution.] 0.000128 63 5.43% : 0.000007s : 2: substitution.depend_value_elim 1.68% : 0.000002s : 5: substitution.elim_not_effective 1.42% : 0.000002s : 5: substitution.fold_const_symbol 5.31% : 0.000007s : 6: substitution.graph_param_transform 51.45% : 0.000066s : 1: substitution.inline 4.10% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.26% : 0.000004s : 6: substitution.load_eliminater 2.33% : 0.000003s : 2: substitution.reduce_all_const_elim 5.95% : 0.000008s : 10: substitution.remove_not_recompute_node 2.36% : 0.000003s : 2: substitution.replace_old_param 8.82% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.89% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002505 2 89.26% : 0.002236s : 1: type_inference.infer 10.74% : 0.000269s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000226 1420 0.82% : 0.000002s : 13: predicate.accumulaten_eliminater 1.12% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.80% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.11% : 0.000005s : 25: predicate.arithmetic_simplify 0.90% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.24% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.76% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.80% : 0.000002s : 13: predicate.dict_get_item_eliminator 1.00% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.53% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.13% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000002s : 19: predicate.environ_get_depend_swap 1.93% : 0.000004s : 31: predicate.environ_get_eliminate 1.24% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.33% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.90% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.68% : 0.000013s : 63: predicate.inline 1.12% : 0.000003s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.12% : 0.000003s : 12: predicate.less_batch_normalization 1.67% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000005s : 38: predicate.load_eliminater 1.17% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.25% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.89% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.80% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.70% : 0.000002s : 13: predicate.minmaximum_grad 0.82% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.21% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.92% : 0.000002s : 12: predicate.reduce_all_const_elim 1.02% : 0.000002s : 13: predicate.reduce_eliminate 0.50% : 0.000001s : 12: predicate.remove_not_recompute_node 1.18% : 0.000003s : 25: predicate.replace_applicator 0.50% : 0.000001s : 12: predicate.replace_old_param 0.33% : 0.000001s : 6: predicate.reset_defer_inline 0.85% : 0.000002s : 13: predicate.reshape_eliminate 0.86% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.55% : 0.000001s : 6: predicate.row_tensor_eliminate 1.05% : 0.000002s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.84% : 0.000002s : 12: predicate.shard_identity_eliminate 1.32% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 0.98% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.92% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.35% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.85% : 0.000002s : 14: predicate.switch_defer_inline 1.69% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.54% : 0.000010s : 43: predicate.switch_simplify 0.91% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.84% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.67% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.58% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.68% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.63% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.37% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.42% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.49% : 0.000001s : 6: predicate.value_based_eliminate 0.86% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000148 4 10.92% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.08% : 0.000132s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088663 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.16% : 0.000143s : 1: auto_monad 0.03% : 0.000030s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.37% : 0.000331s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.02% : 0.000013s : 1: convert_after_rewriter 0.06% : 0.000050s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000046s : 1: distribtued_split 0.56% : 0.000497s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000015s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.54% : 0.000476s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001083s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.06% : 0.000051s : 4: opt.transform.symbol_engine_opt 5.91% : 0.005244s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.27% : 0.000243s : 1: opt_b 7.86% : 0.006970s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.04% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.26% : 0.000228s : 1: renormalize.infer 0.21% : 0.000190s : 1: renormalize.specialize 0.00% : 0.000003s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.24% : 0.000212s : 1: rewriter_after_opt_a 0.05% : 0.000040s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.02% : 0.000017s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000092s : 1: symbol_engine_optimizer 77.99% : 0.069149s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.88% : 0.002551s : 1: type_inference 0.07% : 0.000066s : 1: validate TotalTime = 0.0805616, [21] [bootstrap]: 0.0003166 [type_inference]: 0.00249675 [auto_monad]: 0.00012316 [graph_reusing]: 2.02004e-06 [inline]: 1.53016e-06 [parallel-infer-symbol]: 2.00002e-06 [pre_auto_parallel]: 2.45697e-05 [insert-virtual-dataset]: 2.92994e-06 [parallel-infer-symbol-second]: 3.69735e-07 [dataset_repeat_opt]: 1.32015e-06 [pipeline_split]: 1.56043e-06 [optimize]: 0.00698937, [52] [py_interpret_to_execute]: 1.60602e-05 [rewriter_before_opt_a]: 3.58401e-05 [opt_a]: 0.00527145, [2] [Cycle 1]: 0.00151767, [43] [expand_dump_flag]: 3.25032e-06 [switch_simplify]: 2.91197e-05 [loop_unroll]: 1.314e-05 [a_1]: 0.00033724 [recompute_prepare]: 8.7698e-06 [updatestate_depend_eliminate]: 8.51974e-06 [updatestate_assign_eliminate]: 5.68014e-06 [updatestate_loads_eliminate]: 7.51019e-06 [parameter_eliminate]: 3.00957e-06 [a_2]: 0.00011861 [accelerated_algorithm]: 8.2301e-06 [shard]: 2.10013e-06 [meta_shard_fg_expand]: 4.31994e-06 [shard_inline]: 8.02986e-06 [auto_parallel]: 1.16797e-05 [parallel]: 7.70995e-06 [flash_sp]: 1.02702e-05 [merge_comm]: 7.75e-06 [allreduce_fusion]: 5.41983e-06 [matmul_add_comm_reduction]: 1.04299e-05 [allreduce_slice_to_reducescatter]: 4.49829e-07 [virtual_shard_identity]: 9.45991e-06 [virtual_dataset]: 8.26968e-06 [get_grad_eliminate_]: 7.24988e-06 [virtual_output]: 7.54977e-06 [merge_forward]: 7.24988e-06 [cell_reuse_recompute_pass]: 2.00979e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.74302e-05 [before_grad]: 1.44802e-05 [inplace_validation]: 5.64009e-06 [meta_fg_expand]: 5.56977e-06 [inplace_validation_after_expand]: 6.50994e-06 [flash_sp_send_recv_attached]: 5.07012e-06 [receive_attached]: 2.68966e-06 [after_resolve]: 1.15e-05 [a_after_grad]: 1.22199e-05 [special_op_eliminate]: 7.91997e-06 [renormalize]: 0.00043196 [add_forward_monad_depend]: 3.5502e-06 [auto_monad_grad]: 1.62004e-06 [auto_monad_eliminator]: 3.11499e-05 [cse]: 2.89702e-05 [a_3]: 5.73797e-05 [Cycle 2]: 0.00077856, [43] [expand_dump_flag]: 1.10036e-06 [switch_simplify]: 9.33977e-06 [loop_unroll]: 7.75e-06 [a_1]: 0.00020007 [recompute_prepare]: 7.35e-06 [updatestate_depend_eliminate]: 6.22962e-06 [updatestate_assign_eliminate]: 4.66965e-06 [updatestate_loads_eliminate]: 5.67967e-06 [parameter_eliminate]: 1.23028e-06 [a_2]: 0.0001041 [accelerated_algorithm]: 8.42987e-06 [shard]: 1.39e-06 [meta_shard_fg_expand]: 2.69013e-06 [shard_inline]: 7.53999e-06 [auto_parallel]: 1.09901e-05 [parallel]: 3.81004e-06 [flash_sp]: 3.36999e-06 [merge_comm]: 6.31018e-06 [allreduce_fusion]: 5.11995e-06 [matmul_add_comm_reduction]: 7.89016e-06 [allreduce_slice_to_reducescatter]: 2.90107e-07 [virtual_shard_identity]: 8.63988e-06 [virtual_dataset]: 7.60984e-06 [get_grad_eliminate_]: 7.40029e-06 [virtual_output]: 6.95977e-06 [merge_forward]: 4.65009e-06 [cell_reuse_recompute_pass]: 1.8701e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.55699e-05 [before_grad]: 1.228e-05 [inplace_validation]: 4.34974e-06 [meta_fg_expand]: 4.99003e-06 [inplace_validation_after_expand]: 5.30016e-06 [flash_sp_send_recv_attached]: 1.02026e-06 [receive_attached]: 7.29691e-07 [after_resolve]: 1.00001e-05 [a_after_grad]: 1.16699e-05 [special_op_eliminate]: 7.45989e-06 [renormalize]: 7.03149e-08 [add_forward_monad_depend]: 7.69738e-07 [auto_monad_grad]: 1.23028e-06 [auto_monad_eliminator]: 1.767e-05 [cse]: 1.83801e-05 [a_3]: 4.867e-05 [py_interpret_to_execute_after_opt_a]: 9.72999e-06 [slice_cell_reuse_recomputed_activation]: 2.35019e-06 [rewriter_after_opt_a]: 0.00017294 [convert_after_rewriter]: 9.98005e-06 [order_py_execute_after_rewriter]: 5.93998e-06 [opt_b]: 0.00024347, [1] [Cycle 1]: 0.00023743, [7] [b_1]: 0.00016205 [b_2]: 9.83989e-06 [updatestate_depend_eliminate]: 5.4203e-06 [updatestate_assign_eliminate]: 4.39025e-06 [updatestate_loads_eliminate]: 5.29969e-06 [renormalize]: 2.90107e-07 [cse]: 1.87904e-05 [optimize_parallel_all_gather_comm]: 8.42009e-06 [overlap_param_gather]: 1.51014e-06 [cconv]: 2.23704e-05 [loop_unroll]: 0.00049421 [opt_after_cconv]: 0.00013226, [1] [Cycle 1]: 0.00012593, [7] [c_1]: 5.107e-05 [parameter_eliminate]: 2.47965e-06 [updatestate_depend_eliminate]: 8.12998e-06 [updatestate_assign_eliminate]: 4.57978e-06 [updatestate_loads_eliminate]: 5.37001e-06 [cse]: 2.15401e-05 [renormalize]: 3.89758e-07 [remove_dup_value]: 1.20699e-05 [tuple_transform]: 6.88899e-05, [1] [Cycle 1]: 6.44098e-05, [2] [d_1]: 5.49001e-05 [renormalize]: 1.99769e-07 [partial_unused_args_eliminate]: 1.99024e-06 [add_cache_embedding]: 1.34399e-05 [add_recomputation]: 6.078e-05 [cse_after_recomputation]: 2.47201e-05, [1] [Cycle 1]: 2.00402e-05, [1] [cse]: 1.52797e-05 [environ_conv]: 7.20983e-06 [swap_dp_allreduce_reducescatter]: 7.35e-06 [bias_add_comm_swap]: 2.61003e-06 [label_micro_interleaved_index]: 2.25985e-06 [label_fine_grained_interleaved_index]: 1.74996e-06 [merge_cast_opt]: 1.22003e-06 [slice_recompute_activation]: 1.96975e-06 [micro_interleaved_order_control]: 1.87987e-06 [assign_add_opt]: 7.30995e-06 [ForceFp32Comm]: 7.79983e-07 [remove_cast_before_assign_add]: 1.05007e-06 [full_micro_interleaved_order_control]: 2.23005e-06 [reorder_send_recv_between_fp_bp]: 1.86963e-06 [comm_op_add_attrs]: 9.00123e-07 [add_comm_op_reuse_tag]: 8.49832e-07 [interleave_split_concat_branches]: 1.21025e-06 [interleave_parallel_branches]: 6.79865e-07 [overlap_opt_shard_in_pipeline]: 1.30991e-06 [overlap_opt_shard_grad_in_pipeline]: 2.07033e-06 [control_data_broadcast_order]: 8.79634e-07 [grouped_pairwise_exchange_alltoall]: 1.3602e-06 [offloading_packed_experts]: 1.04029e-06 [overlap_recompute_and_grad_model_parallel]: 2.21981e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.79983e-07 [overlap_recompute_allgather_and_fa_grad]: 6.10016e-07 [overlap_grad_ring_attention]: 1.88034e-06 [overlap_grad_flash_sp]: 1.41901e-05 [begin_end_overlap_inline]: 1.01002e-06 [split_matmul_comm_elemetwise]: 2.42004e-06 [split_layernorm_comm]: 1.53016e-06 [handle_group_info]: 8.2003e-07 [symbol_engine_optimizer]: 9.01604e-05, [1] [Cycle 1]: 8.55001e-05, [6] [build]: 3.84031e-06 [elim_shapecalc]: 1.156e-05 [elim_not_effective]: 1.58297e-05 [opt_reshape]: 1.11703e-05 [fold_const_symbol]: 1.31503e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 1.2801e-06 [auto_monad_reorder]: 2.74102e-05 [get_jit_bprop_graph]: 4.29805e-07 [rewriter_after_jit_bprop_graph]: 3.7998e-07 [eliminate_special_op_node]: 0.00050944 [distribtued_split]: 4.05498e-05 [validate]: 3.475e-05 [task_emit]: 0.0697144 [execute]: 3.16501e-05 Sums bootstrap : 0.000317s : 0.41% type_inference : 0.002497s : 3.26% auto_monad : 0.000123s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000537s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000223s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000432s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.06% optimize.opt_a.cse : 0.000047s : 0.06% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000173s : 0.23% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000494s : 0.65% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000015s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000027s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000509s : 0.67% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.069714s : 91.01% execute : 0.000032s : 0.04% Time group info: ------[substitution.] 0.000128 63 5.11% : 0.000007s : 2: substitution.depend_value_elim 1.57% : 0.000002s : 5: substitution.elim_not_effective 1.78% : 0.000002s : 5: substitution.fold_const_symbol 5.44% : 0.000007s : 6: substitution.graph_param_transform 50.02% : 0.000064s : 1: substitution.inline 4.38% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.15% : 0.000004s : 6: substitution.load_eliminater 2.50% : 0.000003s : 2: substitution.reduce_all_const_elim 6.35% : 0.000008s : 10: substitution.remove_not_recompute_node 2.62% : 0.000003s : 2: substitution.replace_old_param 8.88% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.20% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002467 2 89.21% : 0.002201s : 1: type_inference.infer 10.79% : 0.000266s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000063 1 100.00% : 0.000063s : 1: match.inline ------[predicate.] 0.000227 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.15% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.71% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.16% : 0.000005s : 25: predicate.arithmetic_simplify 0.83% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.41% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.24% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 1.01% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.85% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_depend_swap 1.97% : 0.000004s : 31: predicate.environ_get_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.80% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.43% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.56% : 0.000013s : 63: predicate.inline 1.00% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.10% : 0.000002s : 12: predicate.less_batch_normalization 1.67% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000005s : 38: predicate.load_eliminater 1.28% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.83% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.84% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.72% : 0.000002s : 13: predicate.minmaximum_grad 0.79% : 0.000002s : 6: predicate.mutable_eliminate 0.51% : 0.000001s : 6: predicate.opt_reshape 0.59% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.88% : 0.000002s : 12: predicate.reduce_all_const_elim 1.08% : 0.000002s : 13: predicate.reduce_eliminate 0.54% : 0.000001s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.05% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.46% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 1.11% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.92% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.85% : 0.000002s : 14: predicate.switch_defer_inline 1.58% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.54% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.74% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.87% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.71% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.88% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.59% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.43% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.52% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.52% : 0.000001s : 6: predicate.value_based_eliminate 0.86% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.60% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000150 4 10.61% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.39% : 0.000134s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089358 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000065s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000136s : 1: auto_monad 0.04% : 0.000034s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000341s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.00004 TotalTime = 0.0809484, [21] [bootstrap]: 0.00031494 [type_inference]: 0.00252569 [auto_monad]: 0.00011963 [graph_reusing]: 2.19978e-06 [inline]: 1.23028e-06 [parallel-infer-symbol]: 1.82027e-06 [pre_auto_parallel]: 2.462e-05 [insert-virtual-dataset]: 2.85963e-06 [parallel-infer-symbol-second]: 4.89876e-07 [dataset_repeat_opt]: 1.23028e-06 [pipeline_split]: 1.17999e-06 [optimize]: 0.00708004, [52] [py_interpret_to_execute]: 1.49999e-05 [rewriter_before_opt_a]: 3.25697e-05 [opt_a]: 0.00533184, [2] [Cycle 1]: 0.00151641, [43] [expand_dump_flag]: 2.90014e-06 [switch_simplify]: 2.89599e-05 [loop_unroll]: 1.38301e-05 [a_1]: 0.00033713 [recompute_prepare]: 9.24012e-06 [updatestate_depend_eliminate]: 8.31019e-06 [updatestate_assign_eliminate]: 5.79981e-06 [updatestate_loads_eliminate]: 7.36024e-06 [parameter_eliminate]: 2.99979e-06 [a_2]: 0.00011602 [accelerated_algorithm]: 8.57981e-06 [shard]: 2.03028e-06 [meta_shard_fg_expand]: 3.49013e-06 [shard_inline]: 8.17003e-06 [auto_parallel]: 1.24802e-05 [parallel]: 6.90995e-06 [flash_sp]: 9.41008e-06 [merge_comm]: 8.44989e-06 [allreduce_fusion]: 5.70016e-06 [matmul_add_comm_reduction]: 9.85013e-06 [allreduce_slice_to_reducescatter]: 4.20026e-07 [virtual_shard_identity]: 9.58983e-06 [virtual_dataset]: 8.40006e-06 [get_grad_eliminate_]: 7.60006e-06 [virtual_output]: 7.76025e-06 [merge_forward]: 5.62984e-06 [cell_reuse_recompute_pass]: 1.91992e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.66199e-05 [before_grad]: 1.36299e-05 [inplace_validation]: 5.0799e-06 [meta_fg_expand]: 5.22006e-06 [inplace_validation_after_expand]: 6.19004e-06 [flash_sp_send_recv_attached]: 4.95976e-06 [receive_attached]: 2.00979e-06 [after_resolve]: 1.13603e-05 [a_after_grad]: 1.27e-05 [special_op_eliminate]: 7.71973e-06 [renormalize]: 0.00042911 [add_forward_monad_depend]: 3.21027e-06 [auto_monad_grad]: 1.93994e-06 [auto_monad_eliminator]: 3.18303e-05 [cse]: 3.037e-05 [a_3]: 5.75199e-05 [Cycle 2]: 0.00078632, [43] [expand_dump_flag]: 1.22003e-06 [switch_simplify]: 9.37004e-06 [loop_unroll]: 7.70018e-06 [a_1]: 0.00020046 [recompute_prepare]: 7.18003e-06 [updatestate_depend_eliminate]: 6.00982e-06 [updatestate_assign_eliminate]: 4.92996e-06 [updatestate_loads_eliminate]: 5.15999e-06 [parameter_eliminate]: 1.11014e-06 [a_2]: 0.00010395 [accelerated_algorithm]: 7.89994e-06 [shard]: 1.21957e-06 [meta_shard_fg_expand]: 2.54996e-06 [shard_inline]: 7.8599e-06 [auto_parallel]: 1.11004e-05 [parallel]: 3.56976e-06 [flash_sp]: 3.58978e-06 [merge_comm]: 5.9302e-06 [allreduce_fusion]: 5.06965e-06 [matmul_add_comm_reduction]: 7.83987e-06 [allreduce_slice_to_reducescatter]: 2.99886e-07 [virtual_shard_identity]: 8.50996e-06 [virtual_dataset]: 7.62008e-06 [get_grad_eliminate_]: 7.60006e-06 [virtual_output]: 7.03987e-06 [merge_forward]: 4.5402e-06 [cell_reuse_recompute_pass]: 1.96975e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.59e-05 [before_grad]: 1.268e-05 [inplace_validation]: 4.12017e-06 [meta_fg_expand]: 5.0799e-06 [inplace_validation_after_expand]: 5.1097e-06 [flash_sp_send_recv_attached]: 8.39587e-07 [receive_attached]: 6.70087e-07 [after_resolve]: 9.97027e-06 9s : 1: distribtued_split 0.59% : 0.000523s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.05% : 0.000041s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000503s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001090s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000035s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 5.90% : 0.005275s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.28% : 0.000246s : 1: opt_b 7.83% : 0.006998s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.02% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.26% : 0.000234s : 1: renormalize.infer 0.22% : 0.000192s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.20% : 0.000179s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000093s : 1: symbol_engine_optimizer 78.05% : 0.069740s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.81% : 0.002515s : 1: type_inference 0.08% : 0.000071s : 1: validate [a_after_grad]: 1.18902e-05 [special_op_eliminate]: 7.50972e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 9.29926e-07 [auto_monad_grad]: 1.20001e-06 [auto_monad_eliminator]: 1.79098e-05 [cse]: 1.94996e-05 [a_3]: 4.79501e-05 [py_interpret_to_execute_after_opt_a]: 9.35979e-06 [slice_cell_reuse_recomputed_activation]: 2.70968e-06 [rewriter_after_opt_a]: 0.00019218 [convert_after_rewriter]: 1.10897e-05 [order_py_execute_after_rewriter]: 5.68992e-06 [opt_b]: 0.00024586, [1] [Cycle 1]: 0.00023932, [7] [b_1]: 0.00016209 [b_2]: 9.89018e-06 [updatestate_depend_eliminate]: 5.60982e-06 [updatestate_assign_eliminate]: 4.36977e-06 [updatestate_loads_eliminate]: 5.21028e-06 [renormalize]: 2.80328e-07 [cse]: 1.97301e-05 [optimize_parallel_all_gather_comm]: 8.29995e-06 [overlap_param_gather]: 1.28988e-06 [cconv]: 2.14302e-05 [loop_unroll]: 0.00049483 [opt_after_cconv]: 0.00013582, [1] [Cycle 1]: 0.00012916, [7] [c_1]: 5.26402e-05 [parameter_eliminate]: 2.6999e-06 [updatestate_depend_eliminate]: 8.17003e-06 [updatestate_assign_eliminate]: 4.61983e-06 [updatestate_loads_eliminate]: 5.20004e-06 [cse]: 2.15801e-05 [renormalize]: 3.39933e-07 [remove_dup_value]: 1.41999e-05 [tuple_transform]: 7.00001e-05, [1] [Cycle 1]: 6.497e-05, [2] [d_1]: 5.47804e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.84029e-06 [add_cache_embedding]: 1.41598e-05 [add_recomputation]: 6.12698e-05 [cse_after_recomputation]: 2.79201e-05, [1] [Cycle 1]: 2.266e-05, [1] [cse]: 1.74297e-05 [environ_conv]: 6.65989e-06 [swap_dp_allreduce_reducescatter]: 7.87014e-06 [bias_add_comm_swap]: 2.59979e-06 [label_micro_interleaved_index]: 1.89012e-06 [label_fine_grained_interleaved_index]: 2.39024e-06 [merge_cast_opt]: 1.23028e-06 [slice_recompute_activation]: 1.86963e-06 [micro_interleaved_order_control]: 2.07033e-06 [assign_add_opt]: 7.22008e-06 [ForceFp32Comm]: 8.69855e-07 [remove_cast_before_assign_add]: 4.99655e-07 [full_micro_interleaved_order_control]: 1.54972e-06 [reorder_send_recv_between_fp_bp]: 2.4396e-06 [comm_op_add_attrs]: 1.07009e-06 [add_comm_op_reuse_tag]: 1.08965e-06 [interleave_split_concat_branches]: 8.70321e-07 [interleave_parallel_branches]: 6.59842e-07 [overlap_opt_shard_in_pipeline]: 1.39978e-06 [overlap_opt_shard_grad_in_pipeline]: 2.2403e-06 [control_data_broadcast_order]: 1.21025e-06 [grouped_pairwise_exchange_alltoall]: 1.13016e-06 [offloading_packed_experts]: 8.40053e-07 [overlap_recompute_and_grad_model_parallel]: 1.97021e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.801e-07 [overlap_recompute_allgather_and_fa_grad]: 9.80217e-07 [overlap_grad_ring_attention]: 2.08989e-06 [overlap_grad_flash_sp]: 1.40299e-05 [begin_end_overlap_inline]: 8.39587e-07 [split_matmul_comm_elemetwise]: 2.13971e-06 [split_layernorm_comm]: 1.58977e-06 [handle_group_info]: 9.99775e-07 [symbol_engine_optimizer]: 8.42102e-05, [1] [Cycle 1]: 7.89701e-05, [6] [build]: 4.23007e-06 [elim_shapecalc]: 1.16299e-05 [elim_not_effective]: 1.53203e-05 [opt_reshape]: 8.3698e-06 [fold_const_symbol]: 1.331e-05 [renormalize]: 2.20258e-07 [pipeline_parallel_scheduler]: 1.66986e-06 [auto_monad_reorder]: 2.944e-05 [get_jit_bprop_graph]: 5.40167e-07 [rewriter_after_jit_bprop_graph]: 4.50294e-07 [eliminate_special_op_node]: 0.00051942 [distribtued_split]: 4.11398e-05 [validate]: 3.72096e-05 [task_emit]: 0.0699875 [execute]: 1.00201e-05 Sums bootstrap : 0.000315s : 0.41% type_inference : 0.002526s : 3.28% auto_monad : 0.000120s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000033s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000538s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000429s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000050s : 0.06% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000192s : 0.25% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000021s : 0.03% optimize.loop_unroll : 0.000495s : 0.64% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000000s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000519s : 0.68% distribtued_split : 0.000041s : 0.05% validate : 0.000037s : 0.05% task_emit : 0.069987s : 91.01% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000128 63 4.82% : 0.000006s : 2: substitution.depend_value_elim 1.81% : 0.000002s : 5: substitution.elim_not_effective 1.77% : 0.000002s : 5: substitution.fold_const_symbol 5.26% : 0.000007s : 6: substitution.graph_param_transform 49.91% : 0.000064s : 1: substitution.inline 4.15% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.38% : 0.000004s : 6: substitution.load_eliminater 2.88% : 0.000004s : 2: substitution.reduce_all_const_elim 5.80% : 0.000007s : 10: substitution.remove_not_recompute_node 2.46% : 0.000003s : 2: substitution.replace_old_param 9.32% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.44% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002497 2 89.71% : 0.002240s : 1: type_inference.infer 10.29% : 0.000257s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000063 1 100.00% : 0.000063s : 1: match.inline ------[predicate.] 0.000226 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.03% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.86% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.25% : 0.000005s : 25: predicate.arithmetic_simplify 0.86% : 0.000002s : 13: predicate.cast_eliminate 0.76% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.25% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.17% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_depend_swap 1.96% : 0.000004s : 31: predicate.environ_get_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.78% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.52% : 0.000012s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.00% : 0.000002s : 12: predicate.less_batch_normalization 1.78% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.47% : 0.000006s : 38: predicate.load_eliminater 1.34% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.31% : 0.000003s : 17: predicate.loop_unroll_before_grad 2.01% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.80% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.72% : 0.000002s : 13: predicate.minmaximum_grad 0.70% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.14% : 0.000003s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.84% : 0.000002s : 13: predicate.print_const_string_wrapper 0.90% : 0.000002s : 12: predicate.reduce_all_const_elim 1.10% : 0.000002s : 13: predicate.reduce_eliminate 0.58% : 0.000001s : 12: predicate.remove_not_recompute_node 1.21% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.30% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.94% : 0.000002s : 12: predicate.shard_identity_eliminate 1.30% : 0.000003s : 18: predicate.special_op_eliminate 0.99% : 0.000002s : 12: predicate.specialize_transform 1.11% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.93% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.36% : 0.000005s : 38: predicate.stopgrad_eliminater 0.46% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.62% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.34% : 0.000010s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.84% : 0.000002s : 13: predicate.transpose_eliminate 1.76% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.62% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.48% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.85% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.81% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.52% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.42% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.60% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.52% : 0.000001s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.50% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000140 4 10.39% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.61% : 0.000126s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089826 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000134s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000338s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000050s : 1: distribtued_split 0.59% : 0.000534s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000505s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001090s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.03% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 5.94% : 0.005336s : 1: opt_a 0.16% : 0.000140s : 1: opt_after_cconv 0.28% : 0.000249s : 1: opt_b 7.89% : 0.007088s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.26% : 0.000233s : 1: renormalize.infer 0.21% : 0.000191s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.22% : 0.000198s : 1: rewriter_after_opt_a 0.04% : 0.000037s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000088s : 1: symbol_engine_optimizer 77.95% : 0.070016s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.83% : 0.002544s : 1: type_inference 0.08% : 0.000072s : 1: validate TotalTime = 0.082825, [21] [bootstrap]: 0.00033364 [type_inference]: 0.00287051 [auto_monad]: 0.00014037 [graph_reusing]: 2.6999e-06 [inline]: 1.50967e-06 [parallel-infer-symbol]: 2.47033e-06 [pre_auto_parallel]: 2.92002e-05 [insert-virtual-dataset]: 2.78978e-06 [parallel-infer-symbol-second]: 4.4005e-07 [dataset_repeat_opt]: 1.24006e-06 [pipeline_split]: 1.70013e-06 [optimize]: 0.00777747, [52] [py_interpret_to_execute]: 1.727e-05 [rewriter_before_opt_a]: 4.18299e-05 [opt_a]: 0.00591678, [2] [Cycle 1]: 0.00178813, [43] [expand_dump_flag]: 3.70014e-06 [switch_simplify]: 3.52897e-05 [loop_unroll]: 1.67503e-05 [a_1]: 0.00040226 [recompute_prepare]: 1.08699e-05 [updatestate_depend_eliminate]: 9.19029e-06 [updatestate_assign_eliminate]: 6.68969e-06 [updatestate_loads_eliminate]: 7.94977e-06 [parameter_eliminate]: 3.74997e-06 [a_2]: 0.00014391 [accelerated_algorithm]: 1.04099e-05 [shard]: 1.98977e-06 [meta_shard_fg_expand]: 4.23007e-06 [shard_inline]: 1.02101e-05 [auto_parallel]: 1.27698e-05 [parallel]: 7.81007e-06 [flash_sp]: 1.23e-05 [merge_comm]: 9.01008e-06 [allreduce_fusion]: 7.18981e-06 [matmul_add_comm_reduction]: 1.65598e-05 [allreduce_slice_to_reducescatter]: 4.79631e-07 [virtual_shard_identity]: 1.13901e-05 [virtual_dataset]: 9.72999e-06 [get_grad_eliminate_]: 9.14e-06 [virtual_output]: 9.0003e-06 [merge_forward]: 6.92997e-06 [cell_reuse_recompute_pass]: 2.08011e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.203e-05 [before_grad]: 1.74101e-05 [inplace_validation]: 5.83986e-06 [meta_fg_expand]: 6.87968e-06 [inplace_validation_after_expand]: 7.62986e-06 [flash_sp_send_recv_attached]: 5.64009e-06 [receive_attached]: 2.6999e-06 [after_resolve]: 1.415e-05 [a_after_grad]: 1.56998e-05 [special_op_eliminate]: 1.01398e-05 [renormalize]: 0.00048075 [add_forward_monad_depend]: 3.93018e-06 [auto_monad_grad]: 2.14996e-06 [auto_monad_eliminator]: 6.38799e-05 [cse]: 3.77502e-05 [a_3]: 6.84597e-05 [Cycle 2]: 0.00092139, [43] [expand_dump_flag]: 1.14972e-06 [switch_simplify]: 1.14902e-05 [loop_unroll]: 9.81987e-06 [a_1]: 0.00025065 [recompute_prepare]: 9.22987e-06 [updatestate_depend_eliminate]: 6.35022e-06 [updatestate_assign_eliminate]: 5.26989e-06 [updatestate_loads_eliminate]: 5.83008e-06 [parameter_eliminate]: 1.61026e-06 [a_2]: 0.00012778 [accelerated_algorithm]: 1.00499e-05 [shard]: 1.18976e-06 [meta_shard_fg_expand]: 2.96021e-06 [shard_inline]: 9.93023e-06 [auto_parallel]: 1.211e-05 [parallel]: 3.89013e-06 [flash_sp]: 3.45008e-06 [merge_comm]: 7.01984e-06 [allreduce_fusion]: 5.85988e-06 [matmul_add_comm_reduction]: 8.3698e-06 [allreduce_slice_to_reducescatter]: 3.19909e-07 [virtual_shard_identity]: 1.04802e-05 [virtual_dataset]: 9.16002e-06 [get_grad_eliminate_]: 8.95979e-06 [virtual_output]: 8.78004e-06 [merge_forward]: 5.09014e-06 [cell_reuse_recompute_pass]: 2.12993e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.01198e-05 [before_grad]: 1.58302e-05 [inplace_validation]: 4.54998e-06 [meta_fg_expand]: 5.58002e-06 [inplace_validation_after_expand]: 6.19981e-06 [flash_sp_send_recv_attached]: 1.01002e-06 [receive_attached]: 7.60425e-07 [after_resolve]: 1.20299e-05 [a_after_grad]: 1.44397e-05 [special_op_eliminate]: 9.06037e-06 [renormalize]: 1.10362e-07 [add_forward_monad_depend]: 9.80217e-07 [auto_monad_grad]: 1.36998e-06 [auto_monad_eliminator]: 1.96798e-05 [cse]: 2.13399e-05 [a_3]: 5.985e-05 [py_interpret_to_execute_after_opt_a]: 9.70997e-06 [slice_cell_reuse_recomputed_activation]: 2.21981e-06 [rewriter_after_opt_a]: 0.00014567 [convert_after_rewriter]: 1.03302e-05 [order_py_execute_after_rewriter]: 6.78981e-06 [opt_b]: 0.00028609, [1] [Cycle 1]: 0.00028021, [7] [b_1]: 0.00019724 [b_2]: 1.19898e-05 [updatestate_depend_eliminate]: 5.96978e-06 [updatestate_assign_eliminate]: 4.69014e-06 [updatestate_loads_eliminate]: 5.93998e-06 [renormalize]: 2.90107e-07 [cse]: 2.043e-05 [optimize_parallel_all_gather_comm]: 8.74e-06 [overlap_param_gather]: 1.34017e-06 [cconv]: 2.57203e-05 [loop_unroll]: 0.00050352 [opt_after_cconv]: 0.00014997, [1] [Cycle 1]: 0.00014349, [7] [c_1]: 6.23199e-05 [parameter_eliminate]: 2.92016e-06 [updatestate_depend_eliminate]: 8.42009e-06 [updatestate_assign_eliminate]: 5.19026e-06 [updatestate_loads_eliminate]: 6.79027e-06 [cse]: 2.29799e-05 [renormalize]: 4.29805e-07 [remove_dup_value]: 1.66302e-05 [tuple_transform]: 8.85101e-05, [1] [Cycle 1]: 8.365e-05, [2] [d_1]: 7.33603e-05 [renormalize]: 1.80211e-07 [partial_unused_args_eliminate]: 2.12016e-06 [add_cache_embedding]: 1.53198e-05 [add_recomputation]: 7.22399e-05 [cse_after_recomputation]: 2.93502e-05, [1] [Cycle 1]: 2.41697e-05, [1] [cse]: 1.85301e-05 [environ_conv]: 9.23965e-06 [swap_dp_allreduce_reducescatter]: 9.22009e-06 [bias_add_comm_swap]: 2.94996e-06 [label_micro_interleaved_index]: 2.90992e-06 [label_fine_grained_interleaved_index]: 2.06986e-06 [merge_cast_opt]: 1.67964e-06 [slice_recompute_activation]: 1.76998e-06 [micro_interleaved_order_control]: 2.08989e-06 [assign_add_opt]: 8.2599e-06 [ForceFp32Comm]: 9.09902e-07 [remove_cast_before_assign_add]: 1.09011e-06 [full_micro_interleaved_order_control]: 2.40002e-06 [reorder_send_recv_between_fp_bp]: 2.27988e-06 [comm_op_add_attrs]: 1.10967e-06 [add_comm_op_reuse_tag]: 1.21025e-06 [interleave_split_concat_branches]: 9.29926e-07 [interleave_parallel_branches]: 1.19023e-06 [overlap_opt_shard_in_pipeline]: 1.46031e-06 [overlap_opt_shard_grad_in_pipeline]: 2.39024e-06 [control_data_broadcast_order]: 1.32993e-06 [grouped_pairwise_exchange_alltoall]: 1.41002e-06 [offloading_packed_experts]: 1.2801e-06 [overlap_recompute_and_grad_model_parallel]: 2.10013e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.90344e-07 [overlap_recompute_allgather_and_fa_grad]: 1.2801e-06 [overlap_grad_ring_attention]: 1.97021e-06 [overlap_grad_flash_sp]: 1.767e-05 [begin_end_overlap_inline]: 8.69855e-07 [split_matmul_comm_elemetwise]: 2.16998e-06 [split_layernorm_comm]: 1.88965e-06 [handle_group_info]: 9.80217e-07 [symbol_engine_optimizer]: 9.972e-05, [1] [Cycle 1]: 9.49102e-05, [6] [build]: 4.75999e-06 [elim_shapecalc]: 1.434e-05 [elim_not_effective]: 2.01804e-05 [opt_reshape]: 1.06702e-05 [fold_const_symbol]: 1.72001e-05 [renormalize]: 3.1013e-07 [pipeline_parallel_scheduler]: 1.51992e-06 [auto_monad_reorder]: 3.17302e-05 [get_jit_bprop_graph]: 4.50294e-07 [rewriter_after_jit_bprop_graph]: 4.49829e-07 [eliminate_special_op_node]: 0.00052213 [distribtued_split]: 4.688e-05 [validate]: 3.84999e-05 [task_emit]: 0.0707424 [execute]: 1.23298e-05 Sums bootstrap : 0.000334s : 0.42% type_inference : 0.002871s : 3.65% auto_monad : 0.000140s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000029s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000042s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000047s : 0.06% optimize.opt_a.loop_unroll : 0.000027s : 0.03% optimize.opt_a.a_1 : 0.000653s : 0.83% optimize.opt_a.recompute_prepare : 0.000020s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000272s : 0.35% optimize.opt_a.accelerated_algorithm : 0.000020s : 0.03% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000020s : 0.03% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000016s : 0.02% optimize.opt_a.allreduce_fusion : 0.000013s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000025s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000022s : 0.03% optimize.opt_a.virtual_dataset : 0.000019s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000018s : 0.02% optimize.opt_a.virtual_output : 0.000018s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000042s : 0.05% optimize.opt_a.before_grad : 0.000033s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000012s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000014s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000026s : 0.03% optimize.opt_a.a_after_grad : 0.000030s : 0.04% optimize.opt_a.special_op_eliminate : 0.000019s : 0.02% optimize.opt_a.renormalize : 0.000481s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000084s : 0.11% optimize.opt_a.cse : 0.000059s : 0.08% optimize.opt_a.a_3 : 0.000128s : 0.16% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000146s : 0.19% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000197s : 0.25% optimize.opt_b.b_2 : 0.000012s : 0.02% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000026s : 0.03% optimize.loop_unroll : 0.000504s : 0.64% optimize.opt_after_cconv.c_1 : 0.000062s : 0.08% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000017s : 0.02% optimize.tuple_transform.d_1 : 0.000073s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000072s : 0.09% optimize.cse_after_recomputation.cse : 0.000019s : 0.02% optimize.environ_conv : 0.000009s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000009s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000003s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000018s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000020s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000522s : 0.66% distribtued_split : 0.000047s : 0.06% validate : 0.000038s : 0.05% task_emit : 0.070742s : 90.07% execute : 0.000012s : 0.02% Time group info: ------[substitution.] 0.000157 63 4.89% : 0.000008s : 2: substitution.depend_value_elim 2.25% : 0.000004s : 5: substitution.elim_not_effective 2.18% : 0.000003s : 5: substitution.fold_const_symbol 8.59% : 0.000013s : 6: substitution.graph_param_transform 45.50% : 0.000071s : 1: substitution.inline 4.49% : 0.000007s : 10: substitution.j_node_and_user_rematch 3.38% : 0.000005s : 6: substitution.load_eliminater 2.75% : 0.000004s : 2: substitution.reduce_all_const_elim 7.16% : 0.000011s : 10: substitution.remove_not_recompute_node 2.59% : 0.000004s : 2: substitution.replace_old_param 8.36% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 7.85% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002837 2 89.15% : 0.002529s : 1: type_inference.infer 10.85% : 0.000308s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000070 1 100.00% : 0.000070s : 1: match.inline ------[predicate.] 0.000271 1420 0.76% : 0.000002s : 13: predicate.accumulaten_eliminater 1.10% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.24% : 0.000006s : 25: predicate.arithmetic_simplify 0.77% : 0.000002s : 13: predicate.cast_eliminate 0.89% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.42% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.78% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.55% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.11% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.87% : 0.000005s : 31: predicate.environ_get_eliminate 1.05% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.80% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.36% : 0.000004s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.10% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000001s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.72% : 0.000002s : 12: predicate.incorporate_call_switch 6.01% : 0.000016s : 63: predicate.inline 1.14% : 0.000003s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.03% : 0.000003s : 12: predicate.less_batch_normalization 1.80% : 0.000005s : 25: predicate.list_to_tuple_eliminator_ 2.35% : 0.000006s : 38: predicate.load_eliminater 1.29% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.87% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.82% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.71% : 0.000002s : 13: predicate.minmaximum_grad 0.75% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.51% : 0.000001s : 6: predicate.parallel_virtual_node 1.07% : 0.000003s : 14: predicate.partial_defer_inline 1.34% : 0.000004s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.88% : 0.000002s : 12: predicate.reduce_all_const_elim 1.03% : 0.000003s : 13: predicate.reduce_eliminate 0.66% : 0.000002s : 12: predicate.remove_not_recompute_node 1.17% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.02% : 0.000003s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000003s : 12: predicate.shard_identity_eliminate 1.48% : 0.000004s : 18: predicate.special_op_eliminate 1.04% : 0.000003s : 12: predicate.specialize_transform 1.05% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.21% : 0.000006s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.85% : 0.000002s : 14: predicate.switch_defer_inline 1.72% : 0.000005s : 26: predicate.switch_layer_defer_inline 4.21% : 0.000011s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.71% : 0.000005s : 25: predicate.tuple_list_convert_item_index_to_positive 1.71% : 0.000005s : 25: predicate.tuple_list_get_item_const_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.74% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.70% : 0.000005s : 25: predicate.tuple_list_get_set_item_eliminator 2.67% : 0.000007s : 37: predicate.tuple_list_set_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.38% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.41% : 0.000009s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.90% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.46% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000173 4 10.44% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.56% : 0.000155s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092761 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000077s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.17% : 0.000154s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.39% : 0.000361s : 1: bootstrap 0.03% : 0.000030s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.04% : 0.000033s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000055s : 1: distribtued_split 0.58% : 0.000536s : 1: eliminate_special_op_node 0.01% : 0.000013s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000006s : 1: label_micro_interleaved_index 0.55% : 0.000514s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000018s : 1: opt.transform.loop_unroll_optimizer 1.44% : 0.001331s : 80: opt.transform.opt_a 0.07% : 0.000060s : 1: opt.transform.opt_after_cconv 0.20% : 0.000185s : 27: opt.transform.opt_b 0.08% : 0.000071s : 1: opt.transform.opt_trans_graph 0.04% : 0.000037s : 3: opt.transform.special_op_eliminate 0.06% : 0.000058s : 4: opt.transform.symbol_engine_opt 6.38% : 0.005921s : 1: opt_a 0.17% : 0.000154s : 1: opt_after_cconv 0.31% : 0.000289s : 1: opt_b 8.39% : 0.007786s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000036s : 1: pre_auto_parallel 0.02% : 0.000022s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000021s : 1: remove_dup_value 0.27% : 0.000253s : 1: renormalize.infer 0.24% : 0.000221s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000151s : 1: rewriter_after_opt_a 0.05% : 0.000046s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000006s : 1: split_layernorm_comm 0.01% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000013s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000103s : 1: symbol_engine_optimizer 76.30% : 0.070775s : 1: task_emit 0.10% : 0.000092s : 1: tuple_transform 3.12% : 0.002890s : 1: type_inference 0.08% : 0.000076s : 1: validate TotalTime = 0.0775608, [21] [bootstrap]: 0.00027744 [type_inference]: 0.00216078 [auto_monad]: 9.74e-05 [graph_reusing]: 1.77976e-06 [inline]: 1.16974e-06 [parallel-infer-symbol]: 1.20001e-06 [pre_auto_parallel]: 2.089e-05 [insert-virtual-dataset]: 1.79e-06 [parallel-infer-symbol-second]: 5.09899e-07 [dataset_repeat_opt]: 7.20378e-07 [pipeline_split]: 1.0496e-06 [optimize]: 0.00669393, [52] [py_interpret_to_execute]: 1.32299e-05 [rewriter_before_opt_a]: 3.003e-05 [opt_a]: 0.00512024, [2] [Cycle 1]: 0.00147622, [43] [expand_dump_flag]: 2.87965e-06 [switch_simplify]: 2.45296e-05 [loop_unroll]: 1.308e-05 [a_1]: 0.00031905 [recompute_prepare]: 8.96025e-06 [updatestate_depend_eliminate]: 7.48038e-06 [updatestate_assign_eliminate]: 5.34998e-06 [updatestate_loads_eliminate]: 6.73998e-06 [parameter_eliminate]: 2.40002e-06 [a_2]: 0.00011539 [accelerated_algorithm]: 8.80985e-06 [shard]: 1.85985e-06 [meta_shard_fg_expand]: 3.2899e-06 [shard_inline]: 8.6599e-06 [auto_parallel]: 1.131e-05 [parallel]: 6.4997e-06 [flash_sp]: 7.20005e-06 [merge_comm]: 6.61006e-06 [allreduce_fusion]: 5.05988e-06 [matmul_add_comm_reduction]: 9.39006e-06 [allreduce_slice_to_reducescatter]: 3.29688e-07 [virtual_shard_identity]: 9.77004e-06 [virtual_dataset]: 8.17003e-06 [get_grad_eliminate_]: 7.67969e-06 [virtual_output]: 7.58981e-06 [merge_forward]: 5.17024e-06 [cell_reuse_recompute_pass]: 1.57999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.628e-05 [before_grad]: 1.39601e-05 [inplace_validation]: 4.91971e-06 [meta_fg_expand]: 5.13997e-06 [inplace_validation_after_expand]: 5.24009e-06 [flash_sp_send_recv_attached]: 3.36999e-06 [receive_attached]: 2.10991e-06 [after_resolve]: 1.10301e-05 [a_after_grad]: 1.24001e-05 [special_op_eliminate]: 7.8599e-06 [renormalize]: 0.00042227 [add_forward_monad_depend]: 2.50991e-06 [auto_monad_grad]: 1.79e-06 [auto_monad_eliminator]: 2.45599e-05 [cse]: 2.55401e-05 [a_3]: 5.659e-05 [Cycle 2]: 0.00076306, [43] [expand_dump_flag]: 9.30391e-07 [switch_simplify]: 8.99006e-06 [loop_unroll]: 7.95955e-06 [a_1]: 0.00019983 [recompute_prepare]: 7.52974e-06 [updatestate_depend_eliminate]: 5.72018e-06 [updatestate_assign_eliminate]: 4.72972e-06 [updatestate_loads_eliminate]: 5.3402e-06 [parameter_eliminate]: 1.11992e-06 [a_2]: 0.00010397 [accelerated_algorithm]: 8.17981e-06 [shard]: 1.11992e-06 [meta_shard_fg_expand]: 2.35997e-06 [shard_inline]: 7.70995e-06 [auto_parallel]: 1.11503e-05 [parallel]: 3.18e-06 [flash_sp]: 2.63005e-06 [merge_comm]: 5.77001e-06 [allreduce_fusion]: 4.97e-06 [matmul_add_comm_reduction]: 7.20005e-06 [allreduce_slice_to_reducescatter]: 2.39816e-07 [virtual_shard_identity]: 8.7698e-06 [virtual_dataset]: 7.72998e-06 [get_grad_eliminate_]: 7.35978e-06 [virtual_output]: 6.99982e-06 [merge_forward]: 4.40003e-06 [cell_reuse_recompute_pass]: 1.71969e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.49598e-05 [before_grad]: 1.18599e-05 [inplace_validation]: 3.98001e-06 [meta_fg_expand]: 4.62029e-06 [inplace_validation_after_expand]: 4.88013e-06 [flash_sp_send_recv_attached]: 7.59959e-07 [receive_attached]: 6.39819e-07 [after_resolve]: 9.44035e-06 [a_after_grad]: 1.16699e-05 [special_op_eliminate]: 7.39982e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 5.79748e-07 [auto_monad_grad]: 1.06031e-06 [auto_monad_eliminator]: 1.54898e-05 [cse]: 1.84299e-05 [a_3]: 4.76502e-05 [py_interpret_to_execute_after_opt_a]: 8.06013e-06 [slice_cell_reuse_recomputed_activation]: 1.80956e-06 [rewriter_after_opt_a]: 0.00013172 [convert_after_rewriter]: 8.06013e-06 [order_py_execute_after_rewriter]: 5.60982e-06 [opt_b]: 0.00023743, [1] [Cycle 1]: 0.00023249, [7] [b_1]: 0.00015844 [b_2]: 9.79984e-06 [updatestate_depend_eliminate]: 4.97e-06 [updatestate_assign_eliminate]: 4.40981e-06 [updatestate_loads_eliminate]: 5.11995e-06 [renormalize]: 2.70084e-07 [cse]: 1.80197e-05 [optimize_parallel_all_gather_comm]: 7.56001e-06 [overlap_param_gather]: 9.4017e-07 [cconv]: 1.567e-05 [loop_unroll]: 0.00047265 [opt_after_cconv]: 0.00012579, [1] [Cycle 1]: 0.00012025, [7] [c_1]: 4.99198e-05 [parameter_eliminate]: 1.85007e-06 [updatestate_depend_eliminate]: 6.84988e-06 [updatestate_assign_eliminate]: 4.5402e-06 [updatestate_loads_eliminate]: 4.78001e-06 [cse]: 2.01701e-05 [renormalize]: 3.09665e-07 [remove_dup_value]: 9.83011e-06 [tuple_transform]: 6.56098e-05, [1] [Cycle 1]: 6.12801e-05, [2] [d_1]: 5.25299e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.55019e-06 [add_cache_embedding]: 1.06599e-05 [add_recomputation]: 5.39203e-05 [cse_after_recomputation]: 2.55997e-05, [1] [Cycle 1]: 2.142e-05, [1] [cse]: 1.63396e-05 [environ_conv]: 6.31018e-06 [swap_dp_allreduce_reducescatter]: 7.44034e-06 [bias_add_comm_swap]: 1.79978e-06 [label_micro_interleaved_index]: 1.48034e-06 [label_fine_grained_interleaved_index]: 1.55997e-06 [merge_cast_opt]: 7.69738e-07 [slice_recompute_activation]: 9.80217e-07 [micro_interleaved_order_control]: 1.17999e-06 [assign_add_opt]: 6.49039e-06 [ForceFp32Comm]: 5.40167e-07 [remove_cast_before_assign_add]: 5.79748e-07 [full_micro_interleaved_order_control]: 1.34995e-06 [reorder_send_recv_between_fp_bp]: 1.74996e-06 [comm_op_add_attrs]: 8.30274e-07 [add_comm_op_reuse_tag]: 5.89993e-07 [interleave_split_concat_branches]: 5.20144e-07 [interleave_parallel_branches]: 5.69969e-07 [overlap_opt_shard_in_pipeline]: 9.89996e-07 [overlap_opt_shard_grad_in_pipeline]: 1.23959e-06 [control_data_broadcast_order]: 6.70087e-07 [grouped_pairwise_exchange_alltoall]: 6.89644e-07 [offloading_packed_experts]: 6.79865e-07 [overlap_recompute_and_grad_model_parallel]: 1.05007e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.20026e-07 [overlap_recompute_allgather_and_fa_grad]: 5.89993e-07 [overlap_grad_ring_attention]: 1.17999e-06 [overlap_grad_flash_sp]: 1.13901e-05 [begin_end_overlap_inline]: 4.70318e-07 [split_matmul_comm_elemetwise]: 1.83005e-06 [split_layernorm_comm]: 1.11992e-06 [handle_group_info]: 5.89993e-07 [symbol_engine_optimizer]: 8.108e-05, [1] [Cycle 1]: 7.708e-05, [6] [build]: 3.97023e-06 [elim_shapecalc]: 1.12802e-05 [elim_not_effective]: 1.45398e-05 [opt_reshape]: 8.57981e-06 [fold_const_symbol]: 1.34399e-05 [renormalize]: 1.79745e-07 [pipeline_parallel_scheduler]: 1.03982e-06 [auto_monad_reorder]: 2.18102e-05 [get_jit_bprop_graph]: 3.69735e-07 [rewriter_after_jit_bprop_graph]: 2.99886e-07 [eliminate_special_op_node]: 0.00048288 [distribtued_split]: 3.35597e-05 [validate]: 2.94899e-05 [task_emit]: 0.0675025 [execute]: 8.88994e-06 Sums bootstrap : 0.000277s : 0.38% type_inference : 0.002161s : 2.93% auto_monad : 0.000097s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000034s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000519s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000219s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000012s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000422s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000104s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000132s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000158s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000473s : 0.64% optimize.opt_after_cconv.c_1 : 0.000050s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000054s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000006s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000011s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000022s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000483s : 0.66% distribtued_split : 0.000034s : 0.05% validate : 0.000029s : 0.04% task_emit : 0.067503s : 91.58% execute : 0.000009s : 0.01% TotalTime = 0.0776582, [21] [bootstrap]: 0.00030029 [type_inference]: 0.00235728 [auto_monad]: 0.00012296 [graph_reusing]: 2.42004e-06 [inline]: 1.4198e-06 [parallel-infer-symbol]: 2.01026e-06 [pre_auto_parallel]: 2.50801e-05 [insert-virtual-dataset]: 2.50014e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 8.59611e-07 [pipeline_split]: 1.66008e-06 [optimize]: 0.00686815, [52] [py_interpret_to_execute]: 1.51801e-05 [rewriter_before_opt_a]: 3.33502e-05 [opt_a]: 0.0052482, [2] [Cycle 1]: 0.00153474, [43] [expand_dump_flag]: 3.47011e-06 [switch_simplify]: 2.92398e-05 [loop_unroll]: 1.30301e-05 [a_1]: 0.0003645 [recompute_prepare]: 8.55979e-06 [updatestate_depend_eliminate]: 8.72975e-06 [updatestate_assign_eliminate]: 6.17979e-06 [updatestate_loads_eliminate]: 7.70018e-06 [parameter_eliminate]: 3.18e-06 [a_2]: 0.00011525 [accelerated_algorithm]: 7.89994e-06 [shard]: 2.06986e-06 [meta_shard_fg_expand]: 3.83006e-06 [shard_inline]: 7.98004e-06 [auto_parallel]: 1.23302e-05 [parallel]: 8.02008e-06 [flash_sp]: 1.09798e-05 [merge_comm]: 7.51019e-06 [allreduce_fusion]: 5.53019e-06 [matmul_add_comm_reduction]: 1.04699e-05 [allreduce_slice_to_reducescatter]: 4.4005e-07 [virtual_shard_identity]: 8.75024e-06 [virtual_dataset]: 7.75e-06 [get_grad_eliminate_]: 7.81985e-06 [virtual_output]: 7.45989e-06 [merge_forward]: 5.79003e-06 [cell_reuse_recompute_pass]: 2.04006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.59303e-05 [before_grad]: 1.36001e-05 [inplace_validation]: 5.31971e-06 [meta_fg_expand]: 5.29969e-06 [inplace_validation_after_expand]: 6.31995e-06 [flash_sp_send_recv_attached]: 4.10993e-06 [receive_attached]: 2.28966e-06 [after_resolve]: 1.053e-05 [a_after_grad]: 1.192e-05 [special_op_eliminate]: 7.47992e-06 [renormalize]: 0.00046161 [add_forward_monad_depend]: 2.02982e-06 [auto_monad_grad]: 1.12969e-06 [auto_monad_eliminator]: 1.97701e-05 [cse]: 2.44402e-05 [a_3]: 5.76102e-05 [Cycle 2]: 0.00076434, [43] [expand_dump_flag]: 1.15996e-06 [switch_simplify]: 9.05991e-06 [loop_unroll]: 7.52974e-06 [a_1]: 0.00019642 [recompute_prepare]: 7.39982e-06 [updatestate_depend_eliminate]: 5.93998e-06 [updatestate_assign_eliminate]: 4.83962e-06 [updatestate_loads_eliminate]: 5.22984e-06 [parameter_eliminate]: 1.30991e-06 [a_2]: 0.00010347 [accelerated_algorithm]: 7.91997e-06 [shard]: 9.79751e-07 [meta_shard_fg_expand]: 2.40002e-06 [shard_inline]: 7.62008e-06 [auto_parallel]: 1.00499e-05 [parallel]: 3.11015e-06 [flash_sp]: 3.05986e-06 [merge_comm]: 5.74999e-06 [allreduce_fusion]: 4.75021e-06 [matmul_add_comm_reduction]: 7.66013e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 8.1202e-06 [virtual_dataset]: 7.75e-06 [get_grad_eliminate_]: 7.20983e-06 [virtual_output]: 6.82985e-06 [merge_forward]: 4.27989e-06 [cell_reuse_recompute_pass]: 1.74996e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.46898e-05 [before_grad]: 1.21999e-05 [inplace_validation]: 4.0601e-06 [meta_fg_expand]: 4.71994e-06 [inplace_validation_after_expand]: 5.0501e-06 [flash_sp_send_recv_attached]: 7.19912e-07 [receive_attached]: 6.69621e-07 [after_resolve]: 1.02804e-05 [a_after_grad]: 1.20201e-05 [special_op_eliminate]: 7.47992e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.30274e-07 [auto_monad_grad]: 8.60076e-07 [auto_monad_eliminator]: 1.65398e-05 [cse]: 1.81701e-05 [a_3]: 4.747e-05 [py_interpret_to_execute_after_opt_a]: 8.35024e-06 [slice_cell_reuse_recomputed_activation]: 1.17021e-06 [rewriter_after_opt_a]: 0.00014946 [convert_after_rewriter]: 8.50018e-06 [order_py_execute_after_rewriter]: 5.74999e-06 [opt_b]: 0.00023943, [1] [Cycle 1]: 0.00023426, [7] [b_1]: 0.00015891 [b_2]: 1.00099e-05 [updatestate_depend_eliminate]: 4.78979e-06 [updatestate_assign_eliminate]: 4.14997e-06 [updatestate_loads_eliminate]: 4.48013e-06 [renormalize]: 2.90107e-07 [cse]: 1.82698e-05 [optimize_parallel_all_gather_comm]: 7.08038e-06 [overlap_param_gather]: 1.30013e-06 [cconv]: 2.00202e-05 [loop_unroll]: 0.00046533 [opt_after_cconv]: 0.00012838, [1] [Cycle 1]: 0.00012245, [7] [c_1]: 4.96702e-05 [parameter_eliminate]: 2.35997e-06 [updatestate_depend_eliminate]: 7.89994e-06 [updatestate_assign_eliminate]: 4.52017e-06 [updatestate_loads_eliminate]: 5.23031e-06 [cse]: 2.207e-05 [renormalize]: 3.40398e-07 [remove_dup_value]: 1.154e-05 [tuple_transform]: 6.66203e-05, [1] [Cycle 1]: 6.19702e-05, [2] [d_1]: 5.33699e-05 [renormalize]: 1.39698e-07 [partial_unused_args_eliminate]: 1.17999e-06 [add_cache_embedding]: 1.194e-05 [add_recomputation]: 5.96498e-05 [cse_after_recomputation]: 2.63997e-05, [1] [Cycle 1]: 2.17599e-05, [1] [cse]: 1.66702e-05 [environ_conv]: 5.8501e-06 [swap_dp_allreduce_reducescatter]: 7.03009e-06 [bias_add_comm_swap]: 1.46031e-06 [label_micro_interleaved_index]: 7.79983e-07 [label_fine_grained_interleaved_index]: 1.36998e-06 [merge_cast_opt]: 1.17021e-06 [slice_recompute_activation]: 1.51992e-06 [micro_interleaved_order_control]: 1.60979e-06 [assign_add_opt]: 7.22986e-06 [ForceFp32Comm]: 1.27032e-06 [remove_cast_before_assign_add]: 3.70201e-07 [full_micro_interleaved_order_control]: 1.68011e-06 [reorder_send_recv_between_fp_bp]: 1.91014e-06 [comm_op_add_attrs]: 8.801e-07 [add_comm_op_reuse_tag]: 1.03004e-06 [interleave_split_concat_branches]: 8.69855e-07 [interleave_parallel_branches]: 7.80448e-07 [overlap_opt_shard_in_pipeline]: 9.69972e-07 [overlap_opt_shard_grad_in_pipeline]: 1.60979e-06 [control_data_broadcast_order]: 9.69972e-07 [grouped_pairwise_exchange_alltoall]: 1.02026e-06 [offloading_packed_experts]: 1.0198e-06 [overlap_recompute_and_grad_model_parallel]: 1.55019e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.70204e-07 [overlap_recompute_allgather_and_fa_grad]: 9.29926e-07 [overlap_grad_ring_attention]: 1.33971e-06 [overlap_grad_flash_sp]: 1.40201e-05 [begin_end_overlap_inline]: 2.89641e-07 [split_matmul_comm_elemetwise]: 1.62004e-06 [split_layernorm_comm]: 1.60979e-06 [handle_group_info]: 8.49832e-07 [symbol_engine_optimizer]: 8.24202e-05, [1] [Cycle 1]: 7.76099e-05, [6] [build]: 4.12995e-06 [elim_shapecalc]: 1.18301e-05 [elim_not_effective]: 1.508e-05 [opt_reshape]: 8.27014e-06 [fold_const_symbol]: 1.28597e-05 [renormalize]: 2.59839e-07 [pipeline_parallel_scheduler]: 1.4999e-06 [auto_monad_reorder]: 2.803e-05 [get_jit_bprop_graph]: 3.89758e-07 [rewriter_after_jit_bprop_graph]: 4.30271e-07 [eliminate_special_op_node]: 0.00053283 [distribtued_split]: 3.91402e-05 [validate]: 3.36403e-05 [task_emit]: 0.067093 [execute]: 1.07e-05 Sums bootstrap : 0.000300s : 0.41% type_inference : 0.002357s : 3.20% auto_monad : 0.000123s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000033s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000561s : 0.76% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000219s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.02% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000017s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000462s : 0.63% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000036s : 0.05% optimize.opt_a.cse : 0.000043s : 0.06% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000149s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000159s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000004s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000007s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000020s : 0.03% optimize.loop_unroll : 0.000465s : 0.63% optimize.opt_after_cconv.c_1 : 0.000050s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000000s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000028s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000533s : 0.72% distribtued_split : 0.000039s : 0.05% validate : 0.000034s : 0.05% task_emit : 0.067093s : 90.97% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000108 63 4.67% : 0.000005s : 2: substitution.depend_value_elim 1.95% : 0.000002s : 5: substitution.elim_not_effective 1.82% : 0.000002s : 5: substitution.fold_const_symbol 5.64% : 0.000006s : 6: substitution.graph_param_transform 47.92% : 0.000052s : 1: substitution.inline 5.10% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.38% : 0.000004s : 6: substitution.load_eliminater 2.33% : 0.000003s : 2: substitution.reduce_all_const_elim 7.03% : 0.000008s : 10: substitution.remove_not_recompute_node 2.36% : 0.000003s : 2: substitution.replace_old_param 9.46% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.33% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002137 2 89.69% : 0.001916s : 1: type_inference.infer 10.31% : 0.000220s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000051 1 100.00% : 0.000051s : 1: match.inline ------[predicate.] 0.000226 1420 0.85% : 0.000002s : 13: predicate.accumulaten_eliminater 0.98% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.39% : 0.000005s : 25: predicate.arithmetic_simplify 0.89% : 0.000002s : 13: predicate.cast_eliminate 0.85% : 0.000002s : 12: predicate.check_bprop_eliminate 0.76% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.50% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.35% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.24% : 0.000001s : 6: predicate.elim_not_effective 0.55% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.21% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000004s : 31: predicate.environ_get_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.25% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.10% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.89% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.68% : 0.000013s : 63: predicate.inline 1.12% : 0.000003s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.08% : 0.000002s : 12: predicate.less_batch_normalization 1.66% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.35% : 0.000005s : 38: predicate.load_eliminater 1.10% : 0.000002s : 6: predicate.loop_unroll_after_grad 1.25% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.75% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.65% : 0.000001s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.20% : 0.000003s : 14: predicate.partial_defer_inline 1.32% : 0.000003s : 19: predicate.partial_eliminate 0.76% : 0.000002s : 13: predicate.print_const_string_wrapper 0.91% : 0.000002s : 12: predicate.reduce_all_const_elim 1.07% : 0.000002s : 13: predicate.reduce_eliminate 0.57% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.79% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.11% : 0.000003s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.03% : 0.000002s : 12: predicate.shard_identity_eliminate 1.45% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 1.07% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.65% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.17% : 0.000009s : 43: predicate.switch_simplify 0.76% : 0.000002s : 13: predicate.tile_eliminate 0.81% : 0.000002s : 13: predicate.transpose_eliminate 1.80% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.82% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.72% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.82% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.62% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.75% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.44% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.49% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.85% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.47% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000159 4 6.71% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.29% : 0.000148s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.086013 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000059s : 1: add_recomputation 0.01% : 0.000009s : 1: assign_add_opt 0.13% : 0.000109s : 1: auto_monad 0.03% : 0.000028s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000300s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.58% : 0.000495s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.56% : 0.000482s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001065s : 80: opt.transform.opt_a 0.06% : 0.000048s : 1: opt.transform.opt_after_cconv 0.17% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000051s : 1: opt.transform.opt_trans_graph 0.03% : 0.000029s : 3: opt.transform.special_op_eliminate 0.05% : 0.000044s : 4: opt.transform.symbol_engine_opt 5.96% : 0.005124s : 1: opt_a 0.15% : 0.000130s : 1: opt_after_cconv 0.28% : 0.000240s : 1: opt_b 7.79% : 0.006702s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.24% : 0.000204s : 1: renormalize.infer 0.25% : 0.000213s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000137s : 1: rewriter_after_opt_a 0.04% : 0.000034s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000084s : 1: symbol_engine_optimizer 78.51% : 0.067527s : 1: task_emit 0.08% : 0.000069s : 1: tuple_transform 2.53% : 0.002177s : 1: type_inference 0.07% : 0.000059s : 1: validate Time group info: ------[substitution.] 0.000127 63 5.23% : 0.000007s : 2: substitution.depend_value_elim 1.51% : 0.000002s : 5: substitution.elim_not_effective 1.59% : 0.000002s : 5: substitution.fold_const_symbol 4.93% : 0.000006s : 6: substitution.graph_param_transform 51.78% : 0.000066s : 1: substitution.inline 4.03% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.51% : 0.000004s : 6: substitution.load_eliminater 2.70% : 0.000003s : 2: substitution.reduce_all_const_elim 6.07% : 0.000008s : 10: substitution.remove_not_recompute_node 2.34% : 0.000003s : 2: substitution.replace_old_param 8.97% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.34% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002328 2 89.00% : 0.002072s : 1: type_inference.infer 11.00% : 0.000256s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000227 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.11% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.09% : 0.000005s : 25: predicate.arithmetic_simplify 0.92% : 0.000002s : 13: predicate.cast_eliminate 0.88% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.31% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.55% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_depend_swap 2.05% : 0.000005s : 31: predicate.environ_get_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.87% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.23% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.05% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.57% : 0.000013s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.00% : 0.000002s : 12: predicate.less_batch_normalization 1.71% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000005s : 38: predicate.load_eliminater 1.18% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.30% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.77% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.70% : 0.000002s : 13: predicate.minmaximum_grad 0.67% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.32% : 0.000003s : 19: predicate.partial_eliminate 0.90% : 0.000002s : 13: predicate.print_const_string_wrapper 0.94% : 0.000002s : 12: predicate.reduce_all_const_elim 1.27% : 0.000003s : 13: predicate.reduce_eliminate 0.57% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.50% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.85% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.10% : 0.000003s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.95% : 0.000002s : 12: predicate.shard_identity_eliminate 1.33% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 1.09% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.08% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.20% : 0.000005s : 38: predicate.stopgrad_eliminater 0.47% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.53% : 0.000010s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.81% : 0.000002s : 13: predicate.transpose_eliminate 1.71% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.77% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.73% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.58% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.43% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.43% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.85% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.85% : 0.000002s : 12: predicate.virtual_output_eliminate 0.47% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000183 4 8.24% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.76% : 0.000168s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.086363 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.16% : 0.000134s : 1: auto_monad 0.04% : 0.000034s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000326s : 1: bootstrap 0.03% : 0.000024s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000047s : 1: distribtued_split 0.63% : 0.000546s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.55% : 0.000474s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.28% : 0.001105s : 80: opt.transform.opt_a 0.06% : 0.000048s : 1: opt.transform.opt_after_cconv 0.17% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000044s : 4: opt.transform.symbol_engine_opt 6.08% : 0.005252s : 1: opt_a 0.15% : 0.000133s : 1: opt_after_cconv 0.28% : 0.000243s : 1: opt_b 7.96% : 0.006876s : 1: optimize 0.01% : 0.000010s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.27% : 0.000237s : 1: renormalize.infer 0.25% : 0.000219s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000155s : 1: rewriter_after_opt_a 0.04% : 0.000037s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000085s : 1: symbol_engine_optimizer 77.72% : 0.067121s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.75% : 0.002376s : 1: type_inference 0.08% : 0.000066s : 1: validate TotalTime = 0.0781309, [21] [bootstrap]: 0.00030023 [type_inference]: 0.00237829 [auto_monad]: 0.00010459 [graph_reusing]: 2.61003e-06 [inline]: 8.801e-07 [parallel-infer-symbol]: 2.04006e-06 [pre_auto_parallel]: 2.49599e-05 [insert-virtual-dataset]: 2.26032e-06 [parallel-infer-symbol-second]: 3.69735e-07 [dataset_repeat_opt]: 1.16974e-06 [pipeline_split]: 1.45007e-06 [optimize]: 0.00686816, [52] [py_interpret_to_execute]: 1.49501e-05 [rewriter_before_opt_a]: 3.30899e-05 [opt_a]: 0.00524205, [2] [Cycle 1]: 0.00153531, [43] [expand_dump_flag]: 3.45008e-06 [switch_simplify]: 2.92999e-05 [loop_unroll]: 1.312e-05 [a_1]: 0.00042323 [recompute_prepare]: 8.76002e-06 [updatestate_depend_eliminate]: 6.88015e-06 [updatestate_assign_eliminate]: 4.82984e-06 [updatestate_loads_eliminate]: 4.88013e-06 [parameter_eliminate]: 1.89012e-06 [a_2]: 0.00011035 [accelerated_algorithm]: 8.49972e-06 [shard]: 1.39e-06 [meta_shard_fg_expand]: 2.56998e-06 [shard_inline]: 8.2301e-06 [auto_parallel]: 9.81009e-06 [parallel]: 3.93996e-06 [flash_sp]: 5.07012e-06 [merge_comm]: 6.24033e-06 [allreduce_fusion]: 5.74021e-06 [matmul_add_comm_reduction]: 7.41007e-06 [allreduce_slice_to_reducescatter]: 3.7998e-07 [virtual_shard_identity]: 9.70019e-06 [virtual_dataset]: 8.08015e-06 [get_grad_eliminate_]: 7.68015e-06 [virtual_output]: 7.39982e-06 [merge_forward]: 4.56022e-06 [cell_reuse_recompute_pass]: 1.41002e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.56402e-05 [before_grad]: 1.32401e-05 [inplace_validation]: 4.13042e-06 [meta_fg_expand]: 5.0501e-06 [inplace_validation_after_expand]: 5.3402e-06 [flash_sp_send_recv_attached]: 2.12993e-06 [receive_attached]: 1.05007e-06 [after_resolve]: 1.072e-05 [a_after_grad]: 1.22399e-05 [special_op_eliminate]: 7.56001e-06 [renormalize]: 0.00041548 [add_forward_monad_depend]: 3.60981e-06 [auto_monad_grad]: 1.89012e-06 [auto_monad_eliminator]: 3.16701e-05 [cse]: 3.23402e-05 [a_3]: 5.78901e-05 [Cycle 2]: 0.00075808, [43] [expand_dump_flag]: 9.09902e-07 [switch_simplify]: 8.84989e-06 [loop_unroll]: 7.60984e-06 [a_1]: 0.0001975 [recompute_prepare]: 7.26013e-06 [updatestate_depend_eliminate]: 5.66989e-06 [updatestate_assign_eliminate]: 4.44008e-06 [updatestate_loads_eliminate]: 5.01983e-06 [parameter_eliminate]: 9.59728e-07 [a_2]: 0.00010282 [accelerated_algorithm]: 8.09971e-06 [shard]: 1.15018e-06 [meta_shard_fg_expand]: 2.35997e-06 [shard_inline]: 7.41007e-06 [auto_parallel]: 1.11801e-05 [parallel]: 3.47989e-06 [flash_sp]: 3.37977e-06 [merge_comm]: 5.98002e-06 [allreduce_fusion]: 5.28991e-06 [matmul_add_comm_reduction]: 7.35978e-06 [allreduce_slice_to_reducescatter]: 2.39816e-07 [virtual_shard_identity]: 8.27992e-06 [virtual_dataset]: 7.19028e-06 [get_grad_eliminate_]: 7.01984e-06 [virtual_output]: 7.01007e-06 [merge_forward]: 4.50993e-06 [cell_reuse_recompute_pass]: 2.04984e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.46599e-05 [before_grad]: 1.20099e-05 [inplace_validation]: 4.0303e-06 [meta_fg_expand]: 4.6799e-06 [inplace_validation_after_expand]: 5.30016e-06 [flash_sp_send_recv_attached]: 1.08033e-06 [receive_attached]: 8.90344e-07 [after_resolve]: 9.66014e-06 [a_after_grad]: 1.18101e-05 [special_op_eliminate]: 7.11996e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 7.79983e-07 [auto_monad_grad]: 1.20001e-06 [auto_monad_eliminator]: 1.727e-05 [cse]: 1.87904e-05 [a_3]: 4.77703e-05 [py_interpret_to_execute_after_opt_a]: 9.48971e-06 [slice_cell_reuse_recomputed_activation]: 2.14996e-06 [rewriter_after_opt_a]: 0.00015334 [convert_after_rewriter]: 9.25967e-06 [order_py_execute_after_rewriter]: 5.89015e-06 [opt_b]: 0.00023675, [1] [Cycle 1]: 0.00023136, [7] [b_1]: 0.00016022 [b_2]: 9.32999e-06 [updatestate_depend_eliminate]: 5.15999e-06 [updatestate_assign_eliminate]: 4.49969e-06 [updatestate_loads_eliminate]: 4.97e-06 [renormalize]: 1.79745e-07 [cse]: 1.79801e-05 [optimize_parallel_all_gather_comm]: 7.73976e-06 [overlap_param_gather]: 1.39978e-06 [cconv]: 2.27299e-05 [loop_unroll]: 0.00046534 [opt_after_cconv]: 0.00012683, [1] [Cycle 1]: 0.00012111, [7] [c_1]: 4.96698e-05 [parameter_eliminate]: 2.3297e-06 [updatestate_depend_eliminate]: 7.83987e-06 [updatestate_assign_eliminate]: 5.43008e-06 [updatestate_loads_eliminate]: 4.85964e-06 [cse]: 1.969e-05 [renormalize]: 2.99886e-07 [remove_dup_value]: 1.352e-05 [tuple_transform]: 6.54799e-05, [1] [Cycle 1]: 6.10803e-05, [2] [d_1]: 5.244e-05 [renormalize]: 1.40164e-07 [partial_unused_args_eliminate]: 1.84029e-06 [add_cache_embedding]: 1.24499e-05 [add_recomputation]: 5.983e-05 [cse_after_recomputation]: 2.56002e-05, [1] [Cycle 1]: 2.09599e-05, [1] [cse]: 1.59903e-05 [environ_conv]: 6.71996e-06 [swap_dp_allreduce_reducescatter]: 7.42031e-06 [bias_add_comm_swap]: 2.20025e-06 [label_micro_interleaved_index]: 2.17976e-06 [label_fine_grained_interleaved_index]: 1.85007e-06 [merge_cast_opt]: 1.22003e-06 [slice_recompute_activation]: 1.26008e-06 [micro_interleaved_order_control]: 1.60001e-06 [assign_add_opt]: 7.2401e-06 [ForceFp32Comm]: 8.40053e-07 [remove_cast_before_assign_add]: 1.00024e-06 [full_micro_interleaved_order_control]: 1.97999e-06 [reorder_send_recv_between_fp_bp]: 1.72993e-06 [comm_op_add_attrs]: 1.1404e-06 [add_comm_op_reuse_tag]: 1.05007e-06 [interleave_split_concat_branches]: 5.59725e-07 [interleave_parallel_branches]: 5.29923e-07 [overlap_opt_shard_in_pipeline]: 9.59728e-07 [overlap_opt_shard_grad_in_pipeline]: 1.83983e-06 [control_data_broadcast_order]: 1.04029e-06 [grouped_pairwise_exchange_alltoall]: 1.57999e-06 [offloading_packed_experts]: 4.20026e-07 [overlap_recompute_and_grad_model_parallel]: 1.39e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.89762e-07 [overlap_recompute_allgather_and_fa_grad]: 8.40053e-07 [overlap_grad_ring_attention]: 1.78022e-06 [overlap_grad_flash_sp]: 1.39801e-05 [begin_end_overlap_inline]: 7.30157e-07 [split_matmul_comm_elemetwise]: 1.76998e-06 [split_layernorm_comm]: 1.66986e-06 [handle_group_info]: 8.29808e-07 [symbol_engine_optimizer]: 8.19201e-05, [1] [Cycle 1]: 7.74399e-05, [6] [build]: 3.93996e-06 [elim_shapecalc]: 1.11898e-05 [elim_not_effective]: 1.62399e-05 [opt_reshape]: 8.55979e-06 [fold_const_symbol]: 1.30502e-05 [renormalize]: 2.5006e-07 [pipeline_parallel_scheduler]: 1.23028e-06 [auto_monad_reorder]: 2.805e-05 [get_jit_bprop_graph]: 4.49829e-07 [rewriter_after_jit_bprop_graph]: 2.59839e-07 [eliminate_special_op_node]: 0.00053362 [distribtued_split]: 3.915e-05 [validate]: 3.35402e-05 [task_emit]: 0.0675735 [execute]: 7.3202e-06 Sums bootstrap : 0.000300s : 0.40% type_inference : 0.002378s : 3.20% auto_monad : 0.000105s : 0.14% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000033s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000621s : 0.84% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000009s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000010s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000213s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000007s : 0.01% optimize.opt_a.flash_sp : 0.000008s : 0.01% optimize.opt_a.merge_comm : 0.000012s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000015s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000030s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000008s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000416s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.07% optimize.opt_a.cse : 0.000051s : 0.07% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000153s : 0.21% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.22% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000465s : 0.63% optimize.opt_after_cconv.c_1 : 0.000050s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000052s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000000s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000028s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000534s : 0.72% distribtued_split : 0.000039s : 0.05% validate : 0.000034s : 0.05% task_emit : 0.067574s : 91.01% execute : 0.000007s : 0.01% Time group info: ------[substitution.] 0.000121 63 3.46% : 0.000004s : 2: substitution.depend_value_elim 2.18% : 0.000003s : 5: substitution.elim_not_effective 2.01% : 0.000002s : 5: substitution.fold_const_symbol 5.39% : 0.000007s : 6: substitution.graph_param_transform 54.34% : 0.000066s : 1: substitution.inline 3.76% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.08% : 0.000004s : 6: substitution.load_eliminater 1.74% : 0.000002s : 2: substitution.reduce_all_const_elim 6.30% : 0.000008s : 10: substitution.remove_not_recompute_node 1.92% : 0.000002s : 2: substitution.replace_old_param 8.03% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 7.79% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002356 2 87.94% : 0.002072s : 1: type_inference.infer 12.06% : 0.000284s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000222 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 0.96% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.84% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.06% : 0.000005s : 25: predicate.arithmetic_simplify 0.92% : 0.000002s : 13: predicate.cast_eliminate 0.83% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.40% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.98% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.54% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.11% : 0.000002s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.11% : 0.000002s : 19: predicate.environ_get_depend_swap 1.99% : 0.000004s : 31: predicate.environ_get_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.32% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.71% : 0.000002s : 12: predicate.incorporate_call_switch 5.71% : 0.000013s : 63: predicate.inline 1.07% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.02% : 0.000002s : 12: predicate.less_batch_normalization 1.66% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000005s : 38: predicate.load_eliminater 1.00% : 0.000002s : 6: predicate.loop_unroll_after_grad 1.27% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.77% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.80% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.80% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.51% : 0.000001s : 6: predicate.parallel_virtual_node 1.12% : 0.000002s : 14: predicate.partial_defer_inline 1.27% : 0.000003s : 19: predicate.partial_eliminate 0.85% : 0.000002s : 13: predicate.print_const_string_wrapper 0.95% : 0.000002s : 12: predicate.reduce_all_const_elim 1.04% : 0.000002s : 13: predicate.reduce_eliminate 0.48% : 0.000001s : 12: predicate.remove_not_recompute_node 1.26% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.97% : 0.000002s : 13: predicate.reshape_eliminate 0.91% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.10% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.95% : 0.000002s : 12: predicate.shard_identity_eliminate 1.36% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.93% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.22% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.65% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.40% : 0.000010s : 43: predicate.switch_simplify 0.94% : 0.000002s : 13: predicate.tile_eliminate 0.83% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.64% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.54% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.59% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.73% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.48% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.43% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.48% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.60% : 0.000001s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.84% : 0.000002s : 12: predicate.virtual_output_eliminate 0.61% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000196 4 8.38% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.62% : 0.000179s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.086846 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.13% : 0.000116s : 1: auto_monad 0.04% : 0.000034s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000326s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000047s : 1: distribtued_split 0.63% : 0.000546s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000015s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000474s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.34% : 0.001160s : 80: opt.transform.opt_a 0.06% : 0.000048s : 1: opt.transform.opt_after_cconv 0.17% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000051s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 6.04% : 0.005246s : 1: opt_a 0.15% : 0.000130s : 1: opt_after_cconv 0.28% : 0.000240s : 1: opt_b 7.92% : 0.006876s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.02% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.25% : 0.000217s : 1: renormalize.infer 0.22% : 0.000194s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000158s : 1: rewriter_after_opt_a 0.04% : 0.000037s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000085s : 1: symbol_engine_optimizer 77.84% : 0.067597s : 1: task_emit 0.08% : 0.000069s : 1: tuple_transform 2.76% : 0.002393s : 1: type_inference 0.08% : 0.000066s : 1: validate TotalTime = 0.0787907, [21] [bootstrap]: 0.00027829 [type_inference]: 0.0021857 [auto_monad]: 9.938e-05 [graph_reusing]: 1.85985e-06 [inline]: 1.34017e-06 [parallel-infer-symbol]: 1.22003e-06 [pre_auto_parallel]: 1.98502e-05 [insert-virtual-dataset]: 2.07033e-06 [parallel-infer-symbol-second]: 3.70201e-07 [dataset_repeat_opt]: 9.29926e-07 [pipeline_split]: 1.00024e-06 [optimize]: 0.00669319, [52] [py_interpret_to_execute]: 1.20201e-05 [rewriter_before_opt_a]: 2.96999e-05 [opt_a]: 0.00512123, [2] [Cycle 1]: 0.00148449, [43] [expand_dump_flag]: 2.14996e-06 [switch_simplify]: 2.49199e-05 [loop_unroll]: 1.33701e-05 [a_1]: 0.00031969 [recompute_prepare]: 8.37026e-06 [updatestate_depend_eliminate]: 7.54977e-06 [updatestate_assign_eliminate]: 5.30016e-06 [updatestate_loads_eliminate]: 6.59004e-06 [parameter_eliminate]: 2.10991e-06 [a_2]: 0.00011144 [accelerated_algorithm]: 8.36002e-06 [shard]: 2.00002e-06 [meta_shard_fg_expand]: 3.13018e-06 [shard_inline]: 7.94977e-06 [auto_parallel]: 1.05202e-05 [parallel]: 4.58956e-06 [flash_sp]: 6.6096e-06 [merge_comm]: 6.52019e-06 [allreduce_fusion]: 4.74043e-06 [matmul_add_comm_reduction]: 7.89994e-06 [allreduce_slice_to_reducescatter]: 3.1013e-07 [virtual_shard_identity]: 4.64399e-05 [virtual_dataset]: 8.5202e-06 [get_grad_eliminate_]: 7.79983e-06 [virtual_output]: 7.71973e-06 [merge_forward]: 5.63031e-06 [cell_reuse_recompute_pass]: 1.49012e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.628e-05 [before_grad]: 1.83498e-05 [inplace_validation]: 4.75999e-06 [meta_fg_expand]: 5.37001e-06 [inplace_validation_after_expand]: 5.30994e-06 [flash_sp_send_recv_attached]: 3.02028e-06 [receive_attached]: 1.68988e-06 [after_resolve]: 1.06902e-05 [a_after_grad]: 1.41198e-05 [special_op_eliminate]: 8.45967e-06 [renormalize]: 0.00043327 [add_forward_monad_depend]: 2.54018e-06 [auto_monad_grad]: 1.64006e-06 [auto_monad_eliminator]: 2.46498e-05 [cse]: 2.639e-05 [a_3]: 5.68703e-05 [Cycle 2]: 0.00075965, [43] [expand_dump_flag]: 1.0198e-06 [switch_simplify]: 8.76002e-06 [loop_unroll]: 7.81985e-06 [a_1]: 0.00019826 [recompute_prepare]: 7.58003e-06 [updatestate_depend_eliminate]: 5.68992e-06 [updatestate_assign_eliminate]: 4.84008e-06 [updatestate_loads_eliminate]: 4.92996e-06 [parameter_eliminate]: 1.03004e-06 [a_2]: 0.00010334 [accelerated_algorithm]: 8.14022e-06 [shard]: 9.99775e-07 [meta_shard_fg_expand]: 2.2999e-06 [shard_inline]: 7.67969e-06 [auto_parallel]: 1.091e-05 [parallel]: 3.11993e-06 [flash_sp]: 2.87034e-06 [merge_comm]: 5.56e-06 [allreduce_fusion]: 4.68967e-06 [matmul_add_comm_reduction]: 7.66991e-06 [allreduce_slice_to_reducescatter]: 2.59839e-07 [virtual_shard_identity]: 8.57981e-06 [virtual_dataset]: 7.51996e-06 [get_grad_eliminate_]: 7.24988e-06 [virtual_output]: 7.22986e-06 [merge_forward]: 4.39025e-06 [cell_reuse_recompute_pass]: 1.59023e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.49696e-05 [before_grad]: 1.23e-05 [inplace_validation]: 4.02983e-06 [meta_fg_expand]: 4.72972e-06 [inplace_validation_after_expand]: 5.01983e-06 [flash_sp_send_recv_attached]: 7.10133e-07 [receive_attached]: 7.90227e-07 [after_resolve]: 9.79984e-06 [a_after_grad]: 1.16299e-05 [special_op_eliminate]: 7.09994e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.801e-07 [auto_monad_grad]: 9.49949e-07 [auto_monad_eliminator]: 1.59899e-05 [cse]: 1.82302e-05 [a_3]: 4.80101e-05 [py_interpret_to_execute_after_opt_a]: 9.18005e-06 [slice_cell_reuse_recomputed_activation]: 1.95997e-06 [rewriter_after_opt_a]: 0.00013855 [convert_after_rewriter]: 7.87014e-06 [order_py_execute_after_rewriter]: 5.62984e-06 [opt_b]: 0.00023647, [1] [Cycle 1]: 0.00023148, [7] [b_1]: 0.00015966 [b_2]: 9.28016e-06 [updatestate_depend_eliminate]: 4.93973e-06 [updatestate_assign_eliminate]: 4.24031e-06 [updatestate_loads_eliminate]: 4.71994e-06 [renormalize]: 3.09665e-07 [cse]: 1.80798e-05 [optimize_parallel_all_gather_comm]: 7.68993e-06 [overlap_param_gather]: 8.2003e-07 [cconv]: 1.65198e-05 [loop_unroll]: 0.0004725 [opt_after_cconv]: 0.00012312, [1] [Cycle 1]: 0.00011767, [7] [c_1]: 4.907e-05 [parameter_eliminate]: 1.74996e-06 [updatestate_depend_eliminate]: 6.89039e-06 [updatestate_assign_eliminate]: 4.5104e-06 [updatestate_loads_eliminate]: 4.67012e-06 [cse]: 1.95699e-05 [renormalize]: 3.20375e-07 [remove_dup_value]: 1.01198e-05 [tuple_transform]: 6.59502e-05, [1] [Cycle 1]: 6.183e-05, [2] [d_1]: 5.341e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 1.51014e-06 [add_cache_embedding]: 1.24001e-05 [add_recomputation]: 5.364e-05 [cse_after_recomputation]: 2.51699e-05, [1] [Cycle 1]: 2.09101e-05, [1] [cse]: 1.59102e-05 [environ_conv]: 6.47968e-06 [swap_dp_allreduce_reducescatter]: 6.67013e-06 [bias_add_comm_swap]: 1.67033e-06 [label_micro_interleaved_index]: 1.51992e-06 [label_fine_grained_interleaved_index]: 1.43005e-06 [merge_cast_opt]: 7.59959e-07 [slice_recompute_activation]: 1.07987e-06 [micro_interleaved_order_control]: 1.21025e-06 [assign_add_opt]: 7.22008e-06 [ForceFp32Comm]: 6.19795e-07 [remove_cast_before_assign_add]: 4.80097e-07 [full_micro_interleaved_order_control]: 1.24983e-06 [reorder_send_recv_between_fp_bp]: 1.15996e-06 [comm_op_add_attrs]: 9.49949e-07 [add_comm_op_reuse_tag]: 6.00237e-07 [interleave_split_concat_branches]: 5.59725e-07 [interleave_parallel_branches]: 5.60191e-07 [overlap_opt_shard_in_pipeline]: 8.50298e-07 [overlap_opt_shard_grad_in_pipeline]: 1.67033e-06 [control_data_broadcast_order]: 6.50063e-07 [grouped_pairwise_exchange_alltoall]: 7.30157e-07 [offloading_packed_experts]: 6.9011e-07 [overlap_recompute_and_grad_model_parallel]: 1.29966e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.80097e-07 [overlap_recompute_allgather_and_fa_grad]: 6.89644e-07 [overlap_grad_ring_attention]: 1.57999e-06 [overlap_grad_flash_sp]: 1.188e-05 [begin_end_overlap_inline]: 4.4005e-07 [split_matmul_comm_elemetwise]: 1.37975e-06 [split_layernorm_comm]: 1.20001e-06 [handle_group_info]: 5.79748e-07 [symbol_engine_optimizer]: 7.969e-05, [1] [Cycle 1]: 7.58301e-05, [6] [build]: 4.01028e-06 [elim_shapecalc]: 1.133e-05 [elim_not_effective]: 1.47601e-05 [opt_reshape]: 8.37026e-06 [fold_const_symbol]: 1.25202e-05 [renormalize]: 2.5006e-07 [pipeline_parallel_scheduler]: 8.99658e-07 [auto_monad_reorder]: 2.226e-05 [get_jit_bprop_graph]: 3.50177e-07 [rewriter_after_jit_bprop_graph]: 3.29688e-07 [eliminate_special_op_node]: 0.0004821 [distribtued_split]: 3.401e-05 [validate]: 2.98796e-05 [task_emit]: 0.0687079 [execute]: 8.98028e-06 Sums bootstrap : 0.000278s : 0.37% type_inference : 0.002186s : 2.91% auto_monad : 0.000099s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000020s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000034s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000518s : 0.69% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000215s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000009s : 0.01% optimize.opt_a.merge_comm : 0.000012s : 0.02% optimize.opt_a.allreduce_fusion : 0.000009s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000055s : 0.07% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000031s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000026s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000433s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000041s : 0.05% optimize.opt_a.cse : 0.000045s : 0.06% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000139s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.21% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000017s : 0.02% optimize.loop_unroll : 0.000472s : 0.63% optimize.opt_after_cconv.c_1 : 0.000049s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000054s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000000s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000022s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000482s : 0.64% distribtued_split : 0.000034s : 0.05% validate : 0.000030s : 0.04% task_emit : 0.068708s : 91.62% execute : 0.000009s : 0.01% TotalTime = 0.0788472, [21] [bootstrap]: 0.00030477 [type_inference]: 0.00246932 [auto_monad]: 0.00012158 [graph_reusing]: 2.80002e-06 [inline]: 1.59023e-06 [parallel-infer-symbol]: 2.29012e-06 [pre_auto_parallel]: 2.58097e-05 [insert-virtual-dataset]: 2.17045e-06 [parallel-infer-symbol-second]: 4.50294e-07 [dataset_repeat_opt]: 1.33971e-06 [pipeline_split]: 1.32993e-06 [optimize]: 0.00695523, [52] [py_interpret_to_execute]: 1.44499e-05 [rewriter_before_opt_a]: 3.40599e-05 [opt_a]: 0.00527714, [2] [Cycle 1]: 0.00155895, [43] [expand_dump_flag]: 2.83029e-06 [switch_simplify]: 2.9e-05 [loop_unroll]: 1.331e-05 [a_1]: 0.00038408 [recompute_prepare]: 8.90018e-06 [updatestate_depend_eliminate]: 8.32975e-06 [updatestate_assign_eliminate]: 6.39027e-06 [updatestate_loads_eliminate]: 7.03009e-06 [parameter_eliminate]: 3.62005e-06 [a_2]: 0.00011487 [accelerated_algorithm]: 8.57981e-06 [shard]: 2.06009e-06 [meta_shard_fg_expand]: 4.23985e-06 [shard_inline]: 8.10018e-06 [auto_parallel]: 1.17901e-05 [parallel]: 7.54977e-06 [flash_sp]: 1.07e-05 [merge_comm]: 7.7202e-06 [allreduce_fusion]: 5.20982e-06 [matmul_add_comm_reduction]: 1.015e-05 [allreduce_slice_to_reducescatter]: 4.49829e-07 [virtual_shard_identity]: 9.03988e-06 [virtual_dataset]: 7.95024e-06 [get_grad_eliminate_]: 7.49994e-06 [virtual_output]: 7.68015e-06 [merge_forward]: 6.83032e-06 [cell_reuse_recompute_pass]: 1.83005e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.60201e-05 [before_grad]: 1.36001e-05 [inplace_validation]: 5.49993e-06 [meta_fg_expand]: 5.50039e-06 [inplace_validation_after_expand]: 5.94975e-06 [flash_sp_send_recv_attached]: 4.81028e-06 [receive_attached]: 2.39024e-06 [after_resolve]: 1.06902e-05 [a_after_grad]: 1.29398e-05 [special_op_eliminate]: 8.09971e-06 [renormalize]: 0.00043913 [add_forward_monad_depend]: 3.72017e-06 [auto_monad_grad]: 2.01957e-06 [auto_monad_eliminator]: 2.97599e-05 [cse]: 3.09697e-05 [a_3]: 5.60801e-05 [Cycle 2]: 0.00076901, [43] [expand_dump_flag]: 1.13994e-06 [switch_simplify]: 9.11998e-06 [loop_unroll]: 7.88039e-06 [a_1]: 0.00019901 [recompute_prepare]: 7.13998e-06 [updatestate_depend_eliminate]: 5.79003e-06 [updatestate_assign_eliminate]: 4.96022e-06 [updatestate_loads_eliminate]: 5.11017e-06 [parameter_eliminate]: 1.37975e-06 [a_2]: 0.00010358 [accelerated_algorithm]: 8.19983e-06 [shard]: 1.39978e-06 [meta_shard_fg_expand]: 2.53972e-06 [shard_inline]: 7.7798e-06 [auto_parallel]: 1.175e-05 [parallel]: 3.55998e-06 [flash_sp]: 3.26009e-06 [merge_comm]: 5.81006e-06 [allreduce_fusion]: 4.99981e-06 [matmul_add_comm_reduction]: 7.66013e-06 [allreduce_slice_to_reducescatter]: 2.90107e-07 [virtual_shard_identity]: 8.51043e-06 [virtual_dataset]: 7.53999e-06 [get_grad_eliminate_]: 7.09994e-06 [virtual_output]: 6.99004e-06 [merge_forward]: 4.57e-06 [cell_reuse_recompute_pass]: 1.96043e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.52802e-05 [before_grad]: 1.213e-05 [inplace_validation]: 4.5402e-06 [meta_fg_expand]: 4.75021e-06 [inplace_validation_after_expand]: 4.92018e-06 [flash_sp_send_recv_attached]: 9.20147e-07 [receive_attached]: 8.49832e-07 [after_resolve]: 9.65968e-06 [a_after_grad]: 1.17398e-05 [special_op_eliminate]: 7.13998e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.70321e-07 [auto_monad_grad]: 1.27964e-06 [auto_monad_eliminator]: 1.83601e-05 [cse]: 1.89096e-05 [a_3]: 4.75398e-05 [py_interpret_to_execute_after_opt_a]: 9.35979e-06 [slice_cell_reuse_recomputed_activation]: 2.04984e-06 [rewriter_after_opt_a]: 0.00013038 [convert_after_rewriter]: 9.95025e-06 [order_py_execute_after_rewriter]: 6.17979e-06 [opt_b]: 0.0002399, [1] [Cycle 1]: 0.00023418, [7] [b_1]: 0.00016028 [b_2]: 9.85991e-06 [updatestate_depend_eliminate]: 5.39003e-06 [updatestate_assign_eliminate]: 4.61005e-06 [updatestate_loads_eliminate]: 5.15999e-06 [renormalize]: 3.1013e-07 [cse]: 1.86497e-05 [optimize_parallel_all_gather_comm]: 8.54023e-06 [overlap_param_gather]: 1.11992e-06 [cconv]: 2.22698e-05 [loop_unroll]: 0.00050199 [opt_after_cconv]: 0.00013865, [1] [Cycle 1]: 0.00013237, [7] [c_1]: 5.84e-05 [parameter_eliminate]: 2.54996e-06 [updatestate_depend_eliminate]: 8.02986e-06 [updatestate_assign_eliminate]: 4.65987e-06 [updatestate_loads_eliminate]: 5.34998e-06 [cse]: 2.10502e-05 [renormalize]: 3.69735e-07 [remove_dup_value]: 1.34101e-05 [tuple_transform]: 6.99097e-05, [1] [Cycle 1]: 6.49504e-05, [2] [d_1]: 5.621e-05 [renormalize]: 1.79745e-07 [partial_unused_args_eliminate]: 2.22027e-06 [add_cache_embedding]: 1.26404e-05 [add_recomputation]: 5.93998e-05 [cse_after_recomputation]: 2.66503e-05, [1] [Cycle 1]: 2.16598e-05, [1] [cse]: 1.69999e-05 [environ_conv]: 7.66991e-06 [swap_dp_allreduce_reducescatter]: 8.15e-06 [bias_add_comm_swap]: 2.30037e-06 [label_micro_interleaved_index]: 1.77044e-06 [label_fine_grained_interleaved_index]: 1.81003e-06 [merge_cast_opt]: 1.34017e-06 [slice_recompute_activation]: 1.58977e-06 [micro_interleaved_order_control]: 1.91992e-06 [assign_add_opt]: 7.63033e-06 [ForceFp32Comm]: 9.69972e-07 [remove_cast_before_assign_add]: 9.99775e-07 [full_micro_interleaved_order_control]: 1.86963e-06 [reorder_send_recv_between_fp_bp]: 1.88034e-06 [comm_op_add_attrs]: 9.30391e-07 [add_comm_op_reuse_tag]: 8.89879e-07 [interleave_split_concat_branches]: 6.00237e-07 [interleave_parallel_branches]: 6.3004e-07 [overlap_opt_shard_in_pipeline]: 1.14972e-06 [overlap_opt_shard_grad_in_pipeline]: 1.8999e-06 [control_data_broadcast_order]: 1.11992e-06 [grouped_pairwise_exchange_alltoall]: 1.05985e-06 [offloading_packed_experts]: 1.17999e-06 [overlap_recompute_and_grad_model_parallel]: 2.11038e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.39936e-07 [overlap_recompute_allgather_and_fa_grad]: 1.07987e-06 [overlap_grad_ring_attention]: 1.59023e-06 [overlap_grad_flash_sp]: 1.47303e-05 [begin_end_overlap_inline]: 7.39936e-07 [split_matmul_comm_elemetwise]: 1.55997e-06 [split_layernorm_comm]: 1.82958e-06 [handle_group_info]: 1.19023e-06 [symbol_engine_optimizer]: 8.186e-05, [1] [Cycle 1]: 7.75298e-05, [6] [build]: 3.76021e-06 [elim_shapecalc]: 1.15102e-05 [elim_not_effective]: 1.64201e-05 [opt_reshape]: 8.2897e-06 [fold_const_symbol]: 1.31098e-05 [renormalize]: 1.49943e-07 [pipeline_parallel_scheduler]: 1.40024e-06 [auto_monad_reorder]: 2.88198e-05 [get_jit_bprop_graph]: 4.4005e-07 [rewriter_after_jit_bprop_graph]: 3.90224e-07 [eliminate_special_op_node]: 0.00060453 [distribtued_split]: 4.12799e-05 [validate]: 3.48501e-05 [task_emit]: 0.0679989 [execute]: 1.14399e-05 Sums bootstrap : 0.000305s : 0.41% type_inference : 0.002469s : 3.30% auto_monad : 0.000122s : 0.16% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000583s : 0.78% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000218s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000439s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000048s : 0.06% optimize.opt_a.cse : 0.000050s : 0.07% optimize.opt_a.a_3 : 0.000104s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000130s : 0.17% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000502s : 0.67% optimize.opt_after_cconv.c_1 : 0.000058s : 0.08% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.08% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000059s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000605s : 0.81% distribtued_split : 0.000041s : 0.06% validate : 0.000035s : 0.05% task_emit : 0.067999s : 90.75% execute : 0.000011s : 0.02% Time group info: ------[substitution.] 0.000108 63 4.43% : 0.000005s : 2: substitution.depend_value_elim 2.00% : 0.000002s : 5: substitution.elim_not_effective 1.85% : 0.000002s : 5: substitution.fold_const_symbol 5.91% : 0.000006s : 6: substitution.graph_param_transform 48.63% : 0.000053s : 1: substitution.inline 4.84% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.29% : 0.000004s : 6: substitution.load_eliminater 2.26% : 0.000002s : 2: substitution.reduce_all_const_elim 6.79% : 0.000007s : 10: substitution.remove_not_recompute_node 2.74% : 0.000003s : 2: substitution.replace_old_param 9.06% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.18% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002162 2 88.46% : 0.001913s : 1: type_inference.infer 11.54% : 0.000250s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000052 1 100.00% : 0.000052s : 1: match.inline ------[predicate.] 0.000223 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.04% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.82% : 0.000002s : 13: predicate.addn_zero_filter 0.80% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.26% : 0.000005s : 25: predicate.arithmetic_simplify 0.96% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000000s : 6: predicate.const_output_eliminate 0.52% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.24% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_depend_swap 1.86% : 0.000004s : 31: predicate.environ_get_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.24% : 0.000003s : 14: predicate.float_depend_g_call 0.77% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.31% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.72% : 0.000002s : 12: predicate.incorporate_call_switch 5.64% : 0.000013s : 63: predicate.inline 1.11% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.95% : 0.000002s : 12: predicate.less_batch_normalization 1.79% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.43% : 0.000005s : 38: predicate.load_eliminater 1.30% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.89% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.63% : 0.000001s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.78% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000002s : 13: predicate.reduce_eliminate 0.52% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.91% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.45% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.98% : 0.000002s : 12: predicate.shard_identity_eliminate 1.44% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.01% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.34% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.25% : 0.000009s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.84% : 0.000002s : 13: predicate.transpose_eliminate 1.76% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.84% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.50% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.87% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.52% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.37% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.49% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.52% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.86% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000163 4 6.95% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.05% : 0.000152s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087292 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000058s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.13% : 0.000110s : 1: auto_monad 0.03% : 0.000028s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000004s : 1: bias_add_comm_swap 0.34% : 0.000300s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000011s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.57% : 0.000495s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000004s : 1: label_micro_interleaved_index 0.55% : 0.000481s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.26% : 0.001102s : 80: opt.transform.opt_a 0.05% : 0.000048s : 1: opt.transform.opt_after_cconv 0.17% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.03% : 0.000029s : 3: opt.transform.special_op_eliminate 0.05% : 0.000044s : 4: opt.transform.symbol_engine_opt 5.87% : 0.005125s : 1: opt_a 0.15% : 0.000127s : 1: opt_after_cconv 0.27% : 0.000239s : 1: opt_b 7.68% : 0.006701s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000025s : 1: pre_auto_parallel 0.02% : 0.000016s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.24% : 0.000214s : 1: renormalize.infer 0.25% : 0.000215s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000144s : 1: rewriter_after_opt_a 0.04% : 0.000034s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000082s : 1: symbol_engine_optimizer 78.74% : 0.068732s : 1: task_emit 0.08% : 0.000069s : 1: tuple_transform 2.52% : 0.002202s : 1: type_inference 0.07% : 0.000062s : 1: validate Time group info: ------[substitution.] 0.000170 63 3.46% : 0.000006s : 2: substitution.depend_value_elim 1.38% : 0.000002s : 5: substitution.elim_not_effective 1.29% : 0.000002s : 5: substitution.fold_const_symbol 4.08% : 0.000007s : 6: substitution.graph_param_transform 62.74% : 0.000107s : 1: substitution.inline 3.09% : 0.000005s : 10: substitution.j_node_and_user_rematch 2.37% : 0.000004s : 6: substitution.load_eliminater 1.86% : 0.000003s : 2: substitution.reduce_all_const_elim 4.43% : 0.000008s : 10: substitution.remove_not_recompute_node 1.94% : 0.000003s : 2: substitution.replace_old_param 7.05% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 6.29% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002441 2 87.44% : 0.002134s : 1: type_inference.infer 12.56% : 0.000307s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000106 1 100.00% : 0.000106s : 1: match.inline ------[predicate.] 0.000226 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.02% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.31% : 0.000005s : 25: predicate.arithmetic_simplify 0.93% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.76% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.38% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.84% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_depend_swap 1.93% : 0.000004s : 31: predicate.environ_get_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.31% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.10% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.20% : 0.000000s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.75% : 0.000013s : 63: predicate.inline 1.08% : 0.000002s : 12: predicate.inline_without_move 0.42% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.09% : 0.000002s : 12: predicate.less_batch_normalization 1.77% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.44% : 0.000006s : 38: predicate.load_eliminater 1.15% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.90% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.69% : 0.000002s : 6: predicate.mutable_eliminate 0.50% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.21% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.89% : 0.000002s : 13: predicate.print_const_string_wrapper 0.80% : 0.000002s : 12: predicate.reduce_all_const_elim 1.10% : 0.000002s : 13: predicate.reduce_eliminate 0.61% : 0.000001s : 12: predicate.remove_not_recompute_node 1.21% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.28% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 0.97% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.99% : 0.000002s : 12: predicate.shard_identity_eliminate 1.30% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.04% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.62% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.37% : 0.000010s : 43: predicate.switch_simplify 0.76% : 0.000002s : 13: predicate.tile_eliminate 0.81% : 0.000002s : 13: predicate.transpose_eliminate 1.89% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.67% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.51% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.75% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.59% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.38% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.46% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.48% : 0.000001s : 6: predicate.value_based_eliminate 0.85% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000208 4 7.41% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.59% : 0.000193s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087656 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000135s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000328s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.01% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000049s : 1: distribtued_split 0.71% : 0.000619s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.58% : 0.000512s : 1: loop_unroll 0.01% : 0.000008s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.29% : 0.001127s : 80: opt.transform.opt_a 0.06% : 0.000057s : 1: opt.transform.opt_after_cconv 0.17% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.02% : 0.005281s : 1: opt_a 0.16% : 0.000143s : 1: opt_after_cconv 0.28% : 0.000243s : 1: opt_b 7.94% : 0.006963s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.02% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.26% : 0.000229s : 1: renormalize.infer 0.23% : 0.000205s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000136s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000085s : 1: symbol_engine_optimizer 77.61% : 0.068026s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.84% : 0.002489s : 1: type_inference 0.08% : 0.000068s : 1: validate TotalTime = 0.0801279, [21] [bootstrap]: 0.00030398 [type_inference]: 0.00247327 [auto_monad]: 0.00012209 [graph_reusing]: 2.28034e-06 [inline]: 1.14972e-06 [parallel-infer-symbol]: 2.08989e-06 [pre_auto_parallel]: 2.39997e-05 [insert-virtual-dataset]: 2.2701e-06 [parallel-infer-symbol-second]: 3.29688e-07 [dataset_repeat_opt]: 1.41002e-06 [pipeline_split]: 1.22003e-06 [optimize]: 0.0070916, [52] [py_interpret_to_execute]: 1.37901e-05 [rewriter_before_opt_a]: 3.439e-05 [opt_a]: 0.00537061, [2] [Cycle 1]: 0.00156311, [43] [expand_dump_flag]: 3.49991e-06 [switch_simplify]: 2.93702e-05 [loop_unroll]: 1.32201e-05 [a_1]: 0.00038116 [recompute_prepare]: 8.71997e-06 [updatestate_depend_eliminate]: 8.35024e-06 [updatestate_assign_eliminate]: 5.78957e-06 [updatestate_loads_eliminate]: 7.18981e-06 [parameter_eliminate]: 3.11015e-06 [a_2]: 0.0001148 [accelerated_algorithm]: 8.19983e-06 [shard]: 1.91992e-06 [meta_shard_fg_expand]: 4.34974e-06 [shard_inline]: 7.80029e-06 [auto_parallel]: 1.15698e-05 [parallel]: 7.8897e-06 [flash_sp]: 1.04699e-05 [merge_comm]: 7.39982e-06 [allreduce_fusion]: 6.12997e-06 [matmul_add_comm_reduction]: 1.01603e-05 [allreduce_slice_to_reducescatter]: 4.49829e-07 [virtual_shard_identity]: 8.67015e-06 [virtual_dataset]: 7.89994e-06 [get_grad_eliminate_]: 7.61962e-06 [virtual_output]: 7.4096e-06 [merge_forward]: 5.98002e-06 [cell_reuse_recompute_pass]: 1.95997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.65701e-05 [before_grad]: 1.375e-05 [inplace_validation]: 5.83008e-06 [meta_fg_expand]: 5.49015e-06 [inplace_validation_after_expand]: 6.69993e-06 [flash_sp_send_recv_attached]: 4.42006e-06 [receive_attached]: 2.7502e-06 [after_resolve]: 1.18101e-05 [a_after_grad]: 1.22497e-05 [special_op_eliminate]: 7.62008e-06 [renormalize]: 0.00043277 [add_forward_monad_depend]: 3.53996e-06 [auto_monad_grad]: 1.87987e-06 [auto_monad_eliminator]: 3.23099e-05 [cse]: 2.997e-05 [a_3]: 5.71301e-05 [Cycle 2]: 0.00079845, [43] [expand_dump_flag]: 1.06031e-06 [switch_simplify]: 9.56003e-06 [loop_unroll]: 8.2003e-06 [a_1]: 0.00020378 [recompute_prepare]: 7.65035e-06 [updatestate_depend_eliminate]: 6.14999e-06 [updatestate_assign_eliminate]: 5.15999e-06 [updatestate_loads_eliminate]: 5.15021e-06 [parameter_eliminate]: 1.22003e-06 [a_2]: 0.00010563 [accelerated_algorithm]: 8.78004e-06 [shard]: 1.23028e-06 [meta_shard_fg_expand]: 2.6999e-06 [shard_inline]: 7.98004e-06 [auto_parallel]: 1.16602e-05 [parallel]: 3.87011e-06 [flash_sp]: 3.57023e-06 [merge_comm]: 5.91995e-06 [allreduce_fusion]: 5.03007e-06 [matmul_add_comm_reduction]: 8.02986e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 8.76002e-06 [virtual_dataset]: 8.12998e-06 [get_grad_eliminate_]: 7.81007e-06 [virtual_output]: 7.30995e-06 [merge_forward]: 4.80004e-06 [cell_reuse_recompute_pass]: 1.8198e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.50902e-05 [before_grad]: 1.27302e-05 [inplace_validation]: 4.27011e-06 [meta_fg_expand]: 5.20004e-06 [inplace_validation_after_expand]: 5.28013e-06 [flash_sp_send_recv_attached]: 9.99775e-07 [receive_attached]: 8.50298e-07 [after_resolve]: 9.62988e-06 [a_after_grad]: 1.173e-05 [special_op_eliminate]: 7.32997e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 9.29926e-07 [auto_monad_grad]: 1.13994e-06 [auto_monad_eliminator]: 1.851e-05 [cse]: 1.94497e-05 [a_3]: 4.91501e-05 [py_interpret_to_execute_after_opt_a]: 9.28994e-06 [slice_cell_reuse_recomputed_activation]: 2.11969e-06 [rewriter_after_opt_a]: 0.00015063 [convert_after_rewriter]: 8.40984e-06 [order_py_execute_after_rewriter]: 5.47012e-06 [opt_b]: 0.00024632, [1] [Cycle 1]: 0.00023994, [7] [b_1]: 0.00016145 [b_2]: 1.00001e-05 [updatestate_depend_eliminate]: 5.43986e-06 [updatestate_assign_eliminate]: 4.65009e-06 [updatestate_loads_eliminate]: 5.01983e-06 [renormalize]: 2.89641e-07 [cse]: 1.969e-05 [optimize_parallel_all_gather_comm]: 8.40006e-06 [overlap_param_gather]: 1.02026e-06 [cconv]: 2.222e-05 [loop_unroll]: 0.00049954 [opt_after_cconv]: 0.00013706, [1] [Cycle 1]: 0.00013033, [7] [c_1]: 5.343e-05 [parameter_eliminate]: 2.82982e-06 [updatestate_depend_eliminate]: 7.72998e-06 [updatestate_assign_eliminate]: 4.98025e-06 [updatestate_loads_eliminate]: 5.15999e-06 [cse]: 2.17403e-05 [renormalize]: 3.49712e-07 [remove_dup_value]: 1.35601e-05 [tuple_transform]: 7.49798e-05, [1] [Cycle 1]: 6.96401e-05, [2] [d_1]: 6.04e-05 [renormalize]: 1.49943e-07 [partial_unused_args_eliminate]: 1.85007e-06 [add_cache_embedding]: 1.37198e-05 [add_recomputation]: 6.12698e-05 [cse_after_recomputation]: 2.80701e-05, [1] [Cycle 1]: 2.27103e-05, [1] [cse]: 1.75498e-05 [environ_conv]: 7.55023e-06 [swap_dp_allreduce_reducescatter]: 8.11974e-06 [bias_add_comm_swap]: 2.48011e-06 [label_micro_interleaved_index]: 2.10991e-06 [label_fine_grained_interleaved_index]: 2.27988e-06 [merge_cast_opt]: 1.44029e-06 [slice_recompute_activation]: 1.66986e-06 [micro_interleaved_order_control]: 1.74996e-06 [assign_add_opt]: 7.51996e-06 [ForceFp32Comm]: 1.12038e-06 [remove_cast_before_assign_add]: 5.59725e-07 [full_micro_interleaved_order_control]: 2.32039e-06 [reorder_send_recv_between_fp_bp]: 2.54996e-06 [comm_op_add_attrs]: 8.99658e-07 [add_comm_op_reuse_tag]: 1.0198e-06 [interleave_split_concat_branches]: 8.30274e-07 [interleave_parallel_branches]: 7.20378e-07 [overlap_opt_shard_in_pipeline]: 1.31968e-06 [overlap_opt_shard_grad_in_pipeline]: 2.73017e-06 [control_data_broadcast_order]: 1.15018e-06 [grouped_pairwise_exchange_alltoall]: 9.29926e-07 [offloading_packed_experts]: 1.21025e-06 [overlap_recompute_and_grad_model_parallel]: 2.2403e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.69855e-07 [overlap_recompute_allgather_and_fa_grad]: 9.80217e-07 [overlap_grad_ring_attention]: 2.33017e-06 [overlap_grad_flash_sp]: 1.47102e-05 [begin_end_overlap_inline]: 7.70204e-07 [split_matmul_comm_elemetwise]: 1.97999e-06 [split_layernorm_comm]: 1.8701e-06 [handle_group_info]: 1.09989e-06 [symbol_engine_optimizer]: 8.59001e-05, [1] [Cycle 1]: 8.09496e-05, [6] [build]: 3.89991e-06 [elim_shapecalc]: 1.156e-05 [elim_not_effective]: 1.609e-05 [opt_reshape]: 9.33977e-06 [fold_const_symbol]: 1.36402e-05 [renormalize]: 2.99886e-07 [pipeline_parallel_scheduler]: 1.7602e-06 [auto_monad_reorder]: 2.917e-05 [get_jit_bprop_graph]: 4.49829e-07 [rewriter_after_jit_bprop_graph]: 4.50294e-07 [eliminate_special_op_node]: 0.00058282 [distribtued_split]: 4.12096e-05 [validate]: 3.48398e-05 [task_emit]: 0.0691484 [execute]: 1.09398e-05 Sums bootstrap : 0.000304s : 0.40% type_inference : 0.002473s : 3.25% auto_monad : 0.000122s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000585s : 0.77% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000017s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000433s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000049s : 0.06% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000151s : 0.20% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000500s : 0.66% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000060s : 0.08% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000583s : 0.77% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.069148s : 90.87% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000144 63 4.79% : 0.000007s : 2: substitution.depend_value_elim 1.84% : 0.000003s : 5: substitution.elim_not_effective 1.77% : 0.000003s : 5: substitution.fold_const_symbol 4.87% : 0.000007s : 6: substitution.graph_param_transform 54.59% : 0.000078s : 1: substitution.inline 3.50% : 0.000005s : 10: substitution.j_node_and_user_rematch 2.78% : 0.000004s : 6: substitution.load_eliminater 2.34% : 0.000003s : 2: substitution.reduce_all_const_elim 5.16% : 0.000007s : 10: substitution.remove_not_recompute_node 2.41% : 0.000003s : 2: substitution.replace_old_param 8.12% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.85% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002444 2 87.47% : 0.002138s : 1: type_inference.infer 12.53% : 0.000306s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000077 1 100.00% : 0.000077s : 1: match.inline ------[predicate.] 0.000233 1420 0.75% : 0.000002s : 13: predicate.accumulaten_eliminater 1.13% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.68% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.60% : 0.000006s : 25: predicate.arithmetic_simplify 1.05% : 0.000002s : 13: predicate.cast_eliminate 0.76% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.32% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.94% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.24% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.44% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.85% : 0.000004s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.22% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.04% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.41% : 0.000013s : 63: predicate.inline 1.02% : 0.000002s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.11% : 0.000003s : 12: predicate.less_batch_normalization 1.79% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.31% : 0.000005s : 38: predicate.load_eliminater 1.31% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.18% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.77% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.78% : 0.000002s : 13: predicate.minmaximum_grad 0.84% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.57% : 0.000001s : 6: predicate.parallel_virtual_node 1.08% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.92% : 0.000002s : 12: predicate.reduce_all_const_elim 1.05% : 0.000002s : 13: predicate.reduce_eliminate 0.56% : 0.000001s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.88% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000002s : 12: predicate.shard_identity_eliminate 1.26% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 1.00% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.04% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.24% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.63% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.49% : 0.000010s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 1.11% : 0.000003s : 13: predicate.transpose_eliminate 1.70% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.74% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.68% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.53% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.44% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.72% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.42% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.45% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.86% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.81% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000145 4 10.69% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.31% : 0.000130s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089073 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000135s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.37% : 0.000326s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000049s : 1: distribtued_split 0.67% : 0.000597s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000509s : 1: loop_unroll 0.01% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.27% : 0.001135s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.07% : 0.000059s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.03% : 0.005374s : 1: opt_a 0.16% : 0.000141s : 1: opt_after_cconv 0.28% : 0.000250s : 1: opt_b 7.97% : 0.007100s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.27% : 0.000237s : 1: renormalize.infer 0.21% : 0.000191s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000156s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000006s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000089s : 1: symbol_engine_optimizer 77.67% : 0.069185s : 1: task_emit 0.09% : 0.000078s : 1: tuple_transform 2.80% : 0.002492s : 1: type_inference 0.08% : 0.000069s : 1: validate TotalTime = 0.0824445, [21] [bootstrap]: 0.00033091 [type_inference]: 0.00273433 [auto_monad]: 0.00014185 [graph_reusing]: 2.48989e-06 [inline]: 1.70013e-06 [parallel-infer-symbol]: 2.2403e-06 [pre_auto_parallel]: 2.77702e-05 [insert-virtual-dataset]: 3.03006e-06 [parallel-infer-symbol-second]: 5.49946e-07 [dataset_repeat_opt]: 1.49012e-06 [pipeline_split]: 1.70013e-06 [optimize]: 0.00790972, [52] [py_interpret_to_execute]: 1.68602e-05 [rewriter_before_opt_a]: 4.00404e-05 [opt_a]: 0.0060066, [2] [Cycle 1]: 0.00174826, [43] [expand_dump_flag]: 4.33018e-06 [switch_simplify]: 3.40799e-05 [loop_unroll]: 1.59298e-05 [a_1]: 0.00040317 [recompute_prepare]: 1.05402e-05 [updatestate_depend_eliminate]: 9.02964e-06 [updatestate_assign_eliminate]: 6.65989e-06 [updatestate_loads_eliminate]: 8.21007e-06 [parameter_eliminate]: 3.53018e-06 [a_2]: 0.00014082 [accelerated_algorithm]: 1.03498e-05 [shard]: 2.19001e-06 [meta_shard_fg_expand]: 4.69992e-06 [shard_inline]: 1.00299e-05 [auto_parallel]: 1.32197e-05 [parallel]: 8.34977e-06 [flash_sp]: 1.17598e-05 [merge_comm]: 9.26014e-06 [allreduce_fusion]: 6.28037e-06 [matmul_add_comm_reduction]: 1.15801e-05 [allreduce_slice_to_reducescatter]: 4.89876e-07 [virtual_shard_identity]: 1.13198e-05 [virtual_dataset]: 1.02101e-05 [get_grad_eliminate_]: 9.70997e-06 [virtual_output]: 9.22987e-06 [merge_forward]: 6.63009e-06 [cell_reuse_recompute_pass]: 1.93017e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.12099e-05 [before_grad]: 1.85603e-05 [inplace_validation]: 5.94975e-06 [meta_fg_expand]: 6.30971e-06 [inplace_validation_after_expand]: 7.7798e-06 [flash_sp_send_recv_attached]: 5.55022e-06 [receive_attached]: 3.05986e-06 [after_resolve]: 1.47801e-05 [a_after_grad]: 1.60597e-05 [special_op_eliminate]: 9.60985e-06 [renormalize]: 0.00048255 [add_forward_monad_depend]: 3.6601e-06 [auto_monad_grad]: 2.14996e-06 [auto_monad_eliminator]: 3.56496e-05 [cse]: 3.72399e-05 [a_3]: 7.01202e-05 [Cycle 2]: 0.00091626, [43] [expand_dump_flag]: 1.22981e-06 [switch_simplify]: 1.08699e-05 [loop_unroll]: 9.43989e-06 [a_1]: 0.00024974 [recompute_prepare]: 8.91974e-06 [updatestate_depend_eliminate]: 6.8103e-06 [updatestate_assign_eliminate]: 5.11995e-06 [updatestate_loads_eliminate]: 5.96e-06 [parameter_eliminate]: 1.20001e-06 [a_2]: 0.00012562 [accelerated_algorithm]: 9.72999e-06 [shard]: 1.26986e-06 [meta_shard_fg_expand]: 3.30014e-06 [shard_inline]: 9.21963e-06 [auto_parallel]: 1.21798e-05 [parallel]: 4.33996e-06 [flash_sp]: 4.06988e-06 [merge_comm]: 6.93975e-06 [allreduce_fusion]: 6.44987e-06 [matmul_add_comm_reduction]: 8.78004e-06 [allreduce_slice_to_reducescatter]: 2.90107e-07 [virtual_shard_identity]: 1.03302e-05 [virtual_dataset]: 9.11998e-06 [get_grad_eliminate_]: 8.74e-06 [virtual_output]: 8.44989e-06 [merge_forward]: 5.43008e-06 [cell_reuse_recompute_pass]: 2.27988e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.88197e-05 [before_grad]: 1.50101e-05 [inplace_validation]: 4.72972e-06 [meta_fg_expand]: 5.76023e-06 [inplace_validation_after_expand]: 6.28037e-06 [flash_sp_send_recv_attached]: 9.29926e-07 [receive_attached]: 7.69738e-07 [after_resolve]: 1.15903e-05 [a_after_grad]: 1.451e-05 [special_op_eliminate]: 8.88994e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 1.07009e-06 [auto_monad_grad]: 1.49012e-06 [auto_monad_eliminator]: 2.14097e-05 [cse]: 2.129e-05 [a_3]: 5.918e-05 [py_interpret_to_execute_after_opt_a]: 1.04001e-05 [slice_cell_reuse_recomputed_activation]: 2.33017e-06 [rewriter_after_opt_a]: 0.00014264 [convert_after_rewriter]: 9.62988e-06 [order_py_execute_after_rewriter]: 7.66991e-06 [opt_b]: 0.00028473, [1] [Cycle 1]: 0.00027914, [7] [b_1]: 0.00019451 [b_2]: 1.19898e-05 [updatestate_depend_eliminate]: 6.17001e-06 [updatestate_assign_eliminate]: 4.97978e-06 [updatestate_loads_eliminate]: 5.71972e-06 [renormalize]: 3.70201e-07 [cse]: 2.05701e-05 [optimize_parallel_all_gather_comm]: 9.41986e-06 [overlap_param_gather]: 1.62981e-06 [cconv]: 2.55196e-05 [loop_unroll]: 0.00056211 [opt_after_cconv]: 0.00014895, [1] [Cycle 1]: 0.00014243, [7] [c_1]: 6.26501e-05 [parameter_eliminate]: 2.71015e-06 [updatestate_depend_eliminate]: 8.55001e-06 [updatestate_assign_eliminate]: 5.28013e-06 [updatestate_loads_eliminate]: 5.89015e-06 [cse]: 2.30898e-05 [renormalize]: 4.69852e-07 [remove_dup_value]: 1.54101e-05 [tuple_transform]: 8.34097e-05, [1] [Cycle 1]: 7.85897e-05, [2] [d_1]: 6.88499e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 2.29012e-06 [add_cache_embedding]: 1.43801e-05 [add_recomputation]: 7.24504e-05 [cse_after_recomputation]: 2.91797e-05, [1] [Cycle 1]: 2.38898e-05, [1] [cse]: 1.87e-05 [environ_conv]: 8.56025e-06 [swap_dp_allreduce_reducescatter]: 8.2897e-06 [bias_add_comm_swap]: 2.80002e-06 [label_micro_interleaved_index]: 2.1602e-06 [label_fine_grained_interleaved_index]: 2.27988e-06 [merge_cast_opt]: 1.85985e-06 [slice_recompute_activation]: 1.87987e-06 [micro_interleaved_order_control]: 2.21003e-06 [assign_add_opt]: 7.60984e-06 [ForceFp32Comm]: 8.60076e-07 [remove_cast_before_assign_add]: 1.0198e-06 [full_micro_interleaved_order_control]: 2.3297e-06 [reorder_send_recv_between_fp_bp]: 2.25985e-06 [comm_op_add_attrs]: 1.11992e-06 [add_comm_op_reuse_tag]: 1.27032e-06 [interleave_split_concat_branches]: 8.801e-07 [interleave_parallel_branches]: 9.39704e-07 [overlap_opt_shard_in_pipeline]: 1.44029e-06 [overlap_opt_shard_grad_in_pipeline]: 2.35997e-06 [control_data_broadcast_order]: 1.4999e-06 [grouped_pairwise_exchange_alltoall]: 1.43005e-06 [offloading_packed_experts]: 1.62981e-06 [overlap_recompute_and_grad_model_parallel]: 2.09035e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.39978e-06 [overlap_recompute_allgather_and_fa_grad]: 1.20979e-06 [overlap_grad_ring_attention]: 2.05031e-06 [overlap_grad_flash_sp]: 1.79601e-05 [begin_end_overlap_inline]: 8.79634e-07 [split_matmul_comm_elemetwise]: 2.12993e-06 [split_layernorm_comm]: 1.85007e-06 [handle_group_info]: 1.39e-06 [symbol_engine_optimizer]: 9.934e-05, [1] [Cycle 1]: 9.45497e-05, [6] [build]: 4.44986e-06 [elim_shapecalc]: 1.38502e-05 [elim_not_effective]: 1.91699e-05 [opt_reshape]: 1.06799e-05 [fold_const_symbol]: 1.83596e-05 [renormalize]: 2.90107e-07 [pipeline_parallel_scheduler]: 1.68011e-06 [auto_monad_reorder]: 3.23802e-05 [get_jit_bprop_graph]: 5.10365e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00052067 [distribtued_split]: 4.688e-05 [validate]: 3.88799e-05 [task_emit]: 0.0703651 [execute]: 1.31e-05 Sums bootstrap : 0.000331s : 0.42% type_inference : 0.002734s : 3.50% auto_monad : 0.000142s : 0.18% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000028s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000040s : 0.05% optimize.opt_a.expand_dump_flag : 0.000006s : 0.01% optimize.opt_a.switch_simplify : 0.000045s : 0.06% optimize.opt_a.loop_unroll : 0.000025s : 0.03% optimize.opt_a.a_1 : 0.000653s : 0.84% optimize.opt_a.recompute_prepare : 0.000019s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000266s : 0.34% optimize.opt_a.accelerated_algorithm : 0.000020s : 0.03% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000008s : 0.01% optimize.opt_a.shard_inline : 0.000019s : 0.02% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000013s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000016s : 0.02% optimize.opt_a.allreduce_fusion : 0.000013s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000022s : 0.03% optimize.opt_a.virtual_dataset : 0.000019s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000018s : 0.02% optimize.opt_a.virtual_output : 0.000018s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000040s : 0.05% optimize.opt_a.before_grad : 0.000034s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000012s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000014s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000026s : 0.03% optimize.opt_a.a_after_grad : 0.000031s : 0.04% optimize.opt_a.special_op_eliminate : 0.000018s : 0.02% optimize.opt_a.renormalize : 0.000483s : 0.62% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000057s : 0.07% optimize.opt_a.cse : 0.000059s : 0.08% optimize.opt_a.a_3 : 0.000129s : 0.17% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000143s : 0.18% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000008s : 0.01% optimize.opt_b.b_1 : 0.000195s : 0.25% optimize.opt_b.b_2 : 0.000012s : 0.02% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000021s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000026s : 0.03% optimize.loop_unroll : 0.000562s : 0.72% optimize.opt_after_cconv.c_1 : 0.000063s : 0.08% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000015s : 0.02% optimize.tuple_transform.d_1 : 0.000069s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000072s : 0.09% optimize.cse_after_recomputation.cse : 0.000019s : 0.02% optimize.environ_conv : 0.000009s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000018s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000019s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000018s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000521s : 0.67% distribtued_split : 0.000047s : 0.06% validate : 0.000039s : 0.05% task_emit : 0.070365s : 90.17% execute : 0.000013s : 0.02% Time group info: ------[substitution.] 0.000150 63 5.11% : 0.000008s : 2: substitution.depend_value_elim 2.21% : 0.000003s : 5: substitution.elim_not_effective 2.06% : 0.000003s : 5: substitution.fold_const_symbol 5.93% : 0.000009s : 6: substitution.graph_param_transform 46.89% : 0.000070s : 1: substitution.inline 5.28% : 0.000008s : 10: substitution.j_node_and_user_rematch 3.40% : 0.000005s : 6: substitution.load_eliminater 2.63% : 0.000004s : 2: substitution.reduce_all_const_elim 7.08% : 0.000011s : 10: substitution.remove_not_recompute_node 2.71% : 0.000004s : 2: substitution.replace_old_param 8.64% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 8.07% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002702 2 85.06% : 0.002299s : 1: type_inference.infer 14.94% : 0.000404s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000069 1 100.00% : 0.000069s : 1: match.inline ------[predicate.] 0.000271 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.21% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.19% : 0.000006s : 25: predicate.arithmetic_simplify 0.80% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.41% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 1.02% : 0.000003s : 13: predicate.dict_get_item_eliminator 0.79% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.48% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000005s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.31% : 0.000004s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.25% : 0.000001s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.84% : 0.000002s : 12: predicate.incorporate_call 0.73% : 0.000002s : 12: predicate.incorporate_call_switch 6.10% : 0.000017s : 63: predicate.inline 1.14% : 0.000003s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000003s : 12: predicate.less_batch_normalization 1.75% : 0.000005s : 25: predicate.list_to_tuple_eliminator_ 2.40% : 0.000007s : 38: predicate.load_eliminater 1.37% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.68% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.55% : 0.000001s : 6: predicate.parallel_virtual_node 1.11% : 0.000003s : 14: predicate.partial_defer_inline 1.38% : 0.000004s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.80% : 0.000002s : 12: predicate.reduce_all_const_elim 1.00% : 0.000003s : 13: predicate.reduce_eliminate 0.62% : 0.000002s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.76% : 0.000002s : 13: predicate.reshape_eliminate 0.83% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 0.94% : 0.000003s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.97% : 0.000003s : 12: predicate.shard_identity_eliminate 1.32% : 0.000004s : 18: predicate.special_op_eliminate 1.10% : 0.000003s : 12: predicate.specialize_transform 1.05% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.93% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.32% : 0.000006s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.85% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.18% : 0.000011s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.75% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000005s : 25: predicate.tuple_list_convert_item_index_to_positive 1.76% : 0.000005s : 25: predicate.tuple_list_get_item_const_eliminator 1.63% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.73% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.65% : 0.000007s : 37: predicate.tuple_list_set_item_eliminator 1.73% : 0.000005s : 25: predicate.tuple_to_list_eliminator_ 2.32% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.37% : 0.000009s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.51% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000174 4 10.16% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.84% : 0.000157s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092500 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000077s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.17% : 0.000156s : 1: auto_monad 0.04% : 0.000039s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.39% : 0.000357s : 1: bootstrap 0.03% : 0.000030s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.01% : 0.000005s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000032s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000055s : 1: distribtued_split 0.58% : 0.000535s : 1: eliminate_special_op_node 0.01% : 0.000013s : 1: environ_conv 0.02% : 0.000023s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.62% : 0.000572s : 1: loop_unroll 0.01% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.43% : 0.001323s : 80: opt.transform.opt_a 0.07% : 0.000061s : 1: opt.transform.opt_after_cconv 0.20% : 0.000183s : 27: opt.transform.opt_b 0.07% : 0.000067s : 1: opt.transform.opt_trans_graph 0.04% : 0.000037s : 3: opt.transform.special_op_eliminate 0.06% : 0.000057s : 4: opt.transform.symbol_engine_opt 6.50% : 0.006011s : 1: opt_a 0.17% : 0.000153s : 1: opt_after_cconv 0.31% : 0.000288s : 1: opt_b 8.56% : 0.007918s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.01% : 0.000005s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000006s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000034s : 1: pre_auto_parallel 0.02% : 0.000022s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000020s : 1: remove_dup_value 0.28% : 0.000262s : 1: renormalize.infer 0.23% : 0.000214s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000149s : 1: rewriter_after_opt_a 0.05% : 0.000044s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000102s : 1: symbol_engine_optimizer 76.11% : 0.070400s : 1: task_emit 0.09% : 0.000087s : 1: tuple_transform 2.98% : 0.002754s : 1: type_inference 0.08% : 0.000078s : 1: validate TotalTime = 0.0887125, [21] [bootstrap]: 0.00031434 [type_inference]: 0.0024965 [auto_monad]: 0.0001298 [graph_reusing]: 2.40002e-06 [inline]: 1.44029e-06 [parallel-infer-symbol]: 2.42004e-06 [pre_auto_parallel]: 2.485e-05 [insert-virtual-dataset]: 3.18978e-06 [parallel-infer-symbol-second]: 3.7998e-07 [dataset_repeat_opt]: 1.45985e-06 [pipeline_split]: 1.68988e-06 [optimize]: 0.00694908, [52] [py_interpret_to_execute]: 1.52504e-05 [rewriter_before_opt_a]: 3.54201e-05 [opt_a]: 0.0052605, [2] [Cycle 1]: 0.00155957, [43] [expand_dump_flag]: 3.83006e-06 [switch_simplify]: 3.00799e-05 [loop_unroll]: 1.34399e-05 [a_1]: 0.00034479 [recompute_prepare]: 9.0003e-06 [updatestate_depend_eliminate]: 8.31997e-06 [updatestate_assign_eliminate]: 5.80028e-06 [updatestate_loads_eliminate]: 6.77956e-06 [parameter_eliminate]: 3.24007e-06 [a_2]: 0.00011519 [accelerated_algorithm]: 8.2003e-06 [shard]: 2.04006e-06 [meta_shard_fg_expand]: 3.57023e-06 [shard_inline]: 8.50996e-06 [auto_parallel]: 1.20196e-05 [parallel]: 6.50017e-06 [flash_sp]: 8.88994e-06 [merge_comm]: 6.8401e-06 [allreduce_fusion]: 5.3402e-06 [matmul_add_comm_reduction]: 9.94978e-06 [allreduce_slice_to_reducescatter]: 4.4005e-07 [virtual_shard_identity]: 9.23965e-06 [virtual_dataset]: 7.72998e-06 [get_grad_eliminate_]: 7.30995e-06 [virtual_output]: 7.65035e-06 [merge_forward]: 5.57024e-06 [cell_reuse_recompute_pass]: 1.70013e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.69198e-05 [before_grad]: 1.32499e-05 [inplace_validation]: 4.89969e-06 [meta_fg_expand]: 5.59026e-06 [inplace_validation_after_expand]: 6.20959e-06 [flash_sp_send_recv_attached]: 4.43961e-06 [receive_attached]: 2.45962e-06 [after_resolve]: 1.09901e-05 [a_after_grad]: 1.27102e-05 [special_op_eliminate]: 7.50041e-06 [renormalize]: 0.00047424 [add_forward_monad_depend]: 3.49013e-06 [auto_monad_grad]: 1.74996e-06 [auto_monad_eliminator]: 3.28398e-05 [cse]: 3.199e-05 [a_3]: 5.83199e-05 [Cycle 2]: 0.00076753, [43] [expand_dump_flag]: 1.20979e-06 [switch_simplify]: 9.04035e-06 [loop_unroll]: 7.65035e-06 [a_1]: 0.0001985 [recompute_prepare]: 7.25035e-06 [updatestate_depend_eliminate]: 6.01029e-06 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 5.26011e-06 [parameter_eliminate]: 1.45007e-06 [a_2]: 0.00010409 [accelerated_algorithm]: 8.23988e-06 [shard]: 1.34995e-06 [meta_shard_fg_expand]: 2.36975e-06 [shard_inline]: 7.7202e-06 [auto_parallel]: 1.12099e-05 [parallel]: 3.48967e-06 [flash_sp]: 3.87011e-06 [merge_comm]: 6.21006e-06 [allreduce_fusion]: 4.88991e-06 [matmul_add_comm_reduction]: 7.93021e-06 [allreduce_slice_to_reducescatter]: 2.59839e-07 [virtual_shard_identity]: 8.2301e-06 [virtual_dataset]: 7.29971e-06 [get_grad_eliminate_]: 7.06967e-06 [virtual_output]: 7.03987e-06 [merge_forward]: 4.56022e-06 [cell_reuse_recompute_pass]: 2.25008e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.51498e-05 [before_grad]: 1.21002e-05 [inplace_validation]: 3.99025e-06 [meta_fg_expand]: 4.6799e-06 [inplace_validation_after_expand]: 5.24987e-06 [flash_sp_send_recv_attached]: 9.10368e-07 [receive_attached]: 7.79983e-07 [after_resolve]: 9.28016e-06 [a_after_grad]: 1.14399e-05 [special_op_eliminate]: 7.00029e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.801e-07 [auto_monad_grad]: 1.14972e-06 [auto_monad_eliminator]: 1.826e-05 [cse]: 1.94502e-05 [a_3]: 4.705e-05 [py_interpret_to_execute_after_opt_a]: 9.81009e-06 [slice_cell_reuse_recomputed_activation]: 2.29012e-06 [rewriter_after_opt_a]: 0.00014429 [convert_after_rewriter]: 9.22009e-06 [order_py_execute_after_rewriter]: 5.89015e-06 [opt_b]: 0.00024097, [1] [Cycle 1]: 0.00023489, [7] [b_1]: 0.00016032 [b_2]: 9.43989e-06 [updatestate_depend_eliminate]: 5.4501e-06 [updatestate_assign_eliminate]: 4.57e-06 [updatestate_loads_eliminate]: 5.41005e-06 [renormalize]: 2.90107e-07 [cse]: 1.84402e-05 [optimize_parallel_all_gather_comm]: 7.75e-06 [overlap_param_gather]: 1.8999e-06 [cconv]: 2.17804e-05 [loop_unroll]: 0.00049299 [opt_after_cconv]: 0.00013175, [1] [Cycle 1]: 0.00012555, [7] [c_1]: 5.265e-05 [parameter_eliminate]: 2.48989e-06 [updatestate_depend_eliminate]: 7.92043e-06 [updatestate_assign_eliminate]: 4.52017e-06 [updatestate_loads_eliminate]: 5.32018e-06 [cse]: 2.09599e-05 [renormalize]: 3.20375e-07 [remove_dup_value]: 1.23703e-05 [tuple_transform]: 7.316e-05, [1] [Cycle 1]: 6.86701e-05, [2] [d_1]: 5.918e-05 [renormalize]: 2.39816e-07 [partial_unused_args_eliminate]: 1.89012e-06 [add_cache_embedding]: 1.34301e-05 [add_recomputation]: 6.377e-05 [cse_after_recomputation]: 2.67001e-05, [1] [Cycle 1]: 2.19401e-05, [1] [cse]: 1.70101e-05 [environ_conv]: 7.03987e-06 [swap_dp_allreduce_reducescatter]: 7.02031e-06 [bias_add_comm_swap]: 2.22027e-06 [label_micro_interleaved_index]: 2.23005e-06 [label_fine_grained_interleaved_index]: 2.33995e-06 [merge_cast_opt]: 1.30013e-06 [slice_recompute_activation]: 1.68988e-06 [micro_interleaved_order_control]: 2.14996e-06 [assign_add_opt]: 8.2301e-06 [ForceFp32Comm]: 8.60076e-07 [remove_cast_before_assign_add]: 6.9011e-07 [full_micro_interleaved_order_control]: 2.12993e-06 [reorder_send_recv_between_fp_bp]: 2.31015e-06 [comm_op_add_attrs]: 1.06962e-06 [add_comm_op_reuse_tag]: 9.99775e-07 [interleave_split_concat_branches]: 6.70087e-07 [interleave_parallel_branches]: 6.79865e-07 [overlap_opt_shard_in_pipeline]: 1.42027e-06 [overlap_opt_shard_grad_in_pipeline]: 2.57976e-06 [control_data_broadcast_order]: 1.07009e-06 [grouped_pairwise_exchange_alltoall]: 1.36998e-06 [offloading_packed_experts]: 1.07987e-06 [overlap_recompute_and_grad_model_parallel]: 2.18023e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.39704e-07 [overlap_recompute_allgather_and_fa_grad]: 8.40053e-07 [overlap_grad_ring_attention]: 1.4198e-06 [overlap_grad_flash_sp]: 1.47098e-05 [begin_end_overlap_inline]: 3.7998e-07 [split_matmul_comm_elemetwise]: 1.79978e-06 [split_layernorm_comm]: 1.79e-06 [handle_group_info]: 9.4017e-07 [symbol_engine_optimizer]: 8.27298e-05, [1] [Cycle 1]: 7.83899e-05, [6] [build]: 3.85009e-06 [elim_shapecalc]: 1.12304e-05 [elim_not_effective]: 1.58204e-05 [opt_reshape]: 8.78982e-06 [fold_const_symbol]: 1.369e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 1.77976e-06 [auto_monad_reorder]: 2.86996e-05 [get_jit_bprop_graph]: 6.00237e-07 [rewriter_after_jit_bprop_graph]: 4.30271e-07 [eliminate_special_op_node]: 0.00053693 [distribtued_split]: 4.13698e-05 [validate]: 3.597e-05 [task_emit]: 0.0778716 [execute]: 1.40001e-05 Sums bootstrap : 0.000314s : 0.37% type_inference : 0.002497s : 2.94% auto_monad : 0.000130s : 0.15% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.02% optimize.opt_a.a_1 : 0.000543s : 0.64% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.01% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000219s : 0.26% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000017s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000014s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.02% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000474s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.06% optimize.opt_a.cse : 0.000051s : 0.06% optimize.opt_a.a_3 : 0.000105s : 0.12% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000144s : 0.17% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.19% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000493s : 0.58% optimize.opt_after_cconv.c_1 : 0.000053s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.01% optimize.tuple_transform.d_1 : 0.000059s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000029s : 0.03% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000537s : 0.63% distribtued_split : 0.000041s : 0.05% validate : 0.000036s : 0.04% task_emit : 0.077872s : 91.84% execute : 0.000014s : 0.02% Time group info: ------[substitution.] 0.000127 63 5.01% : 0.000006s : 2: substitution.depend_value_elim 1.75% : 0.000002s : 5: substitution.elim_not_effective 1.98% : 0.000003s : 5: substitution.fold_const_symbol 5.75% : 0.000007s : 6: substitution.graph_param_transform 50.27% : 0.000064s : 1: substitution.inline 4.07% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.10% : 0.000004s : 6: substitution.load_eliminater 2.53% : 0.000003s : 2: substitution.reduce_all_const_elim 6.16% : 0.000008s : 10: substitution.remove_not_recompute_node 2.35% : 0.000003s : 2: substitution.replace_old_param 8.95% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.09% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002467 2 88.75% : 0.002189s : 1: type_inference.infer 11.25% : 0.000277s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000063 1 100.00% : 0.000063s : 1: match.inline ------[predicate.] 0.000226 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.13% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.26% : 0.000005s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.38% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.86% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.52% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_depend_swap 1.97% : 0.000004s : 31: predicate.environ_get_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.39% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.16% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.39% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.84% : 0.000013s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.03% : 0.000002s : 12: predicate.less_batch_normalization 1.64% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.42% : 0.000005s : 38: predicate.load_eliminater 1.33% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.12% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.87% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.80% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.72% : 0.000002s : 13: predicate.minmaximum_grad 0.87% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.50% : 0.000001s : 6: predicate.parallel_virtual_node 1.11% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.83% : 0.000002s : 13: predicate.print_const_string_wrapper 0.90% : 0.000002s : 12: predicate.reduce_all_const_elim 1.01% : 0.000002s : 13: predicate.reduce_eliminate 0.58% : 0.000001s : 12: predicate.remove_not_recompute_node 1.23% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.79% : 0.000002s : 13: predicate.reshape_eliminate 0.76% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.49% : 0.000001s : 6: predicate.row_tensor_eliminate 1.05% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.80% : 0.000002s : 12: predicate.shard_identity_eliminate 1.28% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.94% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.42% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.65% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.43% : 0.000010s : 43: predicate.switch_simplify 0.88% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.83% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.69% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.84% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.54% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.30% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.60% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.78% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.60% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000196 4 7.77% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.23% : 0.000181s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.097505 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000068s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000142s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000340s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.57% : 0.000551s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000024s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000006s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.52% : 0.000503s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.12% : 0.001089s : 80: opt.transform.opt_a 0.05% : 0.000051s : 1: opt.transform.opt_after_cconv 0.15% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000058s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 5.40% : 0.005264s : 1: opt_a 0.14% : 0.000136s : 1: opt_after_cconv 0.25% : 0.000244s : 1: opt_b 7.14% : 0.006957s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.24% : 0.000236s : 1: renormalize.infer 0.24% : 0.000233s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000150s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000086s : 1: symbol_engine_optimizer 79.90% : 0.077908s : 1: task_emit 0.08% : 0.000076s : 1: tuple_transform 2.58% : 0.002515s : 1: type_inference 0.07% : 0.000070s : 1: validate TotalTime = 0.0773729, [21] [bootstrap]: 0.00028387 [type_inference]: 0.00218658 [auto_monad]: 9.77297e-05 [graph_reusing]: 1.68988e-06 [inline]: 1.11992e-06 [parallel-infer-symbol]: 1.32993e-06 [pre_auto_parallel]: 2.12998e-05 [insert-virtual-dataset]: 1.85007e-06 [parallel-infer-symbol-second]: 3.49712e-07 [dataset_repeat_opt]: 1.01002e-06 [pipeline_split]: 1.09989e-06 [optimize]: 0.00664605, [52] [py_interpret_to_execute]: 1.30404e-05 [rewriter_before_opt_a]: 3.02601e-05 [opt_a]: 0.00509453, [2] [Cycle 1]: 0.00138551, [43] [expand_dump_flag]: 2.12016e-06 [switch_simplify]: 2.57702e-05 [loop_unroll]: 1.27899e-05 [a_1]: 0.0003263 [recompute_prepare]: 8.99984e-06 [updatestate_depend_eliminate]: 7.23032e-06 [updatestate_assign_eliminate]: 5.34998e-06 [updatestate_loads_eliminate]: 5.60004e-06 [parameter_eliminate]: 1.97999e-06 [a_2]: 0.00011216 [accelerated_algorithm]: 8.80007e-06 [shard]: 1.60001e-06 [meta_shard_fg_expand]: 2.78978e-06 [shard_inline]: 8.27992e-06 [auto_parallel]: 1.13002e-05 [parallel]: 5.11017e-06 [flash_sp]: 6.11972e-06 [merge_comm]: 6.59004e-06 [allreduce_fusion]: 5.30994e-06 [matmul_add_comm_reduction]: 8.08015e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 9.2499e-06 [virtual_dataset]: 7.91019e-06 [get_grad_eliminate_]: 7.56001e-06 [virtual_output]: 7.51019e-06 [merge_forward]: 4.76977e-06 [cell_reuse_recompute_pass]: 1.22981e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.56201e-05 [before_grad]: 1.27899e-05 [inplace_validation]: 4.62029e-06 [meta_fg_expand]: 4.84008e-06 [inplace_validation_after_expand]: 4.74975e-06 [flash_sp_send_recv_attached]: 2.56998e-06 [receive_attached]: 1.74996e-06 [after_resolve]: 1.04001e-05 [a_after_grad]: 1.24602e-05 [special_op_eliminate]: 7.79005e-06 [renormalize]: 0.00037845 [add_forward_monad_depend]: 2.71993e-06 [auto_monad_grad]: 1.51992e-06 [auto_monad_eliminator]: 2.31499e-05 [cse]: 2.60398e-05 [a_3]: 5.62998e-05 [Cycle 2]: 0.00081543, [43] [expand_dump_flag]: 9.49949e-07 [switch_simplify]: 8.82009e-06 [loop_unroll]: 7.73976e-06 [a_1]: 0.00019902 [recompute_prepare]: 7.29971e-06 [updatestate_depend_eliminate]: 5.49015e-06 [updatestate_assign_eliminate]: 4.76977e-06 [updatestate_loads_eliminate]: 4.97e-06 [parameter_eliminate]: 1.01002e-06 [a_2]: 0.00010309 [accelerated_algorithm]: 8.18027e-06 [shard]: 1.09011e-06 [meta_shard_fg_expand]: 2.35019e-06 [shard_inline]: 7.66013e-06 [auto_parallel]: 9.90974e-06 [parallel]: 3.13995e-06 [flash_sp]: 2.33995e-06 [merge_comm]: 5.98002e-06 [allreduce_fusion]: 4.6799e-06 [matmul_add_comm_reduction]: 6.98026e-06 [allreduce_slice_to_reducescatter]: 2.60305e-07 [virtual_shard_identity]: 8.66968e-06 [virtual_dataset]: 7.47992e-06 [get_grad_eliminate_]: 7.29971e-06 [virtual_output]: 6.90017e-06 [merge_forward]: 4.57e-06 [cell_reuse_recompute_pass]: 1.69966e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.49501e-05 [before_grad]: 1.26697e-05 [inplace_validation]: 4.18024e-06 [meta_fg_expand]: 4.60027e-06 [inplace_validation_after_expand]: 4.71016e-06 [flash_sp_send_recv_attached]: 8.10251e-07 [receive_attached]: 6.20261e-07 [after_resolve]: 9.22009e-06 [a_after_grad]: 1.14399e-05 [special_op_eliminate]: 7.24988e-06 [renormalize]: 7.96281e-08 [add_forward_monad_depend]: 7.39936e-07 [auto_monad_grad]: 5.49001e-05 [auto_monad_eliminator]: 1.607e-05 [cse]: 1.84001e-05 [a_3]: 4.827e-05 [py_interpret_to_execute_after_opt_a]: 8.99984e-06 [slice_cell_reuse_recomputed_activation]: 1.60001e-06 [rewriter_after_opt_a]: 0.00012449 [convert_after_rewriter]: 7.68015e-06 [order_py_execute_after_rewriter]: 4.84008e-06 [opt_b]: 0.00023842, [1] [Cycle 1]: 0.00023345, [7] [b_1]: 0.00016006 [b_2]: 9.61963e-06 [updatestate_depend_eliminate]: 5.12972e-06 [updatestate_assign_eliminate]: 4.25009e-06 [updatestate_loads_eliminate]: 5.12041e-06 [renormalize]: 2.5006e-07 [cse]: 1.82199e-05 [optimize_parallel_all_gather_comm]: 7.49016e-06 [overlap_param_gather]: 7.70204e-07 [cconv]: 1.22199e-05 [loop_unroll]: 0.00046676 [opt_after_cconv]: 0.00013108, [1] [Cycle 1]: 0.00012564, [7] [c_1]: 5.09201e-05 [parameter_eliminate]: 1.70013e-06 [updatestate_depend_eliminate]: 1.13999e-05 [updatestate_assign_eliminate]: 4.71016e-06 [updatestate_loads_eliminate]: 4.94998e-06 [cse]: 1.96798e-05 [renormalize]: 4.10248e-07 [remove_dup_value]: 8.72975e-06 [tuple_transform]: 6.69598e-05, [1] [Cycle 1]: 6.278e-05, [2] [d_1]: 5.40703e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 1.30991e-06 [add_cache_embedding]: 9.84035e-06 [add_recomputation]: 4.83999e-05 [cse_after_recomputation]: 2.49399e-05, [1] [Cycle 1]: 2.085e-05, [1] [cse]: 1.58697e-05 [environ_conv]: 5.48968e-06 [swap_dp_allreduce_reducescatter]: 6.91041e-06 [bias_add_comm_swap]: 1.55019e-06 [label_micro_interleaved_index]: 1.07009e-06 [label_fine_grained_interleaved_index]: 9.89996e-07 [merge_cast_opt]: 5.49946e-07 [slice_recompute_activation]: 8.50298e-07 [micro_interleaved_order_control]: 1.11992e-06 [assign_add_opt]: 5.81983e-06 [ForceFp32Comm]: 4.29805e-07 [remove_cast_before_assign_add]: 4.20026e-07 [full_micro_interleaved_order_control]: 8.40053e-07 [reorder_send_recv_between_fp_bp]: 9.00123e-07 [comm_op_add_attrs]: 4.4005e-07 [add_comm_op_reuse_tag]: 4.00003e-07 [interleave_split_concat_branches]: 4.29805e-07 [interleave_parallel_branches]: 4.69852e-07 [overlap_opt_shard_in_pipeline]: 8.60076e-07 [overlap_opt_shard_grad_in_pipeline]: 1.79e-06 [control_data_broadcast_order]: 5.80214e-07 [grouped_pairwise_exchange_alltoall]: 7.00355e-07 [offloading_packed_experts]: 6.10016e-07 [overlap_recompute_and_grad_model_parallel]: 1.10967e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.10248e-07 [overlap_recompute_allgather_and_fa_grad]: 5.89993e-07 [overlap_grad_ring_attention]: 1.13994e-06 [overlap_grad_flash_sp]: 1.171e-05 [begin_end_overlap_inline]: 3.59956e-07 [split_matmul_comm_elemetwise]: 1.13016e-06 [split_layernorm_comm]: 8.49832e-07 [handle_group_info]: 5.10365e-07 [symbol_engine_optimizer]: 8.12998e-05, [1] [Cycle 1]: 7.72397e-05, [6] [build]: 3.22983e-06 [elim_shapecalc]: 1.13398e-05 [elim_not_effective]: 1.51298e-05 [opt_reshape]: 8.7698e-06 [fold_const_symbol]: 1.28904e-05 [renormalize]: 2.19792e-07 [pipeline_parallel_scheduler]: 8.69855e-07 [auto_monad_reorder]: 2.23299e-05 [get_jit_bprop_graph]: 3.19909e-07 [rewriter_after_jit_bprop_graph]: 2.5006e-07 [eliminate_special_op_node]: 0.00047927 [distribtued_split]: 3.06102e-05 [validate]: 2.80901e-05 [task_emit]: 0.0673397 [execute]: 8.10996e-06 Sums bootstrap : 0.000284s : 0.39% type_inference : 0.002187s : 2.97% auto_monad : 0.000098s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000525s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000215s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000008s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000015s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000009s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000009s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000379s : 0.51% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000056s : 0.08% optimize.opt_a.auto_monad_eliminator : 0.000039s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000124s : 0.17% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000007s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000012s : 0.02% optimize.loop_unroll : 0.000467s : 0.63% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000011s : 0.02% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000010s : 0.01% optimize.add_recomputation : 0.000048s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000005s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000006s : 0.01% optimize.ForceFp32Comm : 0.000000s : 0.00% optimize.remove_cast_before_assign_add : 0.000000s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000000s : 0.00% optimize.add_comm_op_reuse_tag : 0.000000s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000022s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000479s : 0.65% distribtued_split : 0.000031s : 0.04% validate : 0.000028s : 0.04% task_emit : 0.067340s : 91.57% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000106 63 3.94% : 0.000004s : 2: substitution.depend_value_elim 1.94% : 0.000002s : 5: substitution.elim_not_effective 1.67% : 0.000002s : 5: substitution.fold_const_symbol 5.61% : 0.000006s : 6: substitution.graph_param_transform 50.75% : 0.000054s : 1: substitution.inline 4.47% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.46% : 0.000004s : 6: substitution.load_eliminater 2.09% : 0.000002s : 2: substitution.reduce_all_const_elim 6.59% : 0.000007s : 10: substitution.remove_not_recompute_node 2.13% : 0.000002s : 2: substitution.replace_old_param 9.18% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.18% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002162 2 90.10% : 0.001948s : 1: type_inference.infer 9.90% : 0.000214s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000053 1 100.00% : 0.000053s : 1: match.inline ------[predicate.] 0.000226 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.10% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.82% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.22% : 0.000005s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.89% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.27% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.24% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.50% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_depend_swap 1.98% : 0.000004s : 31: predicate.environ_get_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000000s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.68% : 0.000013s : 63: predicate.inline 1.09% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.01% : 0.000002s : 12: predicate.less_batch_normalization 1.78% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.36% : 0.000005s : 38: predicate.load_eliminater 1.18% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.85% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.77% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.73% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.09% : 0.000002s : 14: predicate.partial_defer_inline 1.31% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.92% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000002s : 13: predicate.reduce_eliminate 0.57% : 0.000001s : 12: predicate.remove_not_recompute_node 1.16% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.85% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.01% : 0.000002s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.47% : 0.000003s : 18: predicate.special_op_eliminate 1.00% : 0.000002s : 12: predicate.specialize_transform 1.03% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.05% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.39% : 0.000005s : 38: predicate.stopgrad_eliminater 0.46% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.95% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.15% : 0.000009s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.78% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.82% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.92% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.60% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.74% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.51% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.52% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.50% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000117 4 6.64% : 0.000008s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.36% : 0.000109s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.085737 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000013s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.06% : 0.000053s : 1: add_recomputation 0.01% : 0.000009s : 1: assign_add_opt 0.13% : 0.000109s : 1: auto_monad 0.03% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000306s : 1: bootstrap 0.02% : 0.000016s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000011s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.04% : 0.000038s : 1: distribtued_split 0.57% : 0.000492s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.55% : 0.000476s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001064s : 80: opt.transform.opt_a 0.06% : 0.000049s : 1: opt.transform.opt_after_cconv 0.18% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000044s : 4: opt.transform.symbol_engine_opt 5.95% : 0.005098s : 1: opt_a 0.16% : 0.000135s : 1: opt_after_cconv 0.28% : 0.000241s : 1: opt_b 7.76% : 0.006654s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000008s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.01% : 0.000012s : 1: remove_dup_value 0.24% : 0.000204s : 1: renormalize.infer 0.20% : 0.000170s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000130s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.01% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000084s : 1: symbol_engine_optimizer 78.57% : 0.067363s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.57% : 0.002203s : 1: type_inference 0.07% : 0.000057s : 1: validate TotalTime = 0.0794403, [21] [bootstrap]: 0.00028287 [type_inference]: 0.00215847 [auto_monad]: 0.00010038 [graph_reusing]: 2.09967e-06 [inline]: 1.13994e-06 [parallel-infer-symbol]: 1.2801e-06 [pre_auto_parallel]: 1.99e-05 [insert-virtual-dataset]: 1.99024e-06 [parallel-infer-symbol-second]: 3.59956e-07 [dataset_repeat_opt]: 6.59842e-07 [pipeline_split]: 1.05985e-06 [optimize]: 0.00662107, [52] [py_interpret_to_execute]: 1.19801e-05 [rewriter_before_opt_a]: 2.97702e-05 [opt_a]: 0.00506704, [2] [Cycle 1]: 0.00137933, [43] [expand_dump_flag]: 2.04006e-06 [switch_simplify]: 2.37101e-05 [loop_unroll]: 1.31698e-05 [a_1]: 0.0003134 [recompute_prepare]: 8.10996e-06 [updatestate_depend_eliminate]: 7.07014e-06 [updatestate_assign_eliminate]: 5.43986e-06 [updatestate_loads_eliminate]: 6.59004e-06 [parameter_eliminate]: 2.14018e-06 [a_2]: 0.00011276 [accelerated_algorithm]: 8.00006e-06 [shard]: 1.75042e-06 [meta_shard_fg_expand]: 3.13995e-06 [shard_inline]: 7.87014e-06 [auto_parallel]: 1.13202e-05 [parallel]: 6.25011e-06 [flash_sp]: 8.88016e-06 [merge_comm]: 6.82985e-06 [allreduce_fusion]: 4.93973e-06 [matmul_add_comm_reduction]: 9.30997e-06 [allreduce_slice_to_reducescatter]: 3.39933e-07 [virtual_shard_identity]: 8.73022e-06 [virtual_dataset]: 8.29017e-06 [get_grad_eliminate_]: 7.36024e-06 [virtual_output]: 7.45011e-06 [merge_forward]: 5.17024e-06 [cell_reuse_recompute_pass]: 1.42027e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.58199e-05 [before_grad]: 1.329e-05 [inplace_validation]: 4.80004e-06 [meta_fg_expand]: 4.96022e-06 [inplace_validation_after_expand]: 5.49993e-06 [flash_sp_send_recv_attached]: 3.76021e-06 [receive_attached]: 2.21003e-06 [after_resolve]: 1.05603e-05 [a_after_grad]: 1.25603e-05 [special_op_eliminate]: 7.89994e-06 [renormalize]: 0.0003902 [add_forward_monad_depend]: 2.31015e-06 [auto_monad_grad]: 1.20001e-06 [auto_monad_eliminator]: 2.17799e-05 [cse]: 2.29301e-05 [a_3]: 5.50202e-05 [Cycle 2]: 0.00080975, [43] [expand_dump_flag]: 8.99658e-07 [switch_simplify]: 8.88016e-06 [loop_unroll]: 7.59028e-06 [a_1]: 0.00019756 [recompute_prepare]: 7.25035e-06 [updatestate_depend_eliminate]: 5.4799e-06 [updatestate_assign_eliminate]: 4.8303e-06 [updatestate_loads_eliminate]: 4.78979e-06 [parameter_eliminate]: 9.59728e-07 [a_2]: 0.00010245 [accelerated_algorithm]: 8.21007e-06 [shard]: 1.09989e-06 [meta_shard_fg_expand]: 2.46009e-06 [shard_inline]: 8.04011e-06 [auto_parallel]: 9.6499e-06 [parallel]: 3.1502e-06 [flash_sp]: 2.94019e-06 [merge_comm]: 5.53019e-06 [allreduce_fusion]: 4.44986e-06 [matmul_add_comm_reduction]: 7.3798e-06 [allreduce_slice_to_reducescatter]: 2.79862e-07 [virtual_shard_identity]: 8.27014e-06 [virtual_dataset]: 7.24988e-06 [get_grad_eliminate_]: 7.13998e-06 [virtual_output]: 7.00029e-06 [merge_forward]: 4.28036e-06 [cell_reuse_recompute_pass]: 1.66986e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.45198e-05 [before_grad]: 1.20797e-05 [inplace_validation]: 3.87989e-06 [meta_fg_expand]: 4.48013e-06 [inplace_validation_after_expand]: 4.92018e-06 [flash_sp_send_recv_attached]: 8.30274e-07 [receive_attached]: 7.79983e-07 [after_resolve]: 9.35001e-06 [a_after_grad]: 1.17798e-05 [special_op_eliminate]: 7.28993e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 6.80331e-07 [auto_monad_grad]: 9.99775e-07 [auto_monad_eliminator]: 7.42003e-05 [cse]: 1.80998e-05 [a_3]: 4.71901e-05 [py_interpret_to_execute_after_opt_a]: 8.90018e-06 [slice_cell_reuse_recomputed_activation]: 1.87987e-06 [rewriter_after_opt_a]: 0.0001292 [convert_after_rewriter]: 7.51996e-06 [order_py_execute_after_rewriter]: 5.13997e-06 [opt_b]: 0.00023347, [1] [Cycle 1]: 0.00022851, [7] [b_1]: 0.00015925 [b_2]: 9.43011e-06 [updatestate_depend_eliminate]: 4.99981e-06 [updatestate_assign_eliminate]: 4.0899e-06 [updatestate_loads_eliminate]: 4.55976e-06 [renormalize]: 2.79862e-07 [cse]: 1.65501e-05 [optimize_parallel_all_gather_comm]: 7.97026e-06 [overlap_param_gather]: 1.22003e-06 [cconv]: 1.59401e-05 [loop_unroll]: 0.00046533 [opt_after_cconv]: 0.00012238, [1] [Cycle 1]: 0.0001169, [7] [c_1]: 4.92199e-05 [parameter_eliminate]: 1.70991e-06 [updatestate_depend_eliminate]: 6.83963e-06 [updatestate_assign_eliminate]: 4.31994e-06 [updatestate_loads_eliminate]: 4.89037e-06 [cse]: 1.965e-05 [renormalize]: 3.39933e-07 [remove_dup_value]: 1.015e-05 [tuple_transform]: 6.60098e-05, [1] [Cycle 1]: 6.19101e-05, [2] [d_1]: 5.34197e-05 [renormalize]: 1.79745e-07 [partial_unused_args_eliminate]: 1.64984e-06 [add_cache_embedding]: 1.16099e-05 [add_recomputation]: 5.23902e-05 [cse_after_recomputation]: 2.43401e-05, [1] [Cycle 1]: 2.01999e-05, [1] [cse]: 1.55e-05 [environ_conv]: 5.62007e-06 [swap_dp_allreduce_reducescatter]: 6.85034e-06 [bias_add_comm_swap]: 2.1304e-06 [label_micro_interleaved_index]: 1.83005e-06 [label_fine_grained_interleaved_index]: 1.34017e-06 [merge_cast_opt]: 7.89762e-07 [slice_recompute_activation]: 1.51992e-06 [micro_interleaved_order_control]: 1.56974e-06 [assign_add_opt]: 6.97002e-06 [ForceFp32Comm]: 9.20147e-07 [remove_cast_before_assign_add]: 8.60076e-07 [full_micro_interleaved_order_control]: 1.49012e-06 [reorder_send_recv_between_fp_bp]: 1.22981e-06 [comm_op_add_attrs]: 6.20261e-07 [add_comm_op_reuse_tag]: 6.70087e-07 [interleave_split_concat_branches]: 6.3004e-07 [interleave_parallel_branches]: 6.39819e-07 [overlap_opt_shard_in_pipeline]: 9.00123e-07 [overlap_opt_shard_grad_in_pipeline]: 2.04006e-06 [control_data_broadcast_order]: 6.99889e-07 [grouped_pairwise_exchange_alltoall]: 6.9011e-07 [offloading_packed_experts]: 5.39701e-07 [overlap_recompute_and_grad_model_parallel]: 1.29035e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.0012e-07 [overlap_recompute_allgather_and_fa_grad]: 5.09899e-07 [overlap_grad_ring_attention]: 1.17999e-06 [overlap_grad_flash_sp]: 1.13202e-05 [begin_end_overlap_inline]: 4.89876e-07 [split_matmul_comm_elemetwise]: 1.28988e-06 [split_layernorm_comm]: 1.44029e-06 [handle_group_info]: 7.89762e-07 [symbol_engine_optimizer]: 8.00202e-05, [1] [Cycle 1]: 7.59698e-05, [6] [build]: 3.45986e-06 [elim_shapecalc]: 1.07698e-05 [elim_not_effective]: 1.53999e-05 [opt_reshape]: 8.48994e-06 [fold_const_symbol]: 1.27498e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 9.39704e-07 [auto_monad_reorder]: 2.31299e-05 [get_jit_bprop_graph]: 3.39933e-07 [rewriter_after_jit_bprop_graph]: 3.19909e-07 [eliminate_special_op_node]: 0.0004765 [distribtued_split]: 3.40799e-05 [validate]: 2.921e-05 [task_emit]: 0.0694546 [execute]: 8.61008e-06 Sums bootstrap : 0.000283s : 0.37% type_inference : 0.002158s : 2.85% auto_monad : 0.000100s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000020s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000033s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000511s : 0.68% optimize.opt_a.recompute_prepare : 0.000015s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.02% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000215s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.02% optimize.opt_a.merge_comm : 0.000012s : 0.02% optimize.opt_a.allreduce_fusion : 0.000009s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000017s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000030s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000009s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000390s : 0.52% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000096s : 0.13% optimize.opt_a.cse : 0.000041s : 0.05% optimize.opt_a.a_3 : 0.000102s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000129s : 0.17% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000159s : 0.21% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000017s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000465s : 0.62% optimize.opt_after_cconv.c_1 : 0.000049s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000052s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000011s : 0.01% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000003s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000023s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000477s : 0.63% distribtued_split : 0.000034s : 0.05% validate : 0.000029s : 0.04% task_emit : 0.069455s : 91.82% execute : 0.000009s : 0.01% TotalTime = 0.0796563, [21] [bootstrap]: 0.00031116 [type_inference]: 0.0023813 [auto_monad]: 0.00011807 [graph_reusing]: 2.39024e-06 [inline]: 1.62004e-06 [parallel-infer-symbol]: 1.70013e-06 [pre_auto_parallel]: 2.44402e-05 [insert-virtual-dataset]: 2.88989e-06 [parallel-infer-symbol-second]: 4.20026e-07 [dataset_repeat_opt]: 1.40024e-06 [pipeline_split]: 1.39e-06 [optimize]: 0.00702647, [52] [py_interpret_to_execute]: 1.44499e-05 [rewriter_before_opt_a]: 3.50201e-05 [opt_a]: 0.00533609, [2] [Cycle 1]: 0.00152705, [43] [expand_dump_flag]: 2.86009e-06 [switch_simplify]: 2.91001e-05 [loop_unroll]: 1.41799e-05 [a_1]: 0.00033545 [recompute_prepare]: 8.49972e-06 [updatestate_depend_eliminate]: 8.89972e-06 [updatestate_assign_eliminate]: 6.21006e-06 [updatestate_loads_eliminate]: 6.99004e-06 [parameter_eliminate]: 3.39979e-06 [a_2]: 0.00011359 [accelerated_algorithm]: 7.92975e-06 [shard]: 2.16998e-06 [meta_shard_fg_expand]: 3.83984e-06 [shard_inline]: 8.15e-06 [auto_parallel]: 1.22301e-05 [parallel]: 7.43009e-06 [flash_sp]: 1.03698e-05 [merge_comm]: 7.53021e-06 [allreduce_fusion]: 5.24009e-06 [matmul_add_comm_reduction]: 9.87994e-06 [allreduce_slice_to_reducescatter]: 4.4005e-07 [virtual_shard_identity]: 8.73022e-06 [virtual_dataset]: 7.83009e-06 [get_grad_eliminate_]: 7.49016e-06 [virtual_output]: 7.43009e-06 [merge_forward]: 5.52041e-06 [cell_reuse_recompute_pass]: 2.02982e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.59596e-05 [before_grad]: 1.37403e-05 [inplace_validation]: 5.11995e-06 [meta_fg_expand]: 5.13019e-06 [inplace_validation_after_expand]: 5.87013e-06 [flash_sp_send_recv_attached]: 4.63985e-06 [receive_attached]: 2.35019e-06 [after_resolve]: 1.097e-05 [a_after_grad]: 1.211e-05 [special_op_eliminate]: 7.68015e-06 [renormalize]: 0.00046086 [add_forward_monad_depend]: 3.86965e-06 [auto_monad_grad]: 1.85007e-06 [auto_monad_eliminator]: 3.01898e-05 [cse]: 3.10498e-05 [a_3]: 5.67897e-05 [Cycle 2]: 0.00081999, [43] [expand_dump_flag]: 1.13994e-06 [switch_simplify]: 9.31043e-06 [loop_unroll]: 7.60984e-06 [a_1]: 0.00024911 [recompute_prepare]: 7.47992e-06 [updatestate_depend_eliminate]: 6.09038e-06 [updatestate_assign_eliminate]: 4.71994e-06 [updatestate_loads_eliminate]: 5.18002e-06 [parameter_eliminate]: 1.4999e-06 [a_2]: 0.00010369 [accelerated_algorithm]: 7.95024e-06 [shard]: 1.23028e-06 [meta_shard_fg_expand]: 2.46987e-06 [shard_inline]: 7.46967e-06 [auto_parallel]: 1.10203e-05 [parallel]: 3.51993e-06 [flash_sp]: 3.34019e-06 [merge_comm]: 6.10016e-06 [allreduce_fusion]: 4.52995e-06 [matmul_add_comm_reduction]: 7.91019e-06 [allreduce_slice_to_reducescatter]: 3.1013e-07 [virtual_shard_identity]: 8.74e-06 [virtual_dataset]: 7.50041e-06 [get_grad_eliminate_]: 7.22986e-06 [virtual_output]: 6.91973e-06 [merge_forward]: 4.70039e-06 [cell_reuse_recompute_pass]: 2.01957e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.49501e-05 [before_grad]: 1.19302e-05 [inplace_validation]: 4.40003e-06 [meta_fg_expand]: 4.69992e-06 [inplace_validation_after_expand]: 5.05988e-06 [flash_sp_send_recv_attached]: 9.60194e-07 [receive_attached]: 7.10133e-07 [after_resolve]: 9.43011e-06 [a_after_grad]: 1.17202e-05 [special_op_eliminate]: 7.16979e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 8.00006e-07 [auto_monad_grad]: 1.27964e-06 [auto_monad_eliminator]: 1.84998e-05 [cse]: 1.91499e-05 [a_3]: 4.75398e-05 [py_interpret_to_execute_after_opt_a]: 8.80985e-06 [slice_cell_reuse_recomputed_activation]: 2.09035e-06 [rewriter_after_opt_a]: 0.00013684 [convert_after_rewriter]: 9.31975e-06 [order_py_execute_after_rewriter]: 6.48992e-06 [opt_b]: 0.00023702, [1] [Cycle 1]: 0.00023132, [7] [b_1]: 0.0001585 [b_2]: 9.57027e-06 [updatestate_depend_eliminate]: 5.23031e-06 [updatestate_assign_eliminate]: 4.38979e-06 [updatestate_loads_eliminate]: 5.30994e-06 [renormalize]: 2.89641e-07 [cse]: 1.84001e-05 [optimize_parallel_all_gather_comm]: 8.2599e-06 [overlap_param_gather]: 1.03004e-06 [cconv]: 2.25697e-05 [loop_unroll]: 0.0004878 [opt_after_cconv]: 0.00013043, [1] [Cycle 1]: 0.00012435, [7] [c_1]: 5.08097e-05 [parameter_eliminate]: 2.55974e-06 [updatestate_depend_eliminate]: 8.00006e-06 [updatestate_assign_eliminate]: 4.84008e-06 [updatestate_loads_eliminate]: 5.10039e-06 [cse]: 2.209e-05 [renormalize]: 4.49829e-07 [remove_dup_value]: 1.32397e-05 [tuple_transform]: 6.72401e-05, [1] [Cycle 1]: 6.27101e-05, [2] [d_1]: 5.341e-05 [renormalize]: 2.10013e-07 [partial_unused_args_eliminate]: 1.62981e-06 [add_cache_embedding]: 1.24699e-05 [add_recomputation]: 5.96601e-05 [cse_after_recomputation]: 2.601e-05, [1] [Cycle 1]: 2.10102e-05, [1] [cse]: 1.626e-05 [environ_conv]: 7.39982e-06 [swap_dp_allreduce_reducescatter]: 7.32997e-06 [bias_add_comm_swap]: 2.14996e-06 [label_micro_interleaved_index]: 2.04006e-06 [label_fine_grained_interleaved_index]: 1.91014e-06 [merge_cast_opt]: 1.58045e-06 [slice_recompute_activation]: 1.42027e-06 [micro_interleaved_order_control]: 1.60001e-06 [assign_add_opt]: 7.53999e-06 [ForceFp32Comm]: 8.2003e-07 [remove_cast_before_assign_add]: 8.29808e-07 [full_micro_interleaved_order_control]: 1.95019e-06 [reorder_send_recv_between_fp_bp]: 1.74996e-06 [comm_op_add_attrs]: 7.59959e-07 [add_comm_op_reuse_tag]: 1.26008e-06 [interleave_split_concat_branches]: 6.10016e-07 [interleave_parallel_branches]: 5.89993e-07 [overlap_opt_shard_in_pipeline]: 1.17021e-06 [overlap_opt_shard_grad_in_pipeline]: 1.68988e-06 [control_data_broadcast_order]: 1.39e-06 [grouped_pairwise_exchange_alltoall]: 1.33971e-06 [offloading_packed_experts]: 1.05007e-06 [overlap_recompute_and_grad_model_parallel]: 1.68988e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.00024e-06 [overlap_recompute_allgather_and_fa_grad]: 1.06031e-06 [overlap_grad_ring_attention]: 1.22003e-06 [overlap_grad_flash_sp]: 1.45803e-05 [begin_end_overlap_inline]: 8.10251e-07 [split_matmul_comm_elemetwise]: 2.00002e-06 [split_layernorm_comm]: 1.8198e-06 [handle_group_info]: 7.30157e-07 [symbol_engine_optimizer]: 9.68101e-05, [1] [Cycle 1]: 9.22801e-05, [6] [build]: 3.88036e-06 [elim_shapecalc]: 1.13002e-05 [elim_not_effective]: 2.79797e-05 [opt_reshape]: 8.72975e-06 [fold_const_symbol]: 1.367e-05 [renormalize]: 2.20258e-07 [pipeline_parallel_scheduler]: 1.51992e-06 [auto_monad_reorder]: 2.93902e-05 [get_jit_bprop_graph]: 4.70318e-07 [rewriter_after_jit_bprop_graph]: 4.19561e-07 [eliminate_special_op_node]: 0.00050665 [distribtued_split]: 4.09801e-05 [validate]: 3.53199e-05 [task_emit]: 0.068907 [execute]: 1.08397e-05 Sums bootstrap : 0.000311s : 0.41% type_inference : 0.002381s : 3.15% auto_monad : 0.000118s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000585s : 0.77% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000217s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000017s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000461s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.06% optimize.opt_a.cse : 0.000050s : 0.07% optimize.opt_a.a_3 : 0.000104s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000137s : 0.18% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000158s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000488s : 0.64% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000028s : 0.04% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000507s : 0.67% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.068907s : 91.07% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000107 63 4.73% : 0.000005s : 2: substitution.depend_value_elim 2.15% : 0.000002s : 5: substitution.elim_not_effective 2.00% : 0.000002s : 5: substitution.fold_const_symbol 6.04% : 0.000006s : 6: substitution.graph_param_transform 46.25% : 0.000049s : 1: substitution.inline 4.69% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.57% : 0.000004s : 6: substitution.load_eliminater 2.33% : 0.000002s : 2: substitution.reduce_all_const_elim 7.02% : 0.000007s : 10: substitution.remove_not_recompute_node 2.79% : 0.000003s : 2: substitution.replace_old_param 10.03% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.42% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002135 2 89.87% : 0.001919s : 1: type_inference.infer 10.13% : 0.000216s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000048 1 100.00% : 0.000048s : 1: match.inline ------[predicate.] 0.000221 1420 0.82% : 0.000002s : 13: predicate.accumulaten_eliminater 0.97% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.83% : 0.000002s : 13: predicate.addn_zero_filter 0.82% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.11% : 0.000005s : 25: predicate.arithmetic_simplify 0.95% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.25% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.11% : 0.000002s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.89% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.94% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.92% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.17% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.18% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.13% : 0.000002s : 19: predicate.environ_get_depend_swap 1.92% : 0.000004s : 31: predicate.environ_get_eliminate 1.10% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.87% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.26% : 0.000003s : 14: predicate.float_depend_g_call 0.78% : 0.000002s : 12: predicate.float_environ_get_switch 1.14% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.31% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.72% : 0.000002s : 12: predicate.incorporate_call_switch 5.51% : 0.000012s : 63: predicate.inline 1.09% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.02% : 0.000002s : 12: predicate.less_batch_normalization 1.77% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.52% : 0.000006s : 38: predicate.load_eliminater 1.25% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.23% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.80% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.83% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.68% : 0.000002s : 6: predicate.mutable_eliminate 0.49% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.21% : 0.000003s : 14: predicate.partial_defer_inline 1.29% : 0.000003s : 19: predicate.partial_eliminate 0.83% : 0.000002s : 13: predicate.print_const_string_wrapper 0.97% : 0.000002s : 12: predicate.reduce_all_const_elim 1.07% : 0.000002s : 13: predicate.reduce_eliminate 0.54% : 0.000001s : 12: predicate.remove_not_recompute_node 1.12% : 0.000002s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 0.99% : 0.000002s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.86% : 0.000002s : 12: predicate.shard_identity_eliminate 1.34% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.98% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.35% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.70% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.20% : 0.000009s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.86% : 0.000002s : 13: predicate.transpose_eliminate 1.79% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.78% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.63% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.78% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.77% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.58% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.73% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.42% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.43% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.56% : 0.000001s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.81% : 0.000002s : 12: predicate.virtual_output_eliminate 0.50% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000122 4 8.79% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.21% : 0.000112s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087764 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.06% : 0.000056s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.13% : 0.000111s : 1: auto_monad 0.03% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000307s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000027s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.56% : 0.000488s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000003s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000474s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000003s : 1: offloading_packed_experts 0.02% : 0.000013s : 1: opt.transform.loop_unroll_optimizer 1.19% : 0.001042s : 80: opt.transform.opt_a 0.05% : 0.000048s : 1: opt.transform.opt_after_cconv 0.17% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.03% : 0.000030s : 3: opt.transform.special_op_eliminate 0.05% : 0.000044s : 4: opt.transform.symbol_engine_opt 5.78% : 0.005070s : 1: opt_a 0.14% : 0.000126s : 1: opt_after_cconv 0.27% : 0.000237s : 1: opt_b 7.55% : 0.006629s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000008s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000003s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000026s : 1: pre_auto_parallel 0.02% : 0.000016s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.24% : 0.000211s : 1: renormalize.infer 0.20% : 0.000174s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000135s : 1: rewriter_after_opt_a 0.04% : 0.000034s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000083s : 1: symbol_engine_optimizer 79.17% : 0.069480s : 1: task_emit 0.08% : 0.000069s : 1: tuple_transform 2.48% : 0.002174s : 1: type_inference 0.07% : 0.000060s : 1: validate Time group info: ------[substitution.] 0.000128 63 4.96% : 0.000006s : 2: substitution.depend_value_elim 2.09% : 0.000003s : 5: substitution.elim_not_effective 1.98% : 0.000003s : 5: substitution.fold_const_symbol 5.28% : 0.000007s : 6: substitution.graph_param_transform 50.29% : 0.000064s : 1: substitution.inline 3.89% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.22% : 0.000004s : 6: substitution.load_eliminater 3.06% : 0.000004s : 2: substitution.reduce_all_const_elim 6.02% : 0.000008s : 10: substitution.remove_not_recompute_node 2.65% : 0.000003s : 2: substitution.replace_old_param 8.68% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.89% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002352 2 89.01% : 0.002094s : 1: type_inference.infer 10.99% : 0.000258s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000063 1 100.00% : 0.000063s : 1: match.inline ------[predicate.] 0.000227 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 0.95% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.83% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.21% : 0.000005s : 25: predicate.arithmetic_simplify 0.93% : 0.000002s : 13: predicate.cast_eliminate 0.76% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.42% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.48% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.90% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_depend_swap 1.88% : 0.000004s : 31: predicate.environ_get_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.86% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.31% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.28% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.51% : 0.000012s : 63: predicate.inline 0.99% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.05% : 0.000002s : 12: predicate.less_batch_normalization 1.68% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.37% : 0.000005s : 38: predicate.load_eliminater 1.18% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.39% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.87% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.77% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicat TotalTime = 0.079942, [21] [bootstrap]: 0.00030151 [type_inference]: 0.00238925 [auto_monad]: 0.00013044 [graph_reusing]: 2.2999e-06 [inline]: 1.04029e-06 [parallel-infer-symbol]: 2.08989e-06 [pre_auto_parallel]: 2.62302e-05 [insert-virtual-dataset]: 2.69013e-06 [parallel-infer-symbol-second]: 3.89758e-07 [dataset_repeat_opt]: 1.20001e-06 [pipeline_split]: 1.41002e-06 [optimize]: 0.00692122, [52] [py_interpret_to_execute]: 1.472e-05 [rewriter_before_opt_a]: 3.50396e-05 [opt_a]: 0.00528106, [2] [Cycle 1]: 0.00149272, [43] [expand_dump_flag]: 2.21003e-06 [switch_simplify]: 2.91201e-05 [loop_unroll]: 1.28802e-05 [a_1]: 0.00033906 [recompute_prepare]: 8.80007e-06 [updatestate_depend_eliminate]: 9.07993e-06 [updatestate_assign_eliminate]: 5.79003e-06 [updatestate_loads_eliminate]: 7.16001e-06 [parameter_eliminate]: 3.11993e-06 [a_2]: 0.00011642 [accelerated_algorithm]: 8.21007e-06 [shard]: 1.34995e-06 [meta_shard_fg_expand]: 3.75975e-06 [shard_inline]: 8.19983e-06 [auto_parallel]: 1.18702e-05 [parallel]: 4.97978e-06 [flash_sp]: 1.11302e-05 [merge_comm]: 7.47014e-06 [allreduce_fusion]: 5.60004e-06 [matmul_add_comm_reduction]: 1.013e-05 [allreduce_slice_to_reducescatter]: 5.49946e-07 [virtual_shard_identity]: 8.97003e-06 [virtual_dataset]: 7.96979e-06 [get_grad_eliminate_]: 7.75e-06 [virtual_output]: 7.34022e-06 [merge_forward]: 5.93998e-06 [cell_reuse_recompute_pass]: 1.8198e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.59498e-05 [before_grad]: 1.32499e-05 [inplace_validation]: 5.03985e-06 [meta_fg_expand]: 5.35976e-06 [inplace_validation_after_expand]: 6.14999e-06 [flash_sp_send_recv_attached]: 4.63007e-06 [receive_attached]: 2.63983e-06 [after_resolve]: 1.08699e-05 [a_after_grad]: 1.253e-05 [special_op_eliminate]: 7.35978e-06 [renormalize]: 0.00042353 [add_forward_monad_depend]: 3.60003e-06 [auto_monad_grad]: 2.00002e-06 [auto_monad_eliminator]: 3.064e-05 [cse]: 3.32999e-05 [a_3]: 5.91297e-05 [Cycle 2]: 0.00079527, [43] [expand_dump_flag]: 1.09011e-06 [switch_simplify]: 8.92021e-06 [loop_unroll]: 7.58003e-06 [a_1]: 0.00022812 [recompute_prepare]: 7.26013e-06 [updatestate_depend_eliminate]: 6.00982e-06 [updatestate_assign_eliminate]: 4.44986e-06 [updatestate_loads_eliminate]: 5.03007e-06 [parameter_eliminate]: 1.16974e-06 [a_2]: 0.00010316 [accelerated_algorithm]: 7.87992e-06 [shard]: 1.20001e-06 [meta_shard_fg_expand]: 2.5304e-06 [shard_inline]: 7.37002e-06 [auto_parallel]: 1.11097e-05 [parallel]: 3.2899e-06 [flash_sp]: 2.96999e-06 [merge_comm]: 6.05965e-06 [allreduce_fusion]: 5.03007e-06 [matmul_add_comm_reduction]: 8.33999e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 8.40006e-06 [virtual_dataset]: 7.41007e-06 [get_grad_eliminate_]: 7.09016e-06 [virtual_output]: 7.41985e-06 [merge_forward]: 4.57e-06 [cell_reuse_recompute_pass]: 1.87987e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.453e-05 [before_grad]: 1.26204e-05 [inplace_validation]: 4.73997e-06 [meta_fg_expand]: 4.67012e-06 [inplace_validation_after_expand]: 5.13997e-06 [flash_sp_send_recv_attached]: 8.49832e-07 [receive_attached]: 7.59959e-07 [after_resolve]: 1.011e-05 e.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.84% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.22% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.84% : 0.000002s : 12: predicate.reduce_all_const_elim 1.11% : 0.000003s : 13: predicate.reduce_eliminate 0.53% : 0.000001s : 12: predicate.remove_not_recompute_node 1.22% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.05% : 0.000002s : 12: predicate.same_eliminate 0.52% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.44% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 1.01% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.28% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.58% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.40% : 0.000010s : 43: predicate.switch_simplify 0.84% : 0.000002s : 13: predicate.tile_eliminate 0.89% : 0.000002s : 13: predicate.transpose_eliminate 1.90% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.79% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.65% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.91% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.52% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.44% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.50% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.49% : 0.000001s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.76% : 0.000002s : 12: predicate.virtual_output_eliminate 0.57% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000146 4 10.92% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.08% : 0.000130s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088527 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.15% : 0.000131s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000335s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.00004[a_after_grad]: 1.14799e-05 [special_op_eliminate]: 7.09016e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 9.09902e-07 [auto_monad_grad]: 1.17021e-06 [auto_monad_eliminator]: 1.84202e-05 [cse]: 1.965e-05 [a_3]: 4.77801e-05 [py_interpret_to_execute_after_opt_a]: 9.26992e-06 [slice_cell_reuse_recomputed_activation]: 2.00002e-06 [rewriter_after_opt_a]: 0.00015471 [convert_after_rewriter]: 1.09598e-05 [order_py_execute_after_rewriter]: 6.29993e-06 [opt_b]: 0.00023704, [1] [Cycle 1]: 0.00023159, [7] [b_1]: 0.00015827 [b_2]: 9.13022e-06 [updatestate_depend_eliminate]: 5.51995e-06 [updatestate_assign_eliminate]: 4.56022e-06 [updatestate_loads_eliminate]: 4.97e-06 [renormalize]: 2.5006e-07 [cse]: 1.906e-05 [optimize_parallel_all_gather_comm]: 8.59005e-06 [overlap_param_gather]: 1.43982e-06 [cconv]: 2.382e-05 [loop_unroll]: 0.00046081 [opt_after_cconv]: 0.0001344, [1] [Cycle 1]: 0.00012862, [7] [c_1]: 5.08698e-05 [parameter_eliminate]: 2.47965e-06 [updatestate_depend_eliminate]: 7.93021e-06 [updatestate_assign_eliminate]: 4.48013e-06 [updatestate_loads_eliminate]: 1.011e-05 [cse]: 2.15e-05 [renormalize]: 4.20026e-07 [remove_dup_value]: 1.35703e-05 [tuple_transform]: 6.72503e-05, [1] [Cycle 1]: 6.27399e-05, [2] [d_1]: 5.35301e-05 [renormalize]: 1.79745e-07 [partial_unused_args_eliminate]: 1.89012e-06 [add_cache_embedding]: 1.22399e-05 [add_recomputation]: 5.96801e-05 [cse_after_recomputation]: 2.59201e-05, [1] [Cycle 1]: 2.14698e-05, [1] [cse]: 1.649e-05 [environ_conv]: 6.78981e-06 [swap_dp_allreduce_reducescatter]: 7.47992e-06 [bias_add_comm_swap]: 2.05031e-06 [label_micro_interleaved_index]: 1.68988e-06 [label_fine_grained_interleaved_index]: 1.68988e-06 [merge_cast_opt]: 1.17999e-06 [slice_recompute_activation]: 7.79983e-07 [micro_interleaved_order_control]: 9.89996e-07 [assign_add_opt]: 7.07014e-06 [ForceFp32Comm]: 6.99889e-07 [remove_cast_before_assign_add]: 8.2003e-07 [full_micro_interleaved_order_control]: 1.78022e-06 [reorder_send_recv_between_fp_bp]: 1.0198e-06 [comm_op_add_attrs]: 4.60073e-07 [add_comm_op_reuse_tag]: 4.90341e-07 [interleave_split_concat_branches]: 4.4005e-07 [interleave_parallel_branches]: 5.19678e-07 [overlap_opt_shard_in_pipeline]: 6.89644e-07 [overlap_opt_shard_grad_in_pipeline]: 8.40053e-07 [control_data_broadcast_order]: 6.40284e-07 [grouped_pairwise_exchange_alltoall]: 1.30991e-06 [offloading_packed_experts]: 9.99775e-07 [overlap_recompute_and_grad_model_parallel]: 1.89012e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.60191e-07 [overlap_recompute_allgather_and_fa_grad]: 1.09011e-06 [overlap_grad_ring_attention]: 1.62981e-06 [overlap_grad_flash_sp]: 1.45095e-05 [begin_end_overlap_inline]: 7.19912e-07 [split_matmul_comm_elemetwise]: 2.02004e-06 [split_layernorm_comm]: 1.62004e-06 [handle_group_info]: 9.49949e-07 [symbol_engine_optimizer]: 8.16602e-05, [1] [Cycle 1]: 7.70399e-05, [6] [build]: 4.10993e-06 [elim_shapecalc]: 1.16699e-05 [elim_not_effective]: 1.59098e-05 [opt_reshape]: 8.25012e-06 [fold_const_symbol]: 1.26399e-05 [renormalize]: 2.20258e-07 [pipeline_parallel_scheduler]: 1.22003e-06 [auto_monad_reorder]: 2.87201e-05 [get_jit_bprop_graph]: 4.20026e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00048042 [distribtued_split]: 3.96301e-05 [validate]: 3.36799e-05 [task_emit]: 0.0693211 [execute]: 1.10301e-05 Sums bootstrap : 0.000302s : 0.40% type_inference : 0.002389s : 3.14% auto_monad 9s : 1: distribtued_split 0.59% : 0.000520s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000003s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000498s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.27% : 0.001124s : 80: opt.transform.opt_a 0.06% : 0.000049s : 1: opt.transform.opt_after_cconv 0.17% : 0.000149s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.03% : 0.000030s : 3: opt.transform.special_op_eliminate 0.07% : 0.000058s : 4: opt.transform.symbol_engine_opt 6.03% : 0.005340s : 1: opt_a 0.15% : 0.000134s : 1: opt_after_cconv 0.27% : 0.000240s : 1: opt_b 7.95% : 0.007034s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.28% : 0.000251s : 1: renormalize.infer 0.23% : 0.000204s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000142s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000100s : 1: symbol_engine_optimizer 77.88% : 0.068945s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.71% : 0.002399s : 1: type_inference 0.08% : 0.000071s : 1: validate TotalTime = 0.0800168, [21] [bootstrap]: 0.00031696 [type_inference]: 0.0024269 [auto_monad]: 0.00012913 [graph_reusing]: 3.33972e-06 [inline]: 1.2801e-06 [parallel-infer-symbol]: 1.89012e-06 [pre_auto_parallel]: 2.582e-05 [insert-virtual-dataset]: 2.78978e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 1.53994e-06 [pipeline_split]: 1.75042e-06 [optimize]: 0.00710019, [52] [py_interpret_to_execute]: 1.571e-05 [rewriter_before_opt_a]: 3.51099e-05 [opt_a]: 0.00539986, [2] [Cycle 1]: 0.00160399, [43] [expand_dump_flag]: 4.14997e-06 [switch_simplify]: 2.97399e-05 [loop_unroll]: 1.31899e-05 [a_1]: 0.00034299 [recompute_prepare]: 9.22987e-06 [updatestate_depend_eliminate]: 8.38004e-06 [updatestate_assign_eliminate]: 5.4501e-06 [updatestate_loads_eliminate]: 7.22008e-06 [parameter_eliminate]: 3.03006e-06 [a_2]: 0.00011621 [accelerated_algorithm]: 8.40984e-06 [shard]: 2.11038e-06 [meta_shard_fg_expand]: 4.5998e-06 [shard_inline]: 8.33999e-06 [auto_parallel]: 1.27601e-05 [parallel]: 8.23988e-06 [flash_sp]: 1.26101e-05 [merge_comm]: 9.60007e-06 [allreduce_fusion]: 6.08014e-06 [matmul_add_comm_reduction]: 1.13901e-05 [allreduce_slice_to_reducescatter]: 4.60073e-07 [virtual_shard_identity]: 9.66992e-06 [virtual_dataset]: 7.92975e-06 [get_grad_eliminate_]: 7.95024e-06 [virtual_output]: 7.41985e-06 [merge_forward]: 6.27991e-06 [cell_reuse_recompute_pass]: 1.91992e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.65198e-05 [before_grad]: 1.42497e-05 [inplace_validation]: 6.06012e-06 [meta_fg_expand]: 5.81006e-06 [inplace_validation_after_expand]: 6.62031e-06 [flash_sp_send_recv_attached]: 5.1097e-06 [receive_attached]: 2.78e-06 [after_resolve]: 1.09901e-05 [a_after_grad]: 1.28602e-05 [special_op_eliminate]: 7.57026e-06 [renormalize]: 0.000433 [add_forward_monad_depend]: 3.86033e-06 [auto_monad_grad]: 2.27988e-06 [auto_monad_eliminator]: 3.15402e-05 [cse]: 9.759e-05 [a_3]: 5.81499e-05 [Cycle 2]: 0.00078203, [43] [expand_dump_flag]: 1.30991e-06 [switch_simplify]: 8.80985e-06 [loop_unroll]: 7.7202e-06 [a_1]: 0.00019923 [recompute_prepare]: 7.19028e-06 [updatestate_depend_eliminate]: 5.75976e-06 [updatestate_assign_eliminate]: 5.0799e-06 [updatestate_loads_eliminate]: 5.39003e-06 [parameter_eliminate]: 1.3602e-06 [a_2]: 0.00010343 [accelerated_algorithm]: 8.20961e-06 [shard]: 1.28988e-06 [meta_shard_fg_expand]: 2.43029e-06 [shard_inline]: 7.83987e-06 [auto_parallel]: 1.12499e-05 [parallel]: 3.81004e-06 [flash_sp]: 3.95998e-06 [merge_comm]: 5.91017e-06 [allreduce_fusion]: 4.96022e-06 [matmul_add_comm_reduction]: 7.83987e-06 [allreduce_slice_to_reducescatter]: 2.19792e-07 [virtual_shard_identity]: 7.87992e-06 [virtual_dataset]: 7.18003e-06 [get_grad_eliminate_]: 7.16001e-06 [virtual_output]: 6.99004e-06 [merge_forward]: 4.46988e-06 [cell_reuse_recompute_pass]: 2.00979e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.51899e-05 [before_grad]: 1.26399e-05 [inplace_validation]: 4.25987e-06 [meta_fg_expand]: 4.89969e-06 [inplace_validation_after_expand]: 5.33974e-06 [flash_sp_send_recv_attached]: 9.4017e-07 [receive_attached]: 7.19912e-07 [after_resolve]: 9.66992e-06 : 0.000130s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000020s : 0.03% optimize.opt_a.a_1 : 0.000567s : 0.75% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000017s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000030s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000014s : 0.02% optimize.opt_a.renormalize : 0.000424s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.06% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000155s : 0.20% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000158s : 0.21% optimize.opt_b.b_2 : 0.000009 [a_after_grad]: 1.17603e-05 [special_op_eliminate]: 7.31973e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 9.59728e-07 [auto_monad_grad]: 1.20001e-06 [auto_monad_eliminator]: 1.90297e-05 [cse]: 1.96602e-05 [a_3]: 4.686e-05 [py_interpret_to_execute_after_opt_a]: 9.26992e-06 [slice_cell_reuse_recomputed_activation]: 2.48989e-06 [rewriter_after_opt_a]: 0.00015175 [convert_after_rewriter]: 8.81031e-06 [order_py_execute_after_rewriter]: 6.34976e-06 [opt_b]: 0.00024259, [1] [Cycle 1]: 0.00023648, [7] [b_1]: 0.00015974 [b_2]: 1.01798e-05 [updatestate_depend_eliminate]: 5.39981e-06 [updatestate_assign_eliminate]: 4.5104e-06 [updatestate_loads_eliminate]: 5.28013e-06 [renormalize]: 2.49594e-07 [cse]: 1.85897e-05 [optimize_parallel_all_gather_comm]: 8.33021e-06 [overlap_param_gather]: 1.39e-06 [cconv]: 2.17701e-05 [loop_unroll]: 0.00049266 [opt_after_cconv]: 0.00013173, [1] [Cycle 1]: 0.00012502, [7] [c_1]: 5.10104e-05 [parameter_eliminate]: 2.46009e-06 [updatestate_depend_eliminate]: 7.75e-06 [updatestate_assign_eliminate]: 4.42984e-06 [updatestate_loads_eliminate]: 5.20004e-06 [cse]: 2.089e-05 [renormalize]: 3.19909e-07 [remove_dup_value]: 1.48499e-05 [tuple_transform]: 6.86501e-05, [1] [Cycle 1]: 6.34696e-05, [2] [d_1]: 5.425e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 2.12993e-06 [add_cache_embedding]: 1.37198e-05 [add_recomputation]: 6.12801e-05 [cse_after_recomputation]: 2.74898e-05, [1] [Cycle 1]: 2.20798e-05, [1] [cse]: 1.68798e-05 [environ_conv]: 7.20005e-06 [swap_dp_allreduce_reducescatter]: 7.86036e-06 [bias_add_comm_swap]: 2.46009e-06 [label_micro_interleaved_index]: 2.21003e-06 [label_fine_grained_interleaved_index]: 2.22027e-06 [merge_cast_opt]: 1.4198e-06 [slice_recompute_activation]: 2.00979e-06 [micro_interleaved_order_control]: 1.93994e-06 [assign_add_opt]: 8.00006e-06 [ForceFp32Comm]: 1.26008e-06 [remove_cast_before_assign_add]: 9.20147e-07 [full_micro_interleaved_order_control]: 2.33017e-06 [reorder_send_recv_between_fp_bp]: 2.31992e-06 [comm_op_add_attrs]: 1.09011e-06 [add_comm_op_reuse_tag]: 1.05007e-06 [interleave_split_concat_branches]: 8.49832e-07 [interleave_parallel_branches]: 7.79983e-07 [overlap_opt_shard_in_pipeline]: 1.32993e-06 [overlap_opt_shard_grad_in_pipeline]: 2.47965e-06 [control_data_broadcast_order]: 1.22981e-06 [grouped_pairwise_exchange_alltoall]: 1.55997e-06 [offloading_packed_experts]: 8.99658e-07 [overlap_recompute_and_grad_model_parallel]: 2.31992e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.60076e-07 [overlap_recompute_allgather_and_fa_grad]: 1.33971e-06 [overlap_grad_ring_attention]: 2.63005e-06 [overlap_grad_flash_sp]: 1.48299e-05 [begin_end_overlap_inline]: 7.30157e-07 [split_matmul_comm_elemetwise]: 1.90036e-06 [split_layernorm_comm]: 1.76998e-06 [handle_group_info]: 1.03004e-06 [symbol_engine_optimizer]: 8.39797e-05, [1] [Cycle 1]: 7.90698e-05, [6] [build]: 3.68012e-06 [elim_shapecalc]: 1.16499e-05 [elim_not_effective]: 1.56402e-05 [opt_reshape]: 8.40984e-06 [fold_const_symbol]: 1.33002e-05 [renormalize]: 2.39816e-07 [pipeline_parallel_scheduler]: 1.97999e-06 [auto_monad_reorder]: 2.97502e-05 [get_jit_bprop_graph]: 4.60073e-07 [rewriter_after_jit_bprop_graph]: 4.50294e-07 [eliminate_special_op_node]: 0.00050616 [distribtued_split]: 4.13102e-05 [validate]: 3.68296e-05 [task_emit]: 0.0691304 [execute]: 1.06599e-05 Sums bootstrap : 0.000317s : 0.42% type_inference : 0.002427s : 3.19% auto_mos : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000461s : 0.61% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000010s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000000s : 0.00% optimize.add_comm_op_reuse_tag : 0.000000s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.folnad : 0.000129s : 0.17% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000542s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000017s : 0.02% optimize.opt_a.merge_comm : 0.000016s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000433s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000117s : 0.15% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000152s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.21% optimize.opt_b.b_2 : d_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000480s : 0.63% distribtued_split : 0.000040s : 0.05% validate : 0.000034s : 0.04% task_emit : 0.069321s : 91.23% execute : 0.000011s : 0.01% 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000493s : 0.65% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000015s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000003s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000506s : 0.67% distribtued_split : 0.000041s : 0.05% validate : 0.000037s : 0.05% task_emit : 0.069130s : 90.97% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000129 63 4.12% : 0.000005s : 2: substitution.depend_value_elim 1.91% : 0.000002s : 5: substitution.elim_not_effective 1.40% : 0.000002s : 5: substitution.fold_const_symbol 4.93% : 0.000006s : 6: substitution.graph_param_transform 52.74% : 0.000068s : 1: substitution.inline 4.04% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.35% : 0.000004s : 6: substitution.load_eliminater 2.63% : 0.000003s : 2: substitution.reduce_all_const_elim 5.75% : 0.000007s : 10: substitution.remove_not_recompute_node 2.14% : 0.000003s : 2: substitution.replace_old_param 8.89% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.12% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002361 2 88.69% : 0.002094s : 1: type_inference.infer 11.31% : 0.000267s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000067 1 100.00% : 0.000067s : 1: match.inline ------[predicate.] 0.000254 1420 0.72% : 0.000002s : 13: predicate.accumulaten_eliminater 1.06% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.62% : 0.000002s : 12: predicate.addn_check_dump 0.70% : 0.000002s : 13: predicate.addn_zero_filter 0.64% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 1.91% : 0.000005s : 25: predicate.arithmetic_simplify 0.68% : 0.000002s : 13: predicate.cast_eliminate 0.75% : 0.000002s : 12: predicate.check_bprop_eliminate 0.66% : 0.000002s : 12: predicate.compare_switch_simplify 0.20% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.33% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.74% : 0.000002s : 12: predicate.depend_value_elim 0.71% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.79% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.23% : 0.000001s : 6: predicate.elim_not_effective 0.53% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 0.99% : 0.000003s : 19: predicate.environ_add_const_eliminate 0.96% : 0.000002s : 19: predicate.environ_get_add_eliminate 0.92% : 0.000002s : 19: predicate.environ_get_depend_swap 1.74% : 0.000004s : 31: predicate.environ_get_eliminate 0.94% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.73% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.14% : 0.000003s : 14: predicate.float_depend_g_call 0.66% : 0.000002s : 12: predicate.float_environ_get_switch 0.96% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000001s : 6: predicate.fold_const_symbol 0.74% : 0.000002s : 12: predicate.get_grad_eliminate 0.25% : 0.000001s : 6: predicate.graph_param_transform 0.70% : 0.000002s : 12: predicate.incorporate_call 0.61% : 0.000002s : 12: predicate.incorporate_call_switch 4.98% : 0.000013s : 63: predicate.inline 0.87% : 0.000002s : 12: predicate.inline_without_move 0.35% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.89% : 0.000002s : 12: predicate.less_batch_normalization 1.51% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.09% : 0.000005s : 38: predicate.load_eliminater 1.06% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.09% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.71% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.66% : 0.000002s : 12: predicate.merge_addn 0.73% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.71% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.69% : 0.000002s : 13: predicate.minmaximum_grad 0.65% : 0.000002s : 6: predicate.mutable_eliminate 0.37% : 0.000001s : 6: predicate.opt_reshape 0.40% : 0.000001s : 6: predicate.parallel_virtual_node 1.04% : 0.000003s : 14: predicate.partial_defer_inline 1.10% : 0.000003s : 19: predicate.partial_eliminate 0.74% : 0.000002s : 13: predicate.print_const_string_wrapper 0.82% : 0.000002s : 12: predicate.reduce_all_const_elim 0.95% : 0.000002s : 13: predicate.reduce_eliminate 0.45% : 0.000001s : 12: predicate.remove_not_recompute_node 1.06% : 0.000003s : 25: predicate.replace_applicator 0.42% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.77% : 0.000002s : 13: predicate.reshape_eliminate 0.70% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.49% : 0.000001s : 6: predicate.row_tensor_eliminate 0.91% : 0.000002s : 12: predicate.same_eliminate 0.42% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.83% : 0.000002s : 12: predicate.shard_identity_eliminate 1.18% : 0.000003s : 18: predicate.special_op_eliminate 0.87% : 0.000002s : 12: predicate.specialize_transform 0.95% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.85% : 0.000002s : 12: predicate.stack_unstack_eliminate 1.98% : 0.000005s : 38: predicate.stopgrad_eliminater 0.37% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.77% : 0.000002s : 14: predicate.switch_defer_inline 1.54% : 0.000004s : 26: predicate.switch_layer_defer_inline 15.39% : 0.000039s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.73% : 0.000002s : 13: predicate.transpose_eliminate 1.50% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.43% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.65% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.37% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.29% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.52% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.12% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 2.96% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.46% : 0.000001s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.70% : 0.000002s : 12: predicate.virtual_output_eliminate 0.42% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000152 4 11.00% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.00% : 0.000135s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088668 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000003s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.01% : 0.000010s : 1: assign_add_opt 0.16% : 0.000143s : 1: auto_monad 0.04% : 0.000034s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000328s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.00% : 0.000003s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.56% : 0.000493s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000008s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.53% : 0.000470s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001113s : 80: opt.transform.opt_a 0.06% : 0.000049s : 1: opt.transform.opt_after_cconv 0.17% : 0.000149s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 5.96% : 0.005285s : 1: opt_a 0.16% : 0.000138s : 1: opt_after_cconv 0.27% : 0.000240s : 1: opt_b 7.81% : 0.006929s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000228s : 1: renormalize.infer 0.21% : 0.000190s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000160s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000084s : 1: symbol_engine_optimizer 78.22% : 0.069354s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.72% : 0.002408s : 1: type_inference 0.08% : 0.000067s : 1: validate Time group info: ------[substitution.] 0.000132 63 5.19% : 0.000007s : 2: substitution.depend_value_elim 1.71% : 0.000002s : 5: substitution.elim_not_effective 1.92% : 0.000003s : 5: substitution.fold_const_symbol 5.38% : 0.000007s : 6: substitution.graph_param_transform 50.15% : 0.000066s : 1: substitution.inline 4.52% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.19% : 0.000004s : 6: substitution.load_eliminater 2.83% : 0.000004s : 2: substitution.reduce_all_const_elim 5.95% : 0.000008s : 10: substitution.remove_not_recompute_node 2.16% : 0.000003s : 2: substitution.replace_old_param 8.74% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.27% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002396 2 88.58% : 0.002123s : 1: type_inference.infer 11.42% : 0.000274s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000224 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.00% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.74% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.23% : 0.000005s : 25: predicate.arithmetic_simplify 0.90% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.39% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.77% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.93% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_depend_swap 1.92% : 0.000004s : 31: predicate.environ_get_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.32% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.82% : 0.000013s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.06% : 0.000002s : 12: predicate.less_batch_normalization 1.70% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.50% : 0.000006s : 38: predicate.load_eliminater 1.27% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.25% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.78% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.52% : 0.000001s : 6: predicate.parallel_virtual_node 1.21% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.78% : 0.000002s : 13: predicate.print_const_string_wrapper 0.90% : 0.000002s : 12: predicate.reduce_all_const_elim 1.05% : 0.000002s : 13: predicate.reduce_eliminate 0.59% : 0.000001s : 12: predicate.remove_not_recompute_node 1.16% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.89% : 0.000002s : 13: predicate.reshape_eliminate 0.85% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.20% : 0.000003s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.94% : 0.000002s : 12: predicate.shard_identity_eliminate 1.28% : 0.000003s : 18: predicate.special_op_eliminate 0.99% : 0.000002s : 12: predicate.specialize_transform 0.94% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.09% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.36% : 0.000005s : 38: predicate.stopgrad_eliminater 0.46% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.57% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.43% : 0.000010s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.70% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.58% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.69% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.51% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.49% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.47% : 0.000001s : 6: predicate.value_based_eliminate 0.76% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.59% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000145 4 10.42% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.58% : 0.000130s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088916 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.16% : 0.000144s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000341s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000050s : 1: distribtued_split 0.58% : 0.000520s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000010s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000006s : 1: label_micro_interleaved_index 0.56% : 0.000502s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001090s : 80: opt.transform.opt_a 0.06% : 0.000049s : 1: opt.transform.opt_after_cconv 0.17% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.08% : 0.005404s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.28% : 0.000246s : 1: opt_b 7.99% : 0.007109s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.27% : 0.000237s : 1: renormalize.infer 0.21% : 0.000191s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000157s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000087s : 1: symbol_engine_optimizer 77.78% : 0.069159s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.75% : 0.002446s : 1: type_inference 0.08% : 0.000071s : 1: validate TotalTime = 0.080242, [21] [bootstrap]: 0.00036898 [type_inference]: 0.00267765 [auto_monad]: 0.00013489 [graph_reusing]: 2.6701e-06 [inline]: 1.72993e-06 [parallel-infer-symbol]: 2.87034e-06 [pre_auto_parallel]: 2.79304e-05 [insert-virtual-dataset]: 3.62005e-06 [parallel-infer-symbol-second]: 4.30271e-07 [dataset_repeat_opt]: 1.57999e-06 [pipeline_split]: 1.49012e-06 [optimize]: 0.00706513, [52] [py_interpret_to_execute]: 1.51098e-05 [rewriter_before_opt_a]: 7.828e-05 [opt_a]: 0.00533615, [2] [Cycle 1]: 0.00154652, [43] [expand_dump_flag]: 3.35975e-06 [switch_simplify]: 2.87802e-05 [loop_unroll]: 1.27601e-05 [a_1]: 0.00034369 [recompute_prepare]: 8.7698e-06 [updatestate_depend_eliminate]: 1.09803e-05 [updatestate_assign_eliminate]: 5.81983e-06 [updatestate_loads_eliminate]: 7.38027e-06 [parameter_eliminate]: 3.79002e-06 [a_2]: 0.0001177 [accelerated_algorithm]: 8.1202e-06 [shard]: 2.09967e-06 [meta_shard_fg_expand]: 4.19002e-06 [shard_inline]: 8.53976e-06 [auto_parallel]: 1.13798e-05 [parallel]: 8.80007e-06 [flash_sp]: 1.18501e-05 [merge_comm]: 8.3996e-06 [allreduce_fusion]: 5.32018e-06 [matmul_add_comm_reduction]: 1.095e-05 [allreduce_slice_to_reducescatter]: 5.20144e-07 [virtual_shard_identity]: 9.54e-06 [virtual_dataset]: 8.02008e-06 [get_grad_eliminate_]: 7.39982e-06 [virtual_output]: 7.81985e-06 [merge_forward]: 6.55977e-06 [cell_reuse_recompute_pass]: 1.90968e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.92798e-05 [before_grad]: 1.36499e-05 [inplace_validation]: 5.78025e-06 [meta_fg_expand]: 5.32996e-06 [inplace_validation_after_expand]: 6.57002e-06 [flash_sp_send_recv_attached]: 5.49015e-06 [receive_attached]: 3.22005e-06 [after_resolve]: 1.14902e-05 [a_after_grad]: 2.77902e-05 [special_op_eliminate]: 7.81985e-06 [renormalize]: 0.00043065 [add_forward_monad_depend]: 3.53996e-06 [auto_monad_grad]: 1.89012e-06 [auto_monad_eliminator]: 3.29902e-05 [cse]: 3.39597e-05 [a_3]: 5.59501e-05 [Cycle 2]: 0.00076878, [43] [expand_dump_flag]: 1.07009e-06 [switch_simplify]: 9.12976e-06 [loop_unroll]: 7.45011e-06 [a_1]: 0.00019899 [recompute_prepare]: 7.45011e-06 [updatestate_depend_eliminate]: 5.91017e-06 [updatestate_assign_eliminate]: 4.78001e-06 [updatestate_loads_eliminate]: 5.0799e-06 [parameter_eliminate]: 1.41002e-06 [a_2]: 0.00010354 [accelerated_algorithm]: 7.94977e-06 [shard]: 1.22003e-06 [meta_shard_fg_expand]: 2.59979e-06 [shard_inline]: 7.47014e-06 [auto_parallel]: 1.09198e-05 [parallel]: 3.64985e-06 [flash_sp]: 3.58e-06 [merge_comm]: 5.98002e-06 [allreduce_fusion]: 4.79026e-06 [matmul_add_comm_reduction]: 7.6401e-06 [allreduce_slice_to_reducescatter]: 2.99886e-07 [virtual_shard_identity]: 8.6003e-06 [virtual_dataset]: 7.58003e-06 [get_grad_eliminate_]: 7.75e-06 [virtual_output]: 6.90995e-06 [merge_forward]: 4.78001e-06 [cell_reuse_recompute_pass]: 1.64984e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.53198e-05 [before_grad]: 1.255e-05 [inplace_validation]: 4.29992e-06 [meta_fg_expand]: 4.92996e-06 [inplace_validation_after_expand]: 4.92996e-06 [flash_sp_send_recv_attached]: 8.99658e-07 [receive_attached]: 8.29808e-07 [after_resolve]: 9.26992e-06 [a_after_grad]: 1.133e-05 [special_op_eliminate]: 6.99004e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 7.79983e-07 [auto_monad_grad]: 9.99775e-07 [auto_monad_eliminator]: 1.811e-05 [cse]: 1.79997e-05 [a_3]: 4.852e-05 [py_interpret_to_execute_after_opt_a]: 9.16002e-06 [slice_cell_reuse_recomputed_activation]: 2.39024e-06 [rewriter_after_opt_a]: 0.00015064 [convert_after_rewriter]: 8.99984e-06 [order_py_execute_after_rewriter]: 6.06012e-06 [opt_b]: 0.00024049, [1] [Cycle 1]: 0.00023439, [7] [b_1]: 0.00016077 [b_2]: 9.61963e-06 [updatestate_depend_eliminate]: 5.53997e-06 [updatestate_assign_eliminate]: 4.44986e-06 [updatestate_loads_eliminate]: 5.08036e-06 [renormalize]: 2.10013e-07 [cse]: 1.78302e-05 [optimize_parallel_all_gather_comm]: 8.06991e-06 [overlap_param_gather]: 1.55019e-06 [cconv]: 2.34698e-05 [loop_unroll]: 0.00048657 [opt_after_cconv]: 0.00013142, [1] [Cycle 1]: 0.00012521, [7] [c_1]: 5.267e-05 [parameter_eliminate]: 2.35997e-06 [updatestate_depend_eliminate]: 7.65035e-06 [updatestate_assign_eliminate]: 4.64031e-06 [updatestate_loads_eliminate]: 5.18002e-06 [cse]: 2.07201e-05 [renormalize]: 3.7998e-07 [remove_dup_value]: 1.232e-05 [tuple_transform]: 6.73798e-05, [1] [Cycle 1]: 6.28503e-05, [2] [d_1]: 5.425e-05 [renormalize]: 2.10013e-07 [partial_unused_args_eliminate]: 1.97999e-06 [add_cache_embedding]: 1.30902e-05 [add_recomputation]: 6.31702e-05 [cse_after_recomputation]: 2.42298e-05, [1] [Cycle 1]: 1.99103e-05, [1] [cse]: 1.51703e-05 [environ_conv]: 7.60984e-06 [swap_dp_allreduce_reducescatter]: 7.65966e-06 [bias_add_comm_swap]: 2.25008e-06 [label_micro_interleaved_index]: 2.4396e-06 [label_fine_grained_interleaved_index]: 2.14996e-06 [merge_cast_opt]: 1.37975e-06 [slice_recompute_activation]: 1.94972e-06 [micro_interleaved_order_control]: 1.87987e-06 [assign_add_opt]: 7.52974e-06 [ForceFp32Comm]: 9.29926e-07 [remove_cast_before_assign_add]: 1.11992e-06 [full_micro_interleaved_order_control]: 2.59979e-06 [reorder_send_recv_between_fp_bp]: 1.95019e-06 [comm_op_add_attrs]: 8.69855e-07 [add_comm_op_reuse_tag]: 1.19023e-06 [interleave_split_concat_branches]: 8.40053e-07 [interleave_parallel_branches]: 8.19564e-07 [overlap_opt_shard_in_pipeline]: 3.2899e-06 [overlap_opt_shard_grad_in_pipeline]: 2.39024e-06 [control_data_broadcast_order]: 1.10036e-06 [grouped_pairwise_exchange_alltoall]: 1.62004e-06 [offloading_packed_experts]: 1.11992e-06 [overlap_recompute_and_grad_model_parallel]: 2.12016e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.20147e-07 [overlap_recompute_allgather_and_fa_grad]: 1.17021e-06 [overlap_grad_ring_attention]: 1.68011e-06 [overlap_grad_flash_sp]: 1.48602e-05 [begin_end_overlap_inline]: 5.99772e-07 [split_matmul_comm_elemetwise]: 2.09035e-06 [split_layernorm_comm]: 1.84029e-06 [handle_group_info]: 1.15996e-06 [symbol_engine_optimizer]: 8.34698e-05, [1] [Cycle 1]: 7.88304e-05, [6] [build]: 5.01005e-06 [elim_shapecalc]: 1.12699e-05 [elim_not_effective]: 1.55498e-05 [opt_reshape]: 8.57003e-06 [fold_const_symbol]: 1.31703e-05 [renormalize]: 2.90107e-07 [pipeline_parallel_scheduler]: 1.53994e-06 [auto_monad_reorder]: 3.04999e-05 [get_jit_bprop_graph]: 4.49829e-07 [rewriter_after_jit_bprop_graph]: 4.49829e-07 [eliminate_special_op_node]: 0.00050103 [distribtued_split]: 4.91799e-05 [validate]: 3.54899e-05 [task_emit]: 0.0690861 [execute]: 1.06599e-05 Sums bootstrap : 0.000369s : 0.48% type_inference : 0.002678s : 3.51% auto_monad : 0.000135s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000028s : 0.04% insert-virtual-dataset : 0.000004s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000078s : 0.10% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000020s : 0.03% optimize.opt_a.a_1 : 0.000543s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000017s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000221s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000035s : 0.05% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.01% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000039s : 0.05% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000431s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000104s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000151s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000487s : 0.64% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000015s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000003s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000011s : 0.01% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000501s : 0.66% distribtued_split : 0.000049s : 0.06% validate : 0.000035s : 0.05% task_emit : 0.069086s : 90.61% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000135 63 5.07% : 0.000007s : 2: substitution.depend_value_elim 1.79% : 0.000002s : 5: substitution.elim_not_effective 1.83% : 0.000002s : 5: substitution.fold_const_symbol 5.23% : 0.000007s : 6: substitution.graph_param_transform 49.61% : 0.000067s : 1: substitution.inline 3.97% : 0.000005s : 10: substitution.j_node_and_user_rematch 2.98% : 0.000004s : 6: substitution.load_eliminater 2.55% : 0.000003s : 2: substitution.reduce_all_const_elim 7.46% : 0.000010s : 10: substitution.remove_not_recompute_node 2.71% : 0.000004s : 2: substitution.replace_old_param 8.61% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.20% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002646 2 89.39% : 0.002365s : 1: type_inference.infer 10.61% : 0.000281s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000066 1 100.00% : 0.000066s : 1: match.inline ------[predicate.] 0.000244 1420 0.73% : 0.000002s : 13: predicate.accumulaten_eliminater 1.04% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.67% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.03% : 0.000005s : 25: predicate.arithmetic_simplify 0.76% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.68% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.36% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.78% : 0.000002s : 12: predicate.depend_value_elim 0.79% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.84% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.78% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.52% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.08% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.05% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.03% : 0.000003s : 19: predicate.environ_get_depend_swap 1.95% : 0.000005s : 31: predicate.environ_get_eliminate 1.02% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.76% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.38% : 0.000003s : 14: predicate.float_depend_g_call 0.68% : 0.000002s : 12: predicate.float_environ_get_switch 1.03% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.76% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.72% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.28% : 0.000013s : 63: predicate.inline 6.97% : 0.000017s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.88% : 0.000002s : 12: predicate.less_batch_normalization 1.52% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.29% : 0.000006s : 38: predicate.load_eliminater 1.26% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.10% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.72% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.68% : 0.000002s : 12: predicate.merge_addn 0.71% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.69% : 0.000002s : 13: predicate.minmaximum_grad 0.69% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.06% : 0.000003s : 14: predicate.partial_defer_inline 1.16% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.80% : 0.000002s : 12: predicate.reduce_all_const_elim 1.12% : 0.000003s : 13: predicate.reduce_eliminate 0.51% : 0.000001s : 12: predicate.remove_not_recompute_node 1.06% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.22% : 0.000001s : 6: predicate.reset_defer_inline 0.72% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000002s : 12: predicate.same_eliminate 0.45% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.86% : 0.000002s : 12: predicate.shard_identity_eliminate 1.28% : 0.000003s : 18: predicate.special_op_eliminate 0.86% : 0.000002s : 12: predicate.specialize_transform 0.91% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.00% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.16% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.81% : 0.000002s : 14: predicate.switch_defer_inline 1.59% : 0.000004s : 26: predicate.switch_layer_defer_inline 3.99% : 0.000010s : 43: predicate.switch_simplify 0.69% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.67% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.59% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.73% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.32% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.50% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.30% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.31% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.47% : 0.000001s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.91% : 0.000002s : 12: predicate.virtual_output_eliminate 0.50% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000155 4 10.88% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.12% : 0.000139s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089122 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.08% : 0.000067s : 1: add_recomputation 0.02% : 0.000015s : 1: assign_add_opt 0.16% : 0.000147s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.44% : 0.000394s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000027s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000058s : 1: distribtued_split 0.58% : 0.000515s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000496s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001108s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.03% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 5.99% : 0.005340s : 1: opt_a 0.15% : 0.000135s : 1: opt_after_cconv 0.27% : 0.000243s : 1: opt_b 7.94% : 0.007073s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000007s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000034s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.26% : 0.000234s : 1: renormalize.infer 0.21% : 0.000191s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000156s : 1: rewriter_after_opt_a 0.09% : 0.000083s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000087s : 1: symbol_engine_optimizer 77.55% : 0.069111s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 3.03% : 0.002696s : 1: type_inference 0.08% : 0.000071s : 1: validate TotalTime = 0.0815849, [21] [bootstrap]: 0.00030185 [type_inference]: 0.00238946 [auto_monad]: 0.0001302 [graph_reusing]: 2.78978e-06 [inline]: 1.51992e-06 [parallel-infer-symbol]: 2.11038e-06 [pre_auto_parallel]: 2.61799e-05 [insert-virtual-dataset]: 2.97977e-06 [parallel-infer-symbol-second]: 3.60422e-07 [dataset_repeat_opt]: 1.36998e-06 [pipeline_split]: 1.79e-06 [optimize]: 0.00692145, [52] [py_interpret_to_execute]: 1.48299e-05 [rewriter_before_opt_a]: 3.47798e-05 [opt_a]: 0.00528151, [2] [Cycle 1]: 0.00149499, [43] [expand_dump_flag]: 3.36999e-06 [switch_simplify]: 3.02298e-05 [loop_unroll]: 1.34297e-05 [a_1]: 0.0003389 [recompute_prepare]: 8.17981e-06 [updatestate_depend_eliminate]: 8.84989e-06 [updatestate_assign_eliminate]: 5.62984e-06 [updatestate_loads_eliminate]: 7.56001e-06 [parameter_eliminate]: 3.11993e-06 [a_2]: 0.00011652 [accelerated_algorithm]: 7.87014e-06 [shard]: 2.14018e-06 [meta_shard_fg_expand]: 4.02005e-06 [shard_inline]: 7.95024e-06 [auto_parallel]: 1.156e-05 [parallel]: 7.93021e-06 [flash_sp]: 1.129e-05 [merge_comm]: 7.83009e-06 [allreduce_fusion]: 5.28013e-06 [matmul_add_comm_reduction]: 1.04997e-05 [allreduce_slice_to_reducescatter]: 4.60073e-07 [virtual_shard_identity]: 9.17027e-06 [virtual_dataset]: 7.74022e-06 [get_grad_eliminate_]: 7.18003e-06 [virtual_output]: 7.77002e-06 [merge_forward]: 6.51972e-06 [cell_reuse_recompute_pass]: 1.72993e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.60998e-05 [before_grad]: 1.28499e-05 [inplace_validation]: 5.04032e-06 [meta_fg_expand]: 5.24009e-06 [inplace_validation_after_expand]: 5.92973e-06 [flash_sp_send_recv_attached]: 4.87966e-06 [receive_attached]: 2.7502e-06 [after_resolve]: 1.06297e-05 [a_after_grad]: 1.21398e-05 [special_op_eliminate]: 7.70995e-06 [renormalize]: 0.00042367 [add_forward_monad_depend]: 3.03006e-06 [auto_monad_grad]: 1.66008e-06 [auto_monad_eliminator]: 3.10801e-05 [cse]: 3.26801e-05 [a_3]: 5.63604e-05 [Cycle 2]: 0.0007986, [43] [expand_dump_flag]: 1.11014e-06 [switch_simplify]: 9.07015e-06 [loop_unroll]: 7.55023e-06 [a_1]: 0.00023475 [recompute_prepare]: 7.50972e-06 [updatestate_depend_eliminate]: 6.10994e-06 [updatestate_assign_eliminate]: 4.80982e-06 [updatestate_loads_eliminate]: 5.55022e-06 [parameter_eliminate]: 1.33971e-06 [a_2]: 0.00010367 [accelerated_algorithm]: 7.79983e-06 [shard]: 1.09989e-06 [meta_shard_fg_expand]: 2.60025e-06 [shard_inline]: 7.47992e-06 [auto_parallel]: 1.13701e-05 [parallel]: 3.67966e-06 [flash_sp]: 2.23983e-06 [merge_comm]: 5.89993e-06 [allreduce_fusion]: 4.69992e-06 [matmul_add_comm_reduction]: 7.47992e-06 [allreduce_slice_to_reducescatter]: 3.09665e-07 [virtual_shard_identity]: 8.00006e-06 [virtual_dataset]: 7.22986e-06 [get_grad_eliminate_]: 7.09994e-06 [virtual_output]: 6.90995e-06 [merge_forward]: 4.24962e-06 [cell_reuse_recompute_pass]: 1.73971e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.468e-05 [before_grad]: 1.21901e-05 [inplace_validation]: 3.93996e-06 [meta_fg_expand]: 4.62029e-06 [inplace_validation_after_expand]: 5.05988e-06 [flash_sp_send_recv_attached]: 9.09902e-07 [receive_attached]: 7.5018e-07 [after_resolve]: 9.49018e-06 [a_after_grad]: 1.10804e-05 [special_op_eliminate]: 6.9798e-06 [renormalize]: 6.98492e-08 [add_forward_monad_depend]: 8.2003e-07 [auto_monad_grad]: 1.13994e-06 [auto_monad_eliminator]: 1.81003e-05 [cse]: 2.07601e-05 [a_3]: 4.806e-05 [py_interpret_to_execute_after_opt_a]: 9.41008e-06 [slice_cell_reuse_recomputed_activation]: 1.8198e-06 [rewriter_after_opt_a]: 0.00015602 [convert_after_rewriter]: 7.98004e-06 [order_py_execute_after_rewriter]: 5.69969e-06 [opt_b]: 0.0002409, [1] [Cycle 1]: 0.00023497, [7] [b_1]: 0.00016035 [b_2]: 9.77982e-06 [updatestate_depend_eliminate]: 5.03007e-06 [updatestate_assign_eliminate]: 4.25009e-06 [updatestate_loads_eliminate]: 5.24987e-06 [renormalize]: 2.20258e-07 [cse]: 1.94199e-05 [optimize_parallel_all_gather_comm]: 7.62986e-06 [overlap_param_gather]: 6.99889e-07 [cconv]: 2.15797e-05 [loop_unroll]: 0.00046406 [opt_after_cconv]: 0.00013226, [1] [Cycle 1]: 0.00012598, [7] [c_1]: 5.166e-05 [parameter_eliminate]: 2.2403e-06 [updatestate_depend_eliminate]: 7.87992e-06 [updatestate_assign_eliminate]: 4.46988e-06 [updatestate_loads_eliminate]: 5.64987e-06 [cse]: 2.29003e-05 [renormalize]: 3.50177e-07 [remove_dup_value]: 1.16201e-05 [tuple_transform]: 6.76299e-05, [1] [Cycle 1]: 6.30999e-05, [2] [d_1]: 5.41601e-05 [renormalize]: 1.80211e-07 [partial_unused_args_eliminate]: 1.53994e-06 [add_cache_embedding]: 1.25398e-05 [add_recomputation]: 5.97001e-05 [cse_after_recomputation]: 2.64896e-05, [1] [Cycle 1]: 2.184e-05, [1] [cse]: 1.69999e-05 [environ_conv]: 6.97002e-06 [swap_dp_allreduce_reducescatter]: 6.64964e-06 [bias_add_comm_swap]: 1.97021e-06 [label_micro_interleaved_index]: 1.68988e-06 [label_fine_grained_interleaved_index]: 1.47009e-06 [merge_cast_opt]: 1.0198e-06 [slice_recompute_activation]: 1.43982e-06 [micro_interleaved_order_control]: 1.33039e-06 [assign_add_opt]: 7.16001e-06 [ForceFp32Comm]: 5.09899e-07 [remove_cast_before_assign_add]: 6.20261e-07 [full_micro_interleaved_order_control]: 1.85007e-06 [reorder_send_recv_between_fp_bp]: 1.8999e-06 [comm_op_add_attrs]: 1.06031e-06 [add_comm_op_reuse_tag]: 8.29808e-07 [interleave_split_concat_branches]: 1.07009e-06 [interleave_parallel_branches]: 7.79983e-07 [overlap_opt_shard_in_pipeline]: 8.00006e-07 [overlap_opt_shard_grad_in_pipeline]: 2.07033e-06 [control_data_broadcast_order]: 9.80217e-07 [grouped_pairwise_exchange_alltoall]: 1.45007e-06 [offloading_packed_experts]: 1.0496e-06 [overlap_recompute_and_grad_model_parallel]: 1.64006e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.2003e-07 [overlap_recompute_allgather_and_fa_grad]: 1.05985e-06 [overlap_grad_ring_attention]: 1.62981e-06 [overlap_grad_flash_sp]: 1.44299e-05 [begin_end_overlap_inline]: 4.79631e-07 [split_matmul_comm_elemetwise]: 1.97021e-06 [split_layernorm_comm]: 1.34995e-06 [handle_group_info]: 9.89996e-07 [symbol_engine_optimizer]: 8.15103e-05, [1] [Cycle 1]: 7.69896e-05, [6] [build]: 4.23985e-06 [elim_shapecalc]: 1.16602e-05 [elim_not_effective]: 1.533e-05 [opt_reshape]: 8.3996e-06 [fold_const_symbol]: 1.29999e-05 [renormalize]: 3.19909e-07 [pipeline_parallel_scheduler]: 1.47009e-06 [auto_monad_reorder]: 2.87299e-05 [get_jit_bprop_graph]: 4.4005e-07 [rewriter_after_jit_bprop_graph]: 4.29805e-07 [eliminate_special_op_node]: 0.00048214 [distribtued_split]: 3.673e-05 [validate]: 3.42098e-05 [task_emit]: 0.0709827 [execute]: 6.73998e-06 Sums bootstrap : 0.000302s : 0.39% type_inference : 0.002389s : 3.08% auto_monad : 0.000130s : 0.17% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000574s : 0.74% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000015s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000017s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000014s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000025s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000023s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000424s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.06% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000104s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000156s : 0.20% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000160s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000464s : 0.60% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000007s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000001s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000015s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000482s : 0.62% distribtued_split : 0.000037s : 0.05% validate : 0.000034s : 0.04% task_emit : 0.070983s : 91.41% execute : 0.000007s : 0.01% Time group info: ------[substitution.] 0.000133 63 5.09% : 0.000007s : 2: substitution.depend_value_elim 1.76% : 0.000002s : 5: substitution.elim_not_effective 1.70% : 0.000002s : 5: substitution.fold_const_symbol 5.44% : 0.000007s : 6: substitution.graph_param_transform 51.45% : 0.000068s : 1: substitution.inline 3.68% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.25% : 0.000004s : 6: substitution.load_eliminater 2.95% : 0.000004s : 2: substitution.reduce_all_const_elim 5.81% : 0.000008s : 10: substitution.remove_not_recompute_node 2.54% : 0.000003s : 2: substitution.replace_old_param 8.79% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.54% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002360 2 88.76% : 0.002095s : 1: type_inference.infer 11.24% : 0.000265s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000067 1 100.00% : 0.000067s : 1: match.inline ------[predicate.] 0.000226 1420 0.91% : 0.000002s : 13: predicate.accumulaten_eliminater 1.04% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.10% : 0.000005s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.53% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.86% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.62% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000002s : 19: predicate.environ_get_depend_swap 1.91% : 0.000004s : 31: predicate.environ_get_eliminate 1.18% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.80% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.77% : 0.000002s : 12: predicate.float_environ_get_switch 1.05% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.84% : 0.000002s : 12: predicate.get_grad_eliminate 0.26% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.74% : 0.000013s : 63: predicate.inline 0.97% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.00% : 0.000002s : 12: predicate.less_batch_normalization 1.67% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.45% : 0.000006s : 38: predicate.load_eliminater 1.26% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.28% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.72% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.29% : 0.000003s : 19: predicate.partial_eliminate 0.82% : 0.000002s : 13: predicate.print_const_string_wrapper 0.88% : 0.000002s : 12: predicate.reduce_all_const_elim 1.05% : 0.000002s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.19% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.85% : 0.000002s : 13: predicate.reshape_eliminate 0.85% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.56% : 0.000001s : 6: predicate.row_tensor_eliminate 1.03% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.33% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 1.07% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.94% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.43% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.86% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.71% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.50% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.96% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.66% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.74% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.41% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.34% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.48% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000147 4 10.50% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.50% : 0.000132s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090322 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.01% : 0.000011s : 1: assign_add_opt 0.16% : 0.000143s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000328s : 1: bootstrap 0.03% : 0.000025s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000045s : 1: distribtued_split 0.55% : 0.000496s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000014s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.52% : 0.000474s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001115s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000045s : 4: opt.transform.symbol_engine_opt 5.85% : 0.005285s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.27% : 0.000244s : 1: opt_b 7.67% : 0.006929s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000003s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.25% : 0.000230s : 1: renormalize.infer 0.21% : 0.000188s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000163s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.09% : 0.000084s : 1: symbol_engine_optimizer 78.61% : 0.071004s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.67% : 0.002408s : 1: type_inference 0.07% : 0.000067s : 1: validate TotalTime = 0.0819579, [21] [bootstrap]: 0.00032893 [type_inference]: 0.00259854 [auto_monad]: 0.00014535 [graph_reusing]: 2.66032e-06 [inline]: 1.50036e-06 [parallel-infer-symbol]: 2.30037e-06 [pre_auto_parallel]: 2.824e-05 [insert-virtual-dataset]: 2.94996e-06 [parallel-infer-symbol-second]: 4.69852e-07 [dataset_repeat_opt]: 1.46963e-06 [pipeline_split]: 1.71037e-06 [optimize]: 0.00782227, [52] [py_interpret_to_execute]: 1.59596e-05 [rewriter_before_opt_a]: 4.04003e-05 [opt_a]: 0.00598553, [2] [Cycle 1]: 0.00181119, [43] [expand_dump_flag]: 4.19002e-06 [switch_simplify]: 3.33902e-05 [loop_unroll]: 1.59796e-05 [a_1]: 0.00040685 [recompute_prepare]: 1.06399e-05 [updatestate_depend_eliminate]: 9.54e-06 [updatestate_assign_eliminate]: 6.04987e-06 [updatestate_loads_eliminate]: 8.36002e-06 [parameter_eliminate]: 3.65963e-06 [a_2]: 0.00014243 [accelerated_algorithm]: 1.013e-05 [shard]: 1.95997e-06 [meta_shard_fg_expand]: 4.02983e-06 [shard_inline]: 1.06599e-05 [auto_parallel]: 1.22902e-05 [parallel]: 8.02008e-06 [flash_sp]: 1.21803e-05 [merge_comm]: 8.97003e-06 [allreduce_fusion]: 6.73998e-06 [matmul_add_comm_reduction]: 1.20699e-05 [allreduce_slice_to_reducescatter]: 4.69852e-07 [virtual_shard_identity]: 1.11498e-05 [virtual_dataset]: 9.84967e-06 [get_grad_eliminate_]: 9.24012e-06 [virtual_output]: 9.37982e-06 [merge_forward]: 7.748e-05 [cell_reuse_recompute_pass]: 2.07033e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.19699e-05 [before_grad]: 1.81501e-05 [inplace_validation]: 6.3302e-06 [meta_fg_expand]: 7.05989e-06 [inplace_validation_after_expand]: 7.87992e-06 [flash_sp_send_recv_attached]: 5.64009e-06 [receive_attached]: 2.78e-06 [after_resolve]: 1.413e-05 [a_after_grad]: 1.56299e-05 [special_op_eliminate]: 9.05013e-06 [renormalize]: 0.00047073 [add_forward_monad_depend]: 3.58978e-06 [auto_monad_grad]: 1.95997e-06 [auto_monad_eliminator]: 3.576e-05 [cse]: 3.677e-05 [a_3]: 6.95996e-05 [Cycle 2]: 0.0009111, [43] [expand_dump_flag]: 1.05985e-06 [switch_simplify]: 1.09202e-05 [loop_unroll]: 9.68995e-06 [a_1]: 0.00024772 [recompute_prepare]: 9.09995e-06 [updatestate_depend_eliminate]: 6.35022e-06 [updatestate_assign_eliminate]: 5.12972e-06 [updatestate_loads_eliminate]: 5.79003e-06 [parameter_eliminate]: 1.4198e-06 [a_2]: 0.00012569 [accelerated_algorithm]: 9.86038e-06 [shard]: 1.35973e-06 [meta_shard_fg_expand]: 3.09013e-06 [shard_inline]: 9.58005e-06 [auto_parallel]: 1.171e-05 [parallel]: 3.85009e-06 [flash_sp]: 3.73973e-06 [merge_comm]: 7.13021e-06 [allreduce_fusion]: 5.69969e-06 [matmul_add_comm_reduction]: 8.32975e-06 [allreduce_slice_to_reducescatter]: 3.1013e-07 [virtual_shard_identity]: 1.04103e-05 [virtual_dataset]: 9.1698e-06 [get_grad_eliminate_]: 8.82009e-06 [virtual_output]: 8.82987e-06 [merge_forward]: 4.92996e-06 [cell_reuse_recompute_pass]: 1.95997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.929e-05 [before_grad]: 1.60099e-05 [inplace_validation]: 4.57978e-06 [meta_fg_expand]: 5.59026e-06 [inplace_validation_after_expand]: 5.72018e-06 [flash_sp_send_recv_attached]: 1.15018e-06 [receive_attached]: 8.00006e-07 [after_resolve]: 1.19e-05 [a_after_grad]: 1.46101e-05 [special_op_eliminate]: 8.78982e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 1.05985e-06 [auto_monad_grad]: 1.18976e-06 [auto_monad_eliminator]: 2.03298e-05 [cse]: 2.16202e-05 [a_3]: 5.85597e-05 [py_interpret_to_execute_after_opt_a]: 9.79006e-06 [slice_cell_reuse_recomputed_activation]: 2.27988e-06 [rewriter_after_opt_a]: 0.00015078 [convert_after_rewriter]: 9.85991e-06 [order_py_execute_after_rewriter]: 7.20005e-06 [opt_b]: 0.00028383, [1] [Cycle 1]: 0.00027781, [7] [b_1]: 0.00019568 [b_2]: 1.19801e-05 [updatestate_depend_eliminate]: 5.87013e-06 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 5.63031e-06 [renormalize]: 3.09665e-07 [cse]: 2.089e-05 [optimize_parallel_all_gather_comm]: 8.68971e-06 [overlap_param_gather]: 1.30991e-06 [cconv]: 2.58698e-05 [loop_unroll]: 0.00049211 [opt_after_cconv]: 0.00015088, [1] [Cycle 1]: 0.00014483, [7] [c_1]: 6.31497e-05 [parameter_eliminate]: 2.56998e-06 [updatestate_depend_eliminate]: 9.37004e-06 [updatestate_assign_eliminate]: 4.97e-06 [updatestate_loads_eliminate]: 5.98002e-06 [cse]: 2.39001e-05 [renormalize]: 4.10248e-07 [remove_dup_value]: 1.62101e-05 [tuple_transform]: 8.39303e-05, [1] [Cycle 1]: 7.908e-05, [2] [d_1]: 6.83102e-05 [renormalize]: 3.1013e-07 [partial_unused_args_eliminate]: 2.22027e-06 [add_cache_embedding]: 1.41999e-05 [add_recomputation]: 6.80196e-05 [cse_after_recomputation]: 2.97199e-05, [1] [Cycle 1]: 2.45697e-05, [1] [cse]: 1.92798e-05 [environ_conv]: 7.98982e-06 [swap_dp_allreduce_reducescatter]: 8.04989e-06 [bias_add_comm_swap]: 2.4098e-06 [label_micro_interleaved_index]: 2.04984e-06 [label_fine_grained_interleaved_index]: 2.36975e-06 [merge_cast_opt]: 1.3602e-06 [slice_recompute_activation]: 2.24961e-06 [micro_interleaved_order_control]: 1.87987e-06 [assign_add_opt]: 8.38982e-06 [ForceFp32Comm]: 9.79751e-07 [remove_cast_before_assign_add]: 1.05007e-06 [full_micro_interleaved_order_control]: 2.52994e-06 [reorder_send_recv_between_fp_bp]: 2.21003e-06 [comm_op_add_attrs]: 1.37975e-06 [add_comm_op_reuse_tag]: 1.30013e-06 [interleave_split_concat_branches]: 9.59728e-07 [interleave_parallel_branches]: 9.69972e-07 [overlap_opt_shard_in_pipeline]: 1.43005e-06 [overlap_opt_shard_grad_in_pipeline]: 2.35997e-06 [control_data_broadcast_order]: 1.24006e-06 [grouped_pairwise_exchange_alltoall]: 1.3602e-06 [offloading_packed_experts]: 1.39978e-06 [overlap_recompute_and_grad_model_parallel]: 2.59001e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.2003e-07 [overlap_recompute_allgather_and_fa_grad]: 1.53016e-06 [overlap_grad_ring_attention]: 2.04006e-06 [overlap_grad_flash_sp]: 1.75401e-05 [begin_end_overlap_inline]: 8.00006e-07 [split_matmul_comm_elemetwise]: 2.65008e-06 [split_layernorm_comm]: 1.91014e-06 [handle_group_info]: 1.28988e-06 [symbol_engine_optimizer]: 9.76999e-05, [1] [Cycle 1]: 9.30596e-05, [6] [build]: 4.5104e-06 [elim_shapecalc]: 1.34003e-05 [elim_not_effective]: 2.00099e-05 [opt_reshape]: 1.03097e-05 [fold_const_symbol]: 1.66502e-05 [renormalize]: 3.70201e-07 [pipeline_parallel_scheduler]: 1.97021e-06 [auto_monad_reorder]: 3.27099e-05 [get_jit_bprop_graph]: 5.50412e-07 [rewriter_after_jit_bprop_graph]: 4.89876e-07 [eliminate_special_op_node]: 0.00059126 [distribtued_split]: 4.59598e-05 [validate]: 3.82401e-05 [task_emit]: 0.0700311 [execute]: 1.37896e-05 Sums bootstrap : 0.000329s : 0.42% type_inference : 0.002599s : 3.35% auto_monad : 0.000145s : 0.19% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000028s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000040s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000044s : 0.06% optimize.opt_a.loop_unroll : 0.000026s : 0.03% optimize.opt_a.a_1 : 0.000655s : 0.84% optimize.opt_a.recompute_prepare : 0.000020s : 0.03% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000268s : 0.35% optimize.opt_a.accelerated_algorithm : 0.000020s : 0.03% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000020s : 0.03% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000016s : 0.02% optimize.opt_a.allreduce_fusion : 0.000012s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000022s : 0.03% optimize.opt_a.virtual_dataset : 0.000019s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000018s : 0.02% optimize.opt_a.virtual_output : 0.000018s : 0.02% optimize.opt_a.merge_forward : 0.000082s : 0.11% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000041s : 0.05% optimize.opt_a.before_grad : 0.000034s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000013s : 0.02% optimize.opt_a.inplace_validation_after_expand : 0.000014s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000026s : 0.03% optimize.opt_a.a_after_grad : 0.000030s : 0.04% optimize.opt_a.special_op_eliminate : 0.000018s : 0.02% optimize.opt_a.renormalize : 0.000471s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000056s : 0.07% optimize.opt_a.cse : 0.000058s : 0.08% optimize.opt_a.a_3 : 0.000128s : 0.17% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000151s : 0.19% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000196s : 0.25% optimize.opt_b.b_2 : 0.000012s : 0.02% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000021s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000026s : 0.03% optimize.loop_unroll : 0.000492s : 0.63% optimize.opt_after_cconv.c_1 : 0.000063s : 0.08% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000024s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000016s : 0.02% optimize.tuple_transform.d_1 : 0.000068s : 0.09% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000068s : 0.09% optimize.cse_after_recomputation.cse : 0.000019s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000008s : 0.01% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000001s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000003s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000002s : 0.00% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000018s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000003s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000020s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000033s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000591s : 0.76% distribtued_split : 0.000046s : 0.06% validate : 0.000038s : 0.05% task_emit : 0.070031s : 90.21% execute : 0.000014s : 0.02% Time group info: ------[substitution.] 0.000151 63 5.23% : 0.000008s : 2: substitution.depend_value_elim 2.16% : 0.000003s : 5: substitution.elim_not_effective 1.93% : 0.000003s : 5: substitution.fold_const_symbol 5.85% : 0.000009s : 6: substitution.graph_param_transform 47.74% : 0.000072s : 1: substitution.inline 4.57% : 0.000007s : 10: substitution.j_node_and_user_rematch 3.42% : 0.000005s : 6: substitution.load_eliminater 2.65% : 0.000004s : 2: substitution.reduce_all_const_elim 7.15% : 0.000011s : 10: substitution.remove_not_recompute_node 2.58% : 0.000004s : 2: substitution.replace_old_param 8.76% : 0.000013s : 6: substitution.updatestate_pure_node_eliminater 7.94% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002567 2 87.84% : 0.002255s : 1: type_inference.infer 12.16% : 0.000312s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000071 1 100.00% : 0.000071s : 1: match.inline ------[predicate.] 0.000268 1420 0.76% : 0.000002s : 13: predicate.accumulaten_eliminater 1.11% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.17% : 0.000006s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.85% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.33% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.84% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.80% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.13% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.93% : 0.000005s : 31: predicate.environ_get_eliminate 1.08% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.34% : 0.000004s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.10% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.84% : 0.000002s : 12: predicate.incorporate_call 0.72% : 0.000002s : 12: predicate.incorporate_call_switch 6.05% : 0.000016s : 63: predicate.inline 1.19% : 0.000003s : 12: predicate.inline_without_move 0.42% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000003s : 12: predicate.less_batch_normalization 1.84% : 0.000005s : 25: predicate.list_to_tuple_eliminator_ 2.44% : 0.000007s : 38: predicate.load_eliminater 1.22% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.27% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.78% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.80% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.52% : 0.000001s : 6: predicate.parallel_virtual_node 1.13% : 0.000003s : 14: predicate.partial_defer_inline 1.35% : 0.000004s : 19: predicate.partial_eliminate 0.78% : 0.000002s : 13: predicate.print_const_string_wrapper 0.98% : 0.000003s : 12: predicate.reduce_all_const_elim 0.99% : 0.000003s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.16% : 0.000003s : 25: predicate.replace_applicator 0.53% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 0.96% : 0.000003s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.85% : 0.000002s : 12: predicate.shard_identity_eliminate 1.40% : 0.000004s : 18: predicate.special_op_eliminate 1.03% : 0.000003s : 12: predicate.specialize_transform 1.13% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.94% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.31% : 0.000006s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.84% : 0.000002s : 14: predicate.switch_defer_inline 1.61% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.17% : 0.000011s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.78% : 0.000002s : 13: predicate.transpose_eliminate 1.84% : 0.000005s : 25: predicate.tuple_list_convert_item_index_to_positive 1.72% : 0.000005s : 25: predicate.tuple_list_get_item_const_eliminator 1.63% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.74% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.52% : 0.000007s : 37: predicate.tuple_list_set_item_eliminator 1.72% : 0.000005s : 25: predicate.tuple_to_list_eliminator_ 2.34% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.43% : 0.000009s : 50: predicate.updatestate_useless_node_eliminater 0.54% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.76% : 0.000002s : 12: predicate.virtual_output_eliminate 0.49% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000173 4 10.80% : 0.000019s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.20% : 0.000155s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091920 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.00% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000073s : 1: add_recomputation 0.01% : 0.000012s : 1: assign_add_opt 0.17% : 0.000160s : 1: auto_monad 0.04% : 0.000039s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.39% : 0.000356s : 1: bootstrap 0.03% : 0.000030s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.04% : 0.000033s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000054s : 1: distribtued_split 0.66% : 0.000605s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.03% : 0.000023s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000502s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000018s : 1: opt.transform.loop_unroll_optimizer 1.44% : 0.001325s : 80: opt.transform.opt_a 0.07% : 0.000061s : 1: opt.transform.opt_after_cconv 0.20% : 0.000184s : 27: opt.transform.opt_b 0.07% : 0.000066s : 1: opt.transform.opt_trans_graph 0.04% : 0.000041s : 3: opt.transform.special_op_eliminate 0.06% : 0.000056s : 4: opt.transform.symbol_engine_opt 6.52% : 0.005990s : 1: opt_a 0.17% : 0.000155s : 1: opt_after_cconv 0.31% : 0.000287s : 1: opt_b 8.52% : 0.007831s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000035s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.02% : 0.000020s : 1: remove_dup_value 0.28% : 0.000253s : 1: renormalize.infer 0.23% : 0.000211s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000157s : 1: rewriter_after_opt_a 0.05% : 0.000045s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000101s : 1: symbol_engine_optimizer 76.22% : 0.070066s : 1: task_emit 0.10% : 0.000088s : 1: tuple_transform 2.85% : 0.002617s : 1: type_inference 0.08% : 0.000075s : 1: validate distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. ...... =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 =============================== warnings summary ============================================================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") =============================== warnings summary ===============================/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad"). /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81-- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81-- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") ======================= 1 passed, 18 warnings in 56.16s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") ======================= 1 passed, 18 warnings in 55.87s ========================/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50======================= 1 passed, 18 warnings in 55.56s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 -- Docs: https://docs.pytest.org/en/latest/warnings.html-- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") ======================= 1 passed, 18 warnings in 56.02s =============================================== 1 passed, 18 warnings in 55.40s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143======================= 1 passed, 18 warnings in 56.30s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") ./home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 1 passed, 18 warnings in 55.72s ======================== =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 1 passed, 18 warnings in 55.26s ======================== [WARNING] DEVICE(32856,ffffa9615c10,python3.7):2025-02-07-15:51:00.124.821 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x46bd3f20 is not exist. [WARNING] DEVICE(32835,ffff93ec6c10,python3.7):2025-02-07-15:51:00.214.791 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x415ac930 is not exist. [WARNING] DEVICE(32881,ffff9da47c10,python3.7):2025-02-07-15:51:00.246.551 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x29bd7a20 is not exist. [WARNING] DEVICE(32903,ffff80a30c10,python3.7):2025-02-07-15:51:00.250.255 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x16830f20 is not exist. [WARNING] DEVICE(32811,ffff81c64c10,python3.7):2025-02-07-15:51:02.316.754 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x291d3ce0 is not exist. [WARNING] DEVICE(32868,ffffaff3fc10,python3.7):2025-02-07-15:51:02.346.488 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x4abb94b0 is not exist. [WARNING] DEVICE(32826,ffff93305c10,python3.7):2025-02-07-15:51:02.413.212 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x52ef98e0 is not exist. [WARNING] DEVICE(32916,ffffaa25ac10,python3.7):2025-02-07-15:51:02.441.728 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x51044f80 is not exist. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 82.60s (0:01:22) =================== ff8c39f2e51611efac92c4447d93fe45/pass/test_all_test_hccl_all_to_all_v.log0000644000175400017540000007414414751343157025476 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collected 1 item test_all.py ============================= test session starts =========================================================== test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sinkrootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collecting ... collecting ... [WARNING] DISTRIBUTED(184413,ffffad3abc10,python3.7):2025-02-07-15:55:51.850.149 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(184414,ffff93a15c10,python3.7):2025-02-07-15:55:52.073.213 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(184413,fffee17f20f0,python3.7):2025-02-07-15:55:52.885.456 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(184414,fffec7fef0f0,python3.7):2025-02-07-15:55:52.885.454 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(184413,fffe9affd0f0,python3.7):2025-02-07-15:55:52.930.888 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(184414,fffec77ee0f0,python3.7):2025-02-07-15:55:53.032.979 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(184413,fffe9affd0f0,python3.7):2025-02-07-15:55:53.338.312 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(184413,fffee17f20f0,python3.7):2025-02-07-15:55:53.338.618 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group  collecting 3 items  collected 3 items  test_all_to_all_v.py [WARNING] DEVICE(184414,fffec77ee0f0,python3.7):2025-02-07-15:55:53.515.757 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(184414,fffec7fef0f0,python3.7):2025-02-07-15:55:53.516.041 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group  collecting 3 items  collected 3 items  test_all_to_all_v.py [WARNING] PROFILER(184413,fffe99ffb0f0,python3.7):2025-02-07-15:55:53.595.418 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(184414,fffec6fed0f0,python3.7):2025-02-07-15:55:53.750.958 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory ..... =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 3 passed, 18 warnings in 51.09s ======================== . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 3 passed, 18 warnings in 51.19s ======================== [WARNING] DEVICE(184414,ffff93a15c10,python3.7):2025-02-07-15:56:03.261.000 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x523bf240 is not exist. [WARNING] DEVICE(184414,ffff93a15c10,python3.7):2025-02-07-15:56:03.262.325 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0xfffe714d2a50 is not exist. [WARNING] DEVICE(184413,ffffad3abc10,python3.7):2025-02-07-15:56:05.530.674 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x4ce3b440 is not exist. [WARNING] DEVICE(184413,ffffad3abc10,python3.7):2025-02-07-15:56:05.532.000 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0xfffe894d29e0 is not exist. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 75.19s (0:01:15) =================== ff8c39f2e51611efac92c4447d93fe45/pass/test_entry_full_ps_lenet_test_full_ps_lenet_ascend.log0000644000175400017540000000102114751343157031512 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/ps/full_ps, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: anyio-3.7.1, timeout-2.1.0, repeat-0.9.1, ordering-0.6, forked-1.1.3, xdist-1.32.0 collected 1 item test_entry_full_ps_lenet.py . ========================= 1 passed in 72.59s (0:01:12) ========================= ././@LongLink0000644000000000000000000000016000000000000011600 Lustar rootrootff8c39f2e51611efac92c4447d93fe45/pass/test_pynative_resnet50_ascend_8p_test_pynative_resnet50_ascend_8p_mpi.logff8c39f2e51611efac92c4447d93fe45/pass/test_pynative_resnet50_ascend_8p_test_pynative_resnet50_ascend0000644000175400017540000000300114751343157032717 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/pynative/network, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: timeout-2.1.0, ordering-0.6, anyio-3.7.1, forked-1.1.3, xdist-1.32.0 collected 1 item test_pynative_resnet50_ascend_8p.py ======================= 1 passed, 19 warnings in 31.75s ======================== ======================= 1 passed, 19 warnings in 31.95s ======================== ======================= 1 passed, 19 warnings in 32.12s ======================== ======================= 1 passed, 19 warnings in 32.26s ======================== ======================= 1 passed, 19 warnings in 32.38s ======================== ======================= 1 passed, 19 warnings in 32.43s ======================== ======================= 1 passed, 19 warnings in 32.57s ======================== ======================= 1 passed, 19 warnings in 32.87s ======================== . ============================== 1 passed in 44.42s ============================== ff8c39f2e51611efac92c4447d93fe45/pass/test_all_test_hccl_broadcast.log0000644000175400017540000007446014751343157025012 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collected 1 item test_all.py ============================= test session starts ============================== ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sinkrootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/nontask_sink plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collecting ... collecting ... [WARNING] ME(186033:281473222130704,MainProcess):2025-02-07-15:56:36.255.481 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] ME(186032:281473041505296,MainProcess):2025-02-07-15:56:36.323.370 [mindspore/context.py:1326] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. [WARNING] DISTRIBUTED(186033,ffff976b3c10,python3.7):2025-02-07-15:57:16.885.643 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(186032,ffff8ca71c10,python3.7):2025-02-07-15:57:16.961.102 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1}, async: 0, submit_now: 1 [WARNING] DISTRIBUTED(186033,fffecbfef0f0,python3.7):2025-02-07-15:57:17.376.615 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DISTRIBUTED(186032,fffe83fff0f0,python3.7):2025-02-07-15:57:17.376.617 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(186032,fffe81ffb0f0,python3.7):2025-02-07-15:57:17.454.488 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(186033,fffecb7ee0f0,python3.7):2025-02-07-15:57:17.524.046 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DEVICE(186032,fffe81ffb0f0,python3.7):2025-02-07-15:57:17.815.144 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(186032,fffe83fff0f0,python3.7):2025-02-07-15:57:17.815.473 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(186033,fffecb7ee0f0,python3.7):2025-02-07-15:57:18.018.710 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(186033,fffecbfef0f0,python3.7):2025-02-07-15:57:18.019.027 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] PROFILER(186032,fffe80ff90f0,python3.7):2025-02-07-15:57:18.068.581 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [WARNING] PROFILER(186033,fffecafed0f0,python3.7):2025-02-07-15:57:18.244.287 [mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_symbol.cc:30] LoadMstxApiSymbol] Dlopen /usr/local/Ascend/latest/lib64/libms_tools_ext.so failed! /usr/local/Ascend/latest/lib64/libms_tools_ext.so: cannot open shared object file: No such file or directory [[0. 1. 2. 3.] [4. 5. 6. 7.]]  collecting 2 items  collected 2 items  test_broadcast.py [[0. 1. 2. 3.] [4. 5. 6. 7.]]  collecting 2 items  collected 2 items  test_broadcast.py [[0. 1. 2. 3.] [4. 5. 6. 7.]] .[[0. 1. 2. 3.] [4. 5. 6. 7.]] .[[0. 1. 2. 3.] [4. 5. 6. 7.]] . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 2 passed, 18 warnings in 48.37s ======================== [[0. 1. 2. 3.] [4. 5. 6. 7.]] . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 2 passed, 18 warnings in 48.46s ======================== [WARNING] DEVICE(186033,ffff976b3c10,python3.7):2025-02-07-15:57:25.216.946 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2fd3cfa0 is not exist. [WARNING] DEVICE(186032,ffff8ca71c10,python3.7):2025-02-07-15:57:26.080.547 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2f3a6c20 is not exist. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 70.23s (0:01:10) =================== ff8c39f2e51611efac92c4447d93fe45/pass/test_entry_msrun_test_msrun_with_correct_hostname.log0000644000175400017540000002102114751343157031473 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/msrun, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collected 1 item test_entry_msrun.py The hostname of this node is ascend85, ip address is 8.92.9.85. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 1 passed, 18 warnings in 43.71s ======================== ff8c39f2e51611efac92c4447d93fe45/pass/test_remove_redundancy_test_remove_redundancy_1_1.log0000644000175400017540001154775714751343157031211 0ustar jenkinsjenkins============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load, inifile: /home/jenkins/sault/virtual_test/virtualenv_0013/sault/config/pytest.ini plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 collected 1 item test_remove_redundancy.py [WARNING] ME(169167:281472893824016,MainProcess):2025-02-07-15:53:35.749.597 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:0, log file:worker_0.log. Environment variable [RANK_ID] is exported. [WARNING] ME(169167:281472893824016,MainProcess):2025-02-07-15:53:35.894.762 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:1, log file:worker_1.log. Environment variable [RANK_ID] is exported. [WARNING] ME(169167:281472893824016,MainProcess):2025-02-07-15:53:36.517.00 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:2, log file:worker_2.log. Environment variable [RANK_ID] is exported. [WARNING] ME(169167:281472893824016,MainProcess):2025-02-07-15:53:36.222.199 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:3, log file:worker_3.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 [WARNING] ME(169167:281472893824016,MainProcess):2025-02-07-15:53:36.403.262 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:4, log file:worker_4.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 [WARNING] ME(169167:281472893824016,MainProcess):2025-02-07-15:53:36.589.536 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:5, log file:worker_5.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 [WARNING] ME(169167:281472893824016,MainProcess):2025-02-07-15:53:36.757.100 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:6, log file:worker_6.log. Environment variable [RANK_ID] is exported. [WARNING] ME(169167:281472893824016,MainProcess):2025-02-07-15:53:36.909.358 [mindspore/parallel/cluster/process_entity/_api.py:264] Start worker process with rank id:7, log file:worker_7.log. Environment variable [RANK_ID] is exported. ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 [WARNING] ME(169167:281472893824016,MainProcess):2025-02-07-15:53:37.709.20 [mindspore/parallel/cluster/process_entity/_api.py:223] Distributed job is spawned. Waiting all processes to exit... ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 ============================= test session starts ============================== platform linux -- Python 3.7.5, pytest-5.4.3, py-1.8.1, pluggy-0.13.1 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/export_and_load plugins: ordering-0.6, anyio-3.7.1, timeout-2.1.0, xdist-1.32.0, forked-1.1.3 [WARNING] DISTRIBUTED(169242,ffff165a40f0,python3.7):2025-02-07-15:53:41.813.546 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50316 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169242,ffff96d80c10,python3.7):2025-02-07-15:53:41.813.549 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50316, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169242,ffff96d80c10,python3.7):2025-02-07-15:53:41.813.722 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50318, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169242,ffff175a60f0,python3.7):2025-02-07-15:53:41.813.752 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50318 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169242,ffff96d80c10,python3.7):2025-02-07-15:53:41.813.770 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(169260,ffff90a16c10,python3.7):2025-02-07-15:53:41.992.393 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50322, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169260,ffff0bfff0f0,python3.7):2025-02-07-15:53:41.992.393 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50322 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169260,ffff90a16c10,python3.7):2025-02-07-15:53:41.992.474 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(169251,fffefb7fe0f0,python3.7):2025-02-07-15:53:42.125.578 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50324 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169251,ffff805f5c10,python3.7):2025-02-07-15:53:42.125.572 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50324, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169251,ffff805f5c10,python3.7):2025-02-07-15:53:42.125.722 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50326, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169251,ffff00e250f0,python3.7):2025-02-07-15:53:42.125.749 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50326 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169251,ffff805f5c10,python3.7):2025-02-07-15:53:42.125.763 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(169242,ffff96d80c10,python3.7):2025-02-07-15:53:42.314.717 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(169273,ffff95a23c10,python3.7):2025-02-07-15:53:42.386.486 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50328, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169273,ffff152540f0,python3.7):2025-02-07-15:53:42.386.487 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50328 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169273,ffff95a23c10,python3.7):2025-02-07-15:53:42.386.583 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(169285,ffff97501c10,python3.7):2025-02-07-15:53:42.401.399 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50330, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169285,ffff16d2c0f0,python3.7):2025-02-07-15:53:42.401.405 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50330 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169285,ffff97501c10,python3.7):2025-02-07-15:53:42.401.448 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(169260,ffff90a16c10,python3.7):2025-02-07-15:53:42.492.710 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50332, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169260,ffff90a16c10,python3.7):2025-02-07-15:53:42.492.741 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(169260,ffff112510f0,python3.7):2025-02-07-15:53:42.492.738 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50332 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169251,ffff805f5c10,python3.7):2025-02-07-15:53:42.626.388 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(169297,ffff9ffe2c10,python3.7):2025-02-07-15:53:42.672.884 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50334, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169297,ffff1b7fe0f0,python3.7):2025-02-07-15:53:42.672.885 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50334 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169297,ffff9ffe2c10,python3.7):2025-02-07-15:53:42.672.980 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(169242,ffff96d80c10,python3.7):2025-02-07-15:53:42.814.886 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(169273,ffff95a23c10,python3.7):2025-02-07-15:53:42.886.822 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50336, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169273,ffff95a23c10,python3.7):2025-02-07-15:53:42.886.859 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(169273,ffff162560f0,python3.7):2025-02-07-15:53:42.886.853 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50336 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169285,ffff97501c10,python3.7):2025-02-07-15:53:42.901.651 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50338, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169285,ffff97501c10,python3.7):2025-02-07-15:53:42.901.679 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(169285,ffff17d2e0f0,python3.7):2025-02-07-15:53:42.901.680 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50338 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169309,ffff88eb9c10,python3.7):2025-02-07-15:53:42.908.476 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50340, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169309,ffff03fff0f0,python3.7):2025-02-07-15:53:42.908.476 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50340 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169309,ffff88eb9c10,python3.7):2025-02-07-15:53:42.908.584 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(169260,ffff90a16c10,python3.7):2025-02-07-15:53:42.993.155 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(169323,ffff82a65c10,python3.7):2025-02-07-15:53:43.047.469 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 19 source: 127.0.0.1:50342, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169323,ffff022980f0,python3.7):2025-02-07-15:53:43.047.469 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50342 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169323,ffff82a65c10,python3.7):2025-02-07-15:53:43.047.535 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 1 [WARNING] DISTRIBUTED(169251,ffff805f5c10,python3.7):2025-02-07-15:53:43.126.502 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(169297,ffff9ffe2c10,python3.7):2025-02-07-15:53:43.173.302 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50344, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169297,ffff208130f0,python3.7):2025-02-07-15:53:43.173.322 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50344 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169297,ffff9ffe2c10,python3.7):2025-02-07-15:53:43.173.337 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(169242,ffff96d80c10,python3.7):2025-02-07-15:53:43.315.039 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(169273,ffff95a23c10,python3.7):2025-02-07-15:53:43.387.258 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(169285,ffff97501c10,python3.7):2025-02-07-15:53:43.401.986 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(169309,ffff88eb9c10,python3.7):2025-02-07-15:53:43.408.753 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50346, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169309,ffff88eb9c10,python3.7):2025-02-07-15:53:43.408.793 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(169309,ffff096d60f0,python3.7):2025-02-07-15:53:43.408.793 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50346 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169260,ffff90a16c10,python3.7):2025-02-07-15:53:43.493.246 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(169323,ffff82a65c10,python3.7):2025-02-07-15:53:43.547.710 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:485] Connect] Connection 20 source: 127.0.0.1:50348, destination: 127.0.0.1:8118 [WARNING] DISTRIBUTED(169323,ffff82a65c10,python3.7):2025-02-07-15:53:43.547.738 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:494] Connect] Waiting for the state of the connection to 127.0.0.1:8118 to be connected...Retry number: 2 [WARNING] DISTRIBUTED(169323,ffff0329a0f0,python3.7):2025-02-07-15:53:43.547.738 [mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.cc:79] ConnectedEventHandler] Connection from 127.0.0.1:50348 to 127.0.0.1:8118 is successfully created. System errno: Success [WARNING] DISTRIBUTED(169251,ffff805f5c10,python3.7):2025-02-07-15:53:43.626.631 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(169297,ffff9ffe2c10,python3.7):2025-02-07-15:53:43.673.714 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(169242,ffff96d80c10,python3.7):2025-02-07-15:53:43.815.146 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(4/1200). [WARNING] DISTRIBUTED(169273,ffff95a23c10,python3.7):2025-02-07-15:53:43.887.345 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(169285,ffff97501c10,python3.7):2025-02-07-15:53:43.902.069 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(169309,ffff88eb9c10,python3.7):2025-02-07-15:53:43.909.255 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(169260,ffff90a16c10,python3.7):2025-02-07-15:53:43.993.331 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(169323,ffff82a65c10,python3.7):2025-02-07-15:53:44.048.098 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(1/1200). [WARNING] DISTRIBUTED(169251,ffff805f5c10,python3.7):2025-02-07-15:53:44.126.737 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(4/1200). [WARNING] DISTRIBUTED(169297,ffff9ffe2c10,python3.7):2025-02-07-15:53:44.173.804 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(169242,ffff96d80c10,python3.7):2025-02-07-15:53:44.315.230 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(5/1200). [WARNING] DISTRIBUTED(169273,ffff95a23c10,python3.7):2025-02-07-15:53:44.387.422 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(169285,ffff97501c10,python3.7):2025-02-07-15:53:44.402.145 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(3/1200). [WARNING] DISTRIBUTED(169309,ffff88eb9c10,python3.7):2025-02-07-15:53:44.409.355 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(2/1200). [WARNING] DISTRIBUTED(169260,ffff90a16c10,python3.7):2025-02-07-15:53:44.493.414 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:245] BuildCluster] Topology build timed out., retry(4/1200). [WARNING] DISTRIBUTED(169323,ffff82a65c10,python3.7):2025-02-07-15:53:44.548.220 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(169323,ffff82a65c10,python3.7):2025-02-07-15:53:44.548.255 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 7 rank id: 7 [WARNING] DISTRIBUTED(169251,ffff805f5c10,python3.7):2025-02-07-15:53:44.626.839 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(169251,ffff805f5c10,python3.7):2025-02-07-15:53:44.626.870 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 1 rank id: 1 [WARNING] DISTRIBUTED(169297,ffff9ffe2c10,python3.7):2025-02-07-15:53:44.673.902 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(169297,ffff9ffe2c10,python3.7):2025-02-07-15:53:44.673.931 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 5 rank id: 5 [WARNING] DISTRIBUTED(169242,ffff96d80c10,python3.7):2025-02-07-15:53:44.815.333 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(169242,ffff96d80c10,python3.7):2025-02-07-15:53:44.815.365 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 0 rank id: 0 [WARNING] DISTRIBUTED(169273,ffff95a23c10,python3.7):2025-02-07-15:53:44.887.530 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(169273,ffff95a23c10,python3.7):2025-02-07-15:53:44.887.560 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 3 rank id: 3 [WARNING] DISTRIBUTED(169285,ffff97501c10,python3.7):2025-02-07-15:53:44.902.235 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(169285,ffff97501c10,python3.7):2025-02-07-15:53:44.902.261 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 4 rank id: 4 [WARNING] DISTRIBUTED(169309,ffff88eb9c10,python3.7):2025-02-07-15:53:44.909.514 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(169309,ffff88eb9c10,python3.7):2025-02-07-15:53:44.909.565 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 6 rank id: 6 [WARNING] DISTRIBUTED(169260,ffff90a16c10,python3.7):2025-02-07-15:53:44.993.525 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:248] BuildCluster] Cluster is successfully initialized. [WARNING] DISTRIBUTED(169260,ffff90a16c10,python3.7):2025-02-07-15:53:44.993.560 [mindspore/ccsrc/distributed/cluster/cluster_context.cc:355] PostProcess] This node 2 rank id: 2 [WARNING] DISTRIBUTED(169251,ffff805f5c10,python3.7):2025-02-07-15:53:50.249.964 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(169251,ffff805f5c10,python3.7):2025-02-07-15:53:50.252.693 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(169251,fffe737fe0f0,python3.7):2025-02-07-15:53:50.258.209 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 distribute network. collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. distribute network train. [WARNING] DEVICE(169251,fffe737fe0f0,python3.7):2025-02-07-15:53:50.759.773 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 2 [WARNING] DEVICE(169251,fffe737fe0f0,python3.7):2025-02-07-15:53:51.261.565 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 397/400, sleep 2 [WARNING] DEVICE(169251,fffe737fe0f0,python3.7):2025-02-07-15:53:51.763.436 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 396/400, sleep 1 [WARNING] DISTRIBUTED(169323,ffff82a65c10,python3.7):2025-02-07-15:53:51.848.411 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(169323,ffff82a65c10,python3.7):2025-02-07-15:53:51.850.620 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(169323,fffe7e7fc0f0,python3.7):2025-02-07-15:53:51.859.318 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 distribute network. collected 1 item remove_redundancy.py [WARNING] DISTRIBUTED(169260,ffff90a16c10,python3.7):2025-02-07-15:53:52.176.476 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 distribute network shard. [WARNING] DISTRIBUTED(169260,ffff90a16c10,python3.7):2025-02-07-15:53:52.179.225 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(169260,fffe94ff90f0,python3.7):2025-02-07-15:53:52.187.009 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 [WARNING] DISTRIBUTED(169297,ffff9ffe2c10,python3.7):2025-02-07-15:53:52.201.628 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(169297,ffff9ffe2c10,python3.7):2025-02-07-15:53:52.203.494 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(169297,fffe937fe0f0,python3.7):2025-02-07-15:53:52.211.160 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 1 distribute network. distribute network create dataset. distribute network. [WARNING] DEVICE(169251,fffe737fe0f0,python3.7):2025-02-07-15:53:52.268.616 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 395/400, sleep 2 [WARNING] DEVICE(169323,fffe7e7fc0f0,python3.7):2025-02-07-15:53:52.362.982 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 distribute network train. collected 1 item collected 1 item remove_redundancy.py distribute network shard. distribute network create dataset. remove_redundancy.py [WARNING] DISTRIBUTED(169273,ffff95a23c10,python3.7):2025-02-07-15:53:52.579.735 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(169273,ffff95a23c10,python3.7):2025-02-07-15:53:52.582.576 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(169273,fffe997fa0f0,python3.7):2025-02-07-15:53:52.593.088 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 399/400, sleep 2 distribute network shard. distribute network. distribute network create dataset. [WARNING] DEVICE(169260,fffe94ff90f0,python3.7):2025-02-07-15:53:52.689.335 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 1 distribute network train. [WARNING] DEVICE(169297,fffe937fe0f0,python3.7):2025-02-07-15:53:52.714.284 [mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.cc:240] QueryUniqueID] Retry to lookup the unique id for group hccl_world_group from the meta server node...Retry time: 398/400, sleep 2 [WARNING] DISTRIBUTED(169242,ffff96d80c10,python3.7):2025-02-07-15:53:52.730.320 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(169242,ffff96d80c10,python3.7):2025-02-07-15:53:52.733.169 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(169242,fffe8affd0f0,python3.7):2025-02-07-15:53:52.764.760 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169242,fffe88ff90f0,python3.7):2025-02-07-15:53:52.768.761 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(169251,fffe737fe0f0,python3.7):2025-02-07-15:53:52.772.021 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group distribute network train. [WARNING] DISTRIBUTED(169285,ffff97501c10,python3.7):2025-02-07-15:53:52.770.804 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(169285,ffff97501c10,python3.7):2025-02-07-15:53:52.772.320 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DEVICE(169251,fffe2bfff0f0,python3.7):2025-02-07-15:53:52.777.597 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(169309,ffff88eb9c10,python3.7):2025-02-07-15:53:52.778.349 [mindspore/ccsrc/distributed/collective/collective_manager.cc:332] CreateCommunicationGroup] Start to create communication group: hccl_world_group [const vector]{0, 1, 2, 3, 4, 5, 6, 7}, async: 1, submit_now: 1 [WARNING] DISTRIBUTED(169309,ffff88eb9c10,python3.7):2025-02-07-15:53:52.780.287 [mindspore/ccsrc/distributed/collective/collective_manager.cc:381] CreateCommunicationGroup] This group's communicator is async created hccl_world_group [WARNING] DISTRIBUTED(169285,fffe8effd0f0,python3.7):2025-02-07-15:53:52.782.606 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169285,fffe8e7fc0f0,python3.7):2025-02-07-15:53:52.788.717 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(169309,fffe88ff90f0,python3.7):2025-02-07-15:53:52.791.467 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169309,fffe6bfff0f0,python3.7):2025-02-07-15:53:52.795.775 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 collected 1 item distribute network. distribute network. [WARNING] DISTRIBUTED(169323,fffe7e7fc0f0,python3.7):2025-02-07-15:53:52.866.657 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169323,fffdab7fe0f0,python3.7):2025-02-07-15:53:52.871.034 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 remove_redundancy.py distribute network. distribute network shard. distribute network create dataset. collected 1 item remove_redundancy.py distribute network shard. [WARNING] DISTRIBUTED(169273,fffe997fa0f0,python3.7):2025-02-07-15:53:53.096.031 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169273,fffe98ff90f0,python3.7):2025-02-07-15:53:53.099.790 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 distribute network train. collected 1 item distribute network create dataset. collected 1 item [WARNING] DISTRIBUTED(169260,fffe94ff90f0,python3.7):2025-02-07-15:53:53.191.211 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169260,fffdc5ffb0f0,python3.7):2025-02-07-15:53:53.193.204 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 [WARNING] DISTRIBUTED(169297,fffe937fe0f0,python3.7):2025-02-07-15:53:53.217.047 [mindspore/ccsrc/distributed/collective/collective_manager.cc:777] CreateDeviceCommunicator] Begin initialize communication group on the device side: hccl_world_group remove_redundancy.py [WARNING] DEVICE(169297,fffddcff90f0,python3.7):2025-02-07-15:53:53.219.812 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:140] InitializeByRootInfoConfig] Start to initialize communicator by HcclCommInitRootInfo for hccl_world_group, hcclBufferSize is 200 MB. hcclDeterministic is 0 distribute network train. distribute network shard. remove_redundancy.py distribute network shard. distribute network create dataset. distribute network create dataset. distribute network train. distribute network train. [WARNING] DEVICE(169242,fffe88ff90f0,python3.7):2025-02-07-15:53:53.482.920 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(169242,fffe8affd0f0,python3.7):2025-02-07-15:53:53.483.341 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169285,fffe8e7fc0f0,python3.7):2025-02-07-15:53:53.501.732 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(169285,fffe8effd0f0,python3.7):2025-02-07-15:53:53.502.109 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169309,fffe6bfff0f0,python3.7):2025-02-07-15:53:53.507.786 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(169309,fffe88ff90f0,python3.7):2025-02-07-15:53:53.509.349 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169273,fffe98ff90f0,python3.7):2025-02-07-15:53:53.544.146 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(169273,fffe997fa0f0,python3.7):2025-02-07-15:53:53.544.423 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169323,fffdab7fe0f0,python3.7):2025-02-07-15:53:53.590.310 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(169323,fffe7e7fc0f0,python3.7):2025-02-07-15:53:53.590.578 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169260,fffdc5ffb0f0,python3.7):2025-02-07-15:53:53.596.414 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(169260,fffe94ff90f0,python3.7):2025-02-07-15:53:53.596.695 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169251,fffe2bfff0f0,python3.7):2025-02-07-15:53:53.636.970 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(169251,fffe737fe0f0,python3.7):2025-02-07-15:53:53.638.656 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] DEVICE(169297,fffddcff90f0,python3.7):2025-02-07-15:53:53.680.881 [mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm/ascend_communication_group.cc:158] InitializeByRootInfoConfig] End to initialize communicator by HcclCommInitRootInfo for hccl_world_group [WARNING] DISTRIBUTED(169297,fffe937fe0f0,python3.7):2025-02-07-15:53:53.681.236 [mindspore/ccsrc/distributed/collective/collective_manager.cc:788] CreateDeviceCommunicator] End initialize communication group on the device side: hccl_world_group [WARNING] PARALLEL(169251,ffff805f5c10,python3.7):2025-02-07-15:53:54.044.878 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(169260,ffff90a16c10,python3.7):2025-02-07-15:53:54.556.565 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(169297,ffff9ffe2c10,python3.7):2025-02-07-15:53:54.637.254 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(169251,ffff805f5c10,python3.7):2025-02-07-15:53:54.641.540 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169323,ffff82a65c10,python3.7):2025-02-07-15:53:54.661.969 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(169273,ffff95a23c10,python3.7):2025-02-07-15:53:54.667.867 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(169285,ffff97501c10,python3.7):2025-02-07-15:53:54.716.145 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(169309,ffff88eb9c10,python3.7):2025-02-07-15:53:54.739.597 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(169242,ffff96d80c10,python3.7):2025-02-07-15:53:54.765.776 [mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.cc:42] Init] Now, Dataset broadcast optimize pass only support O0 and O1 jit level. [WARNING] PARALLEL(169260,ffff90a16c10,python3.7):2025-02-07-15:53:55.154.765 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169297,ffff9ffe2c10,python3.7):2025-02-07-15:53:55.252.053 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169273,ffff95a23c10,python3.7):2025-02-07-15:53:55.271.173 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169323,ffff82a65c10,python3.7):2025-02-07-15:53:55.274.625 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169285,ffff97501c10,python3.7):2025-02-07-15:53:55.312.551 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169309,ffff88eb9c10,python3.7):2025-02-07-15:53:55.338.250 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169242,ffff96d80c10,python3.7):2025-02-07-15:53:55.352.380 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 14.5405, [21] [bootstrap]: 0.068945 [type_inference]: 1.00696 [auto_monad]: 0.00197372 [graph_reusing]: 2.262e-05 [inline]: 0.0435112, [2] [rewriter_before_opt_a]: 0.00155247 [a1a2]: 0.0418873, [2] [Cycle 1]: 0.0284774, [11] [expand_dump_flag]: 4.219e-05 [switch_simplify]: 0.00114909 [loop_unroll]: 0.00066939 [a_1]: 0.0222229 [recompute_prepare]: 0.00016638 [updatestate_depend_eliminate]: 0.00037424 [updatestate_assign_eliminate]: 0.00011339 [updatestate_loads_eliminate]: 0.00020512 [parameter_eliminate]: 5.56e-06 [a_2]: 0.00321186 [parallel_inline_pass]: 0.00010052 [Cycle 2]: 0.00541499, [11] [expand_dump_flag]: 2.29001e-06 [switch_simplify]: 9.308e-05 [loop_unroll]: 9.219e-05 [a_1]: 0.0031452 [recompute_prepare]: 9.847e-05 [updatestate_depend_eliminate]: 7.88099e-05 [updatestate_assign_eliminate]: 5.935e-05 [updatestate_loads_eliminate]: 6.496e-05 [parameter_eliminate]: 2.90002e-06 [a_2]: 0.0016053 [parallel_inline_pass]: 9.937e-05 [parallel-infer-symbol]: 0.00023736 [pre_auto_parallel]: 0.00010232 [insert-virtual-dataset]: 0.00143769 [parallel-infer-symbol-second]: 1.89e-06 [dataset_repeat_opt]: 9.50199e-05 [pipeline_split]: 9.289e-05 [optimize]: 0.597665, [52] [py_interpret_to_execute]: 0.00014527 [rewriter_before_opt_a]: 0.00027797 [opt_a]: 0.583267, [3] [Cycle 1]: 0.503694, [46] [expand_dump_flag]: 1.61002e-06 [switch_simplify]: 0.00012336 [loop_unroll]: 0.00011415 [a_1]: 0.00338394 [recompute_prepare]: 0.00010259 [updatestate_depend_eliminate]: 0.00021074 [updatestate_assign_eliminate]: 6.61301e-05 [updatestate_loads_eliminate]: 6.55201e-05 [parameter_eliminate]: 2.68e-06 [a_2]: 0.0015463 [accelerated_algorithm]: 0.00021798 [shard]: 2.14006e-06 [meta_shard_fg_expand]: 5.494e-05 [shard_inline]: 0.00010569 [auto_parallel]: 7.255e-05 [parallel]: 0.01548 [flash_sp]: 5.406e-05 [merge_comm]: 0.00012633 [allreduce_fusion]: 7.604e-05 [matmul_add_comm_reduction]: 9.84501e-05 [allreduce_slice_to_reducescatter]: 4.00003e-07 [virtual_shard_identity]: 0.00012827 [virtual_dataset]: 0.00016264 [get_grad_eliminate_]: 0.000116 [virtual_output]: 0.0001123 [merge_forward]: 7.579e-05 [cell_reuse_recompute_pass]: 2.84996e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020746 [before_grad]: 0.00021722 [inplace_validation]: 0.00012477 [parallel_renormalize]: 0.0220451 [update_top_fg]: 6.49947e-07 [cast_eliminate]: 0.00014638 [meta_fg_expand]: 0.265595 [inplace_validation_after_expand]: 0.00153793 [flash_sp_send_recv_attached]: 0.0012011 [receive_attached]: 8.449e-05 [after_resolve]: 0.00197662 [a_after_grad]: 0.0039562 [special_op_eliminate]: 0.00181773 [renormalize]: 0.1505 [add_forward_monad_depend]: 0.00035843 [auto_monad_grad]: 0.0002104 [auto_monad_eliminator]: 0.00180272 [cse]: 0.00417578 [a_3]: 0.0246704 [Cycle 2]: 0.0678808, [46] [expand_dump_flag]: 5.14199e-05 [switch_simplify]: 0.00180817 [loop_unroll]: 0.00152972 [a_1]: 0.0305578 [recompute_prepare]: 0.00017152 [updatestate_depend_eliminate]: 0.00022287 [updatestate_assign_eliminate]: 0.00010364 [updatestate_loads_eliminate]: 0.00015647 [parameter_eliminate]: 3.31993e-06 [a_2]: 0.00429629 [accelerated_algorithm]: 0.00016216 [shard]: 1.72993e-06 [meta_shard_fg_expand]: 7.17599e-05 [shard_inline]: 0.00016335 [auto_parallel]: 0.00011601 [parallel]: 1.166e-05 [flash_sp]: 0.00012121 [merge_comm]: 0.00011001 [allreduce_fusion]: 9.439e-05 [matmul_add_comm_reduction]: 0.00011266 [allreduce_slice_to_reducescatter]: 3.7998e-07 [virtual_shard_identity]: 0.00014369 [virtual_dataset]: 0.00013751 [get_grad_eliminate_]: 0.00013348 [virtual_output]: 0.00013494 [merge_forward]: 9.258e-05 [cell_reuse_recompute_pass]: 2.03005e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025141 [before_grad]: 0.00023947 [inplace_validation]: 8.57001e-05 [parallel_renormalize]: 1.60071e-07 [update_top_fg]: 5.00004e-07 [cast_eliminate]: 0.00015469 [meta_fg_expand]: 0.00027939 [inplace_validation_after_expand]: 0.00017714 [flash_sp_send_recv_attached]: 1.61992e-06 [receive_attached]: 1.91992e-06 [after_resolve]: 0.00015848 [a_after_grad]: 0.0002285 [special_op_eliminate]: 0.00013648 [renormalize]: 0.0175084 [add_forward_monad_depend]: 5.66e-06 [auto_monad_grad]: 2.64996e-06 [auto_monad_eliminator]: 0.00029419 [cse]: 0.00646494 [a_3]: 0.00096674 [Cycle 3]: 0.0116722, [46] [expand_dump_flag]: 2.05997e-06 [switch_simplify]: 0.00013191 [loop_unroll]: 0.00012724 [a_1]: 0.00422194 [recompute_prepare]: 0.0001362 [updatestate_depend_eliminate]: 0.00015464 [updatestate_assign_eliminate]: 9.449e-05 [updatestate_loads_eliminate]: 9.063e-05 [parameter_eliminate]: 3.25998e-06 [a_2]: 0.00204797 [accelerated_algorithm]: 0.0001552 [shard]: 1.45996e-06 [meta_shard_fg_expand]: 5.06301e-05 [shard_inline]: 0.00013048 [auto_parallel]: 0.00012307 [parallel]: 1.007e-05 [flash_sp]: 2.29001e-06 [merge_comm]: 0.00010703 [allreduce_fusion]: 9.45301e-05 [matmul_add_comm_reduction]: 0.00011903 [allreduce_slice_to_reducescatter]: 4.49945e-07 [virtual_shard_identity]: 0.00013837 [virtual_dataset]: 0.00013191 [get_grad_eliminate_]: 0.00012748 [virtual_output]: 0.0001291 [merge_forward]: 9.339e-05 [cell_reuse_recompute_pass]: 2.81993e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.0002448 [before_grad]: 0.00023198 [inplace_validation]: 8.87e-05 [parallel_renormalize]: 1.20024e-07 [update_top_fg]: 5.49946e-07 [cast_eliminate]: 0.00014588 [meta_fg_expand]: 0.00011499 [inplace_validation_after_expand]: 0.00011368 [flash_sp_send_recv_attached]: 1.43005e-06 [receive_attached]: 1.16997e-06 [after_resolve]: 0.00014654 [a_after_grad]: 0.00021842 [special_op_eliminate]: 0.0001285 [renormalize]: 1.10012e-07 [add_forward_monad_depend]: 1.75997e-06 [auto_monad_grad]: 1.61002e-06 [auto_monad_eliminator]: 0.00016703 [cse]: 0.0003995 [a_3]: 0.00094293 [py_interpret_to_execute_after_opt_a]: 0.00013738 [slice_cell_reuse_recomputed_activation]: 1.92004e-06 [rewriter_after_opt_a]: 0.00098976 [convert_after_rewriter]: 0.00011665 [order_py_execute_after_rewriter]: 8.693e-05 [opt_b]: 0.00390795, [1] [Cycle 1]: 0.00389751, [7] [b_1]: 0.00304112 [b_2]: 0.00013526 [updatestate_depend_eliminate]: 9.659e-05 [updatestate_assign_eliminate]: 8.565e-05 [updatestate_loads_eliminate]: 8.902e-05 [renormalize]: 3.7998e-07 [cse]: 0.00039581 [optimize_parallel_all_gather_comm]: 0.00013892 [overlap_param_gather]: 4.21004e-06 [cconv]: 6.402e-05 [loop_unroll]: 0.00100895 [opt_after_cconv]: 0.00157289, [1] [Cycle 1]: 0.00156629, [7] [c_1]: 0.00077441 [parameter_eliminate]: 2.42004e-06 [updatestate_depend_eliminate]: 0.0001292 [updatestate_assign_eliminate]: 9.194e-05 [updatestate_loads_eliminate]: 9.279e-05 [cse]: 0.0004217 [renormalize]: 4.20026e-07 [remove_dup_value]: 0.00058046 [tuple_transform]: 0.00092961, [1] [Cycle 1]: 0.0009231, [2] [d_1]: 0.00090766 [renormalize]: 4.10015e-07 [partial_unused_args_eliminate]: 2.23005e-06 [add_cache_embedding]: 0.0001523 [add_recomputation]: 0.00068412 [cse_after_recomputation]: 0.00029999, [1] [Cycle 1]: 0.00029324, [1] [cse]: 0.00028208 [environ_conv]: 8.83701e-05 [swap_dp_allreduce_reducescatter]: 0.00012672 [bias_add_comm_swap]: 2.07999e-06 [label_micro_interleaved_index]: 1.37999e-06 [label_fine_grained_interleaved_index]: 0.00050926 [merge_cast_opt]: 1.34006e-06 [slice_recompute_activation]: 0.00014968 [micro_interleaved_order_control]: 1.54995e-06 [assign_add_opt]: 0.000408 [ForceFp32Comm]: 1.30991e-06 [remove_cast_before_assign_add]: 0.00010374 [full_micro_interleaved_order_control]: 2.22004e-06 [reorder_send_recv_between_fp_bp]: 1.06997e-06 [comm_op_add_attrs]: 0.00014644 [add_comm_op_reuse_tag]: 0.00014216 [interleave_split_concat_branches]: 7.59959e-07 [interleave_parallel_branches]: 6.39935e-07 [overlap_opt_shard_in_pipeline]: 3.00601e-05 [overlap_opt_shard_grad_in_pipeline]: 2.24996e-06 [control_data_broadcast_order]: 7.70087e-07 [grouped_pairwise_exchange_alltoall]: 9.9201e-06 [offloading_packed_experts]: 1.46998e-06 [overlap_recompute_and_grad_model_parallel]: 1.40991e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.50062e-07 [overlap_recompute_allgather_and_fa_grad]: 7.57801e-05 [overlap_grad_ring_attention]: 0.00015309 [overlap_grad_flash_sp]: 0.000118 [begin_end_overlap_inline]: 5.60074e-07 [split_matmul_comm_elemetwise]: 1.22993e-06 [split_layernorm_comm]: 1.16997e-06 [handle_group_info]: 6.80005e-06 [symbol_engine_optimizer]: 0.00079356, [1] [Cycle 1]: 0.00078744, [6] [build]: 5.186e-05 [elim_shapecalc]: 0.00014016 [elim_not_effective]: 0.00021939 [opt_reshape]: 0.00013027 [fold_const_symbol]: 0.00020836 [renormalize]: 5.50062e-07 [pipeline_parallel_scheduler]: 2.89991e-06 [auto_monad_reorder]: 0.00032987 [get_jit_bprop_graph]: 4.4005e-07 [rewriter_after_jit_bprop_graph]: 3.69968e-07 [eliminate_special_op_node]: 0.00137785 [distribtued_split]: 0.00039458 [validate]: 0.00027742 [task_emit]: 12.8154 [execute]: 9.37004e-06 Sums bootstrap : 0.068945s : 0.47% type_inference : 1.006964s : 6.93% auto_monad : 0.001974s : 0.01% graph_reusing : 0.000023s : 0.00% inline.rewriter_before_opt_a : 0.001552s : 0.01% inline.a1a2.expand_dump_flag : 0.000044s : 0.00% inline.a1a2.switch_simplify : 0.001242s : 0.01% inline.a1a2.loop_unroll : 0.000762s : 0.01% inline.a1a2.a_1 : 0.025368s : 0.17% inline.a1a2.recompute_prepare : 0.000265s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000453s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000173s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000270s : 0.00% inline.a1a2.parameter_eliminate : 0.000008s : 0.00% inline.a1a2.a_2 : 0.004817s : 0.03% inline.a1a2.parallel_inline_pass : 0.000200s : 0.00% parallel-infer-symbol : 0.000237s : 0.00% pre_auto_parallel : 0.000102s : 0.00% insert-virtual-dataset : 0.001438s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000095s : 0.00% pipeline_split : 0.000093s : 0.00% optimize.py_interpret_to_execute : 0.000145s : 0.00% optimize.rewriter_before_opt_a : 0.000278s : 0.00% optimize.opt_a.expand_dump_flag : 0.000055s : 0.00% optimize.opt_a.switch_simplify : 0.002063s : 0.01% optimize.opt_a.loop_unroll : 0.001771s : 0.01% optimize.opt_a.a_1 : 0.038164s : 0.26% optimize.opt_a.recompute_prepare : 0.000410s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000588s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000264s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000313s : 0.00% optimize.opt_a.parameter_eliminate : 0.000009s : 0.00% optimize.opt_a.a_2 : 0.007891s : 0.05% optimize.opt_a.accelerated_algorithm : 0.000535s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000177s : 0.00% optimize.opt_a.shard_inline : 0.000400s : 0.00% optimize.opt_a.auto_parallel : 0.000312s : 0.00% optimize.opt_a.parallel : 0.015502s : 0.11% optimize.opt_a.flash_sp : 0.000178s : 0.00% optimize.opt_a.merge_comm : 0.000343s : 0.00% optimize.opt_a.allreduce_fusion : 0.000265s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000330s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000410s : 0.00% optimize.opt_a.virtual_dataset : 0.000432s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000377s : 0.00% optimize.opt_a.virtual_output : 0.000376s : 0.00% optimize.opt_a.merge_forward : 0.000262s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000704s : 0.00% optimize.opt_a.before_grad : 0.000689s : 0.00% optimize.opt_a.inplace_validation : 0.000299s : 0.00% optimize.opt_a.parallel_renormalize : 0.022045s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000447s : 0.00% optimize.opt_a.meta_fg_expand : 0.265989s : 1.83% optimize.opt_a.inplace_validation_after_expand : 0.001829s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001204s : 0.01% optimize.opt_a.receive_attached : 0.000088s : 0.00% optimize.opt_a.after_resolve : 0.002282s : 0.02% optimize.opt_a.a_after_grad : 0.004403s : 0.03% optimize.opt_a.special_op_eliminate : 0.002083s : 0.01% optimize.opt_a.renormalize : 0.168009s : 1.16% optimize.opt_a.add_forward_monad_depend : 0.000366s : 0.00% optimize.opt_a.auto_monad_grad : 0.000215s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002264s : 0.02% optimize.opt_a.cse : 0.011040s : 0.08% optimize.opt_a.a_3 : 0.026580s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000137s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000990s : 0.01% optimize.convert_after_rewriter : 0.000117s : 0.00% optimize.order_py_execute_after_rewriter : 0.000087s : 0.00% optimize.opt_b.b_1 : 0.003041s : 0.02% optimize.opt_b.b_2 : 0.000135s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000097s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000086s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000089s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000396s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000139s : 0.00% optimize.overlap_param_gather : 0.000004s : 0.00% optimize.cconv : 0.000064s : 0.00% optimize.loop_unroll : 0.001009s : 0.01% optimize.opt_after_cconv.c_1 : 0.000774s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000129s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000092s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000093s : 0.00% optimize.opt_after_cconv.cse : 0.000422s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000580s : 0.00% optimize.tuple_transform.d_1 : 0.000908s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000152s : 0.00% optimize.add_recomputation : 0.000684s : 0.00% optimize.cse_after_recomputation.cse : 0.000282s : 0.00% optimize.environ_conv : 0.000088s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000127s : 0.00% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000509s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000150s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000408s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000104s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000146s : 0.00% optimize.add_comm_op_reuse_tag : 0.000142s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000030s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.00% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000076s : 0.00% optimize.overlap_grad_ring_attention : 0.000153s : 0.00% optimize.overlap_grad_flash_sp : 0.000118s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000052s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000140s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000219s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000130s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000208s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000330s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001378s : 0.01% distribtued_split : 0.000395s : 0.00% validate : 0.000277s : 0.00% task_emit : 12.815391s : 88.21% execute : 0.000009s : 0.00% Time group info: ------[substitution.] 0.049064 4298 0.04% : 0.000019s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000021s : 9: substitution.addn_check_dump 0.10% : 0.000051s : 7: substitution.addn_zero_filter 0.03% : 0.000013s : 7: substitution.adjust_all_reduce_mul_add 0.60% : 0.000294s : 71: substitution.arithmetic_simplify 0.11% : 0.000052s : 10: substitution.cast_eliminate 0.11% : 0.000053s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000024s : 15: substitution.environ_get_add_eliminate 0.03% : 0.000017s : 12: substitution.environ_get_depend_swap 0.05% : 0.000027s : 27: substitution.environ_get_eliminate 0.07% : 0.000035s : 15: substitution.environ_get_set_eliminate 0.03% : 0.000017s : 23: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.02% : 0.000012s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000030s : 107: substitution.fold_const_symbol 64.76% : 0.031774s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000081s : 126: substitution.graph_param_transform 0.02% : 0.000008s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.81% : 0.011684s : 331: substitution.inline 1.41% : 0.000692s : 112: substitution.inline_without_move 0.25% : 0.000122s : 309: substitution.j_node_and_user_rematch 0.23% : 0.000115s : 40: substitution.less_batch_normalization 0.10% : 0.000047s : 90: substitution.load_eliminater 0.10% : 0.000048s : 10: substitution.merge_addn 0.22% : 0.000109s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.11% : 0.000053s : 1: substitution.partial_defer_inline 0.15% : 0.000071s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.06% : 0.000031s : 15: substitution.reduce_eliminate 0.32% : 0.000157s : 309: substitution.remove_not_recompute_node 1.98% : 0.000971s : 508: substitution.replace_applicator 0.22% : 0.000106s : 251: substitution.replace_old_param 0.07% : 0.000035s : 11: substitution.reshape_eliminate 0.02% : 0.000012s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000009s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.17% : 0.000082s : 34: substitution.switch_simplify 0.06% : 0.000028s : 11: substitution.tile_eliminate 0.52% : 0.000253s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000134s : 107: substitution.tuple_list_get_item_const_eliminator 0.42% : 0.000208s : 107: substitution.tuple_list_get_item_depend_reorder 1.57% : 0.000769s : 308: substitution.tuple_list_get_item_eliminator 0.37% : 0.000179s : 107: substitution.tuple_list_get_set_item_eliminator 0.41% : 0.000201s : 210: substitution.updatestate_pure_node_eliminater 0.68% : 0.000333s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000011s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 1.006476 2 97.37% : 0.979991s : 1: type_inference.infer 2.63% : 0.026484s : 1: type_inference.specialize ------[replace.] 0.009438 775 0.42% : 0.000040s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000007s : 1: replace.arithmetic_simplify 0.51% : 0.000048s : 7: replace.depend_value_elim 0.41% : 0.000039s : 3: replace.environ_get_set_eliminate 28.85% : 0.002723s : 183: replace.getattr_setattr_resolve 30.66% : 0.002894s : 310: replace.inline 0.20% : 0.000019s : 1: replace.merge_addn 1.25% : 0.000118s : 7: replace.partial_eliminate 3.93% : 0.000371s : 25: replace.replace_applicator 4.11% : 0.000388s : 34: replace.switch_simplify 0.54% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 28.69% : 0.002708s : 191: replace.tuple_list_get_item_eliminator 0.17% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.18% : 0.000017s : 1: replace.virtual_dataset_eliminate ------[match.] 0.041786 775 0.04% : 0.000016s : 5: match.ad_related_special_op_eliminate 0.03% : 0.000013s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000016s : 3: match.environ_get_set_eliminate 70.84% : 0.029600s : 183: match.getattr_setattr_resolve 27.43% : 0.011460s : 310: match.inline 0.05% : 0.000021s : 1: match.merge_addn 0.10% : 0.000042s : 7: match.partial_eliminate 0.22% : 0.000094s : 25: match.replace_applicator 0.15% : 0.000062s : 34: match.switch_simplify 0.08% : 0.000032s : 6: match.tuple_list_get_item_depend_reorder 0.98% : 0.000411s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000008s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000009s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020916131318 0.78% : 0.000164s : 1198: predicate.accumulaten_eliminater 0.27% : 0.000057s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000115s : 835: predicate.addn_check_dump 0.78% : 0.000163s : 1198: predicate.addn_zero_filter 0.74% : 0.000154s : 1198: predicate.adjust_all_reduce_mul_add 1.73% : 0.000361s : 2034: predicate.arithmetic_simplify 1.12% : 0.000234s : 1586: predicate.cast_eliminate 3.28% : 0.000686s : 3484: predicate.check_bprop_eliminate 0.55% : 0.000115s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.15% : 0.000241s : 1399: predicate.convert_tensor_eliminate 0.56% : 0.000118s : 838: predicate.depend_value_elim 0.82% : 0.000172s : 1202: predicate.dict_get_item_const_eliminator 0.82% : 0.000172s : 1202: predicate.dict_get_item_eliminator 0.83% : 0.000174s : 1202: predicate.dict_set_item_eliminator 0.04% : 0.000009s : 126: predicate.elim_not_effective 0.11% : 0.000023s : 126: predicate.elim_shapecalc_of_broadcastargs 0.85% : 0.000177s : 1334: predicate.environ_add_const_eliminate 0.84% : 0.000175s : 1337: predicate.environ_get_add_eliminate 0.86% : 0.000179s : 1334: predicate.environ_get_depend_swap 1.43% : 0.000299s : 2172: predicate.environ_get_eliminate 0.91% : 0.000190s : 1337: predicate.environ_get_set_eliminate 1.12% : 0.000234s : 1717: predicate.exchange_switch_depend_value 1.42% : 0.000296s : 1717: predicate.float_depend_g_call 0.55% : 0.000114s : 835: predicate.float_environ_get_switch 0.64% : 0.000134s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.29% : 0.000060s : 395: predicate.get_grad_eliminate 2.31% : 0.000483s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.54% : 0.000113s : 835: predicate.incorporate_call 0.54% : 0.000112s : 835: predicate.incorporate_call_switch 3.82% : 0.000800s : 4602: predicate.inline 2.33% : 0.000488s : 2203: predicate.inline_without_move 0.14% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.32% : 0.000067s : 388: predicate.less_batch_normalization 1.14% : 0.000237s : 1660: predicate.list_to_tuple_eliminator_ 1.86% : 0.000389s : 2874: predicate.load_eliminater 0.19% : 0.000041s : 135: predicate.loop_unroll_after_grad 2.46% : 0.000514s : 2640: predicate.loop_unroll_before_grad 0.96% : 0.000200s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000118s : 837: predicate.merge_addn 3.15% : 0.000659s : 3380: predicate.micro_step_allgather_replace 3.17% : 0.000664s : 3380: predicate.mini_step_allgather_replace 0.77% : 0.000161s : 1199: predicate.minmaximum_grad 0.18% : 0.000038s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 2.08% : 0.000436s : 1717: predicate.partial_defer_inline 1.10% : 0.000230s : 1541: predicate.partial_eliminate 0.82% : 0.000171s : 1198: predicate.print_const_string_wrapper 0.56% : 0.000116s : 824: predicate.reduce_all_const_elim 0.95% : 0.000200s : 1199: predicate.reduce_eliminate 0.14% : 0.000030s : 395: predicate.remove_not_recompute_node 1.92% : 0.000401s : 4829: predicate.replace_applicator 0.79% : 0.000166s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.77% : 0.000162s : 1199: predicate.reshape_eliminate 3.33% : 0.000696s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000022s : 135: predicate.row_tensor_eliminate 3.36% : 0.000704s : 3484: predicate.same_eliminate 0.24% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.30% : 0.000062s : 395: predicate.shard_identity_eliminate 2.08% : 0.000434s : 2338: predicate.special_op_eliminate 0.63% : 0.000132s : 837: predicate.specialize_transform 3.51% : 0.000735s : 3380: predicate.split_environ_get_set_with_tuple_value 1.59% : 0.000332s : 2203: predicate.stack_unstack_eliminate 1.86% : 0.000389s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.28% : 0.000267s : 1717: predicate.switch_defer_inline 4.74% : 0.000991s : 5201: predicate.switch_layer_defer_inline 4.42% : 0.000925s : 5262: predicate.switch_simplify 0.79% : 0.000165s : 1199: predicate.tile_eliminate 0.76% : 0.000160s : 1199: predicate.transpose_eliminate 1.04% : 0.000218s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.04% : 0.000217s : 1469: predicate.tuple_list_get_item_const_eliminator 0.93% : 0.000195s : 1469: predicate.tuple_list_get_item_depend_reorder 1.88% : 0.000394s : 2495: predicate.tuple_list_get_item_eliminator 1.00% : 0.000208s : 1469: predicate.tuple_list_get_set_item_eliminator 1.65% : 0.000345s : 2304: predicate.tuple_list_set_item_eliminator 1.09% : 0.000228s : 1660: predicate.tuple_to_list_eliminator_ 1.85% : 0.000386s : 2874: predicate.updatestate_pure_node_eliminater 2.51% : 0.000524s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000021s : 135: predicate.value_based_eliminate 0.30% : 0.000062s : 397: predicate.virtual_dataset_eliminate 0.28% : 0.000058s : 395: predicate.virtual_output_eliminate 0.11% : 0.000022s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.066001 747 72.61% : 0.047921s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.14% : 0.001409s : 22: func_graph_cloner_run.FuncGraphClonerNode 25.26% : 0.016671s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.537559 346 0.00% : 0.000006s : 1: ForceFp32Comm 0.27% : 0.041892s : 1: a1a2 0.00% : 0.000160s : 1: add_cache_embedding 0.00% : 0.000149s : 1: add_comm_op_reuse_tag 0.00% : 0.000695s : 1: add_recomputation 0.00% : 0.000417s : 1: assign_add_opt 0.01% : 0.002000s : 1: auto_monad 0.00% : 0.000343s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.44% : 0.069100s : 1: bootstrap 0.00% : 0.000070s : 1: cconv 0.00% : 0.000154s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000124s : 1: convert_after_rewriter 0.00% : 0.000305s : 1: cse_after_recomputation 0.00% : 0.000103s : 1: dataset_repeat_opt 0.00% : 0.000409s : 1: distribtued_split 0.01% : 0.001392s : 1: eliminate_special_op_node 0.00% : 0.000096s : 1: environ_conv 0.00% : 0.000019s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000032s : 1: graph_reusing 0.00% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000010s : 1: handle_group_info 0.28% : 0.043527s : 1: inline 0.01% : 0.001457s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000518s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.01% : 0.001020s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.032582s : 61: opt.transform.a1a2 0.00% : 0.000174s : 1: opt.transform.loop_unroll_optimizer 0.58% : 0.089766s : 148: opt.transform.opt_a 0.00% : 0.000772s : 1: opt.transform.opt_after_cconv 0.02% : 0.003146s : 27: opt.transform.opt_b 0.24% : 0.037370s : 16: opt.transform.opt_resolve 0.01% : 0.000904s : 1: opt.transform.opt_trans_graph 0.01% : 0.000822s : 6: opt.transform.special_op_eliminate 0.00% : 0.000692s : 4: opt.transform.symbol_engine_opt 3.75% : 0.583272s : 1: opt_a 0.01% : 0.001579s : 1: opt_after_cconv 0.03% : 0.003912s : 1: opt_b 3.85% : 0.597678s : 1: optimize 0.00% : 0.000147s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000093s : 1: order_py_execute_after_rewriter 0.00% : 0.000123s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000159s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000035s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000008s : 1: overlap_param_gather 0.00% : 0.000082s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000251s : 1: parallel-infer-symbol 0.00% : 0.000008s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000102s : 1: pipeline_split 0.00% : 0.000113s : 1: pre_auto_parallel 0.00% : 0.000154s : 1: py_interpret_to_execute 0.00% : 0.000145s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000109s : 1: remove_cast_before_assign_add 0.00% : 0.000592s : 1: remove_dup_value 0.88% : 0.137436s : 3: renormalize.infer 0.34% : 0.052577s : 3: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001001s : 1: rewriter_after_opt_a 0.01% : 0.001853s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000156s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.00% : 0.000134s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000798s : 1: symbol_engine_optimizer 82.48% : 12.815433s : 1: task_emit 0.01% : 0.000933s : 1: tuple_transform 6.48% : 1.007017s : 1: type_inference 0.01% : 0.001314s : 1: validate TotalTime = 14.2188, [21] [bootstrap]: 0.0014734 [type_inference]: 0.738033 [auto_monad]: 0.00199759 [graph_reusing]: 2.615e-05 [inline]: 0.0430834, [2] [rewriter_before_opt_a]: 0.00152914 [a1a2]: 0.0414854, [2] [Cycle 1]: 0.0281752, [11] [expand_dump_flag]: 3.34501e-05 [switch_simplify]: 0.00106478 [loop_unroll]: 0.00067419 [a_1]: 0.0219536 [recompute_prepare]: 0.00015524 [updatestate_depend_eliminate]: 0.00035123 [updatestate_assign_eliminate]: 0.00011067 [updatestate_loads_eliminate]: 0.00020333 [parameter_eliminate]: 5.93008e-06 [a_2]: 0.0033162 [parallel_inline_pass]: 0.00011295 [Cycle 2]: 0.00550015, [11] [expand_dump_flag]: 1.31002e-06 [switch_simplify]: 0.000103 [loop_unroll]: 0.00010284 [a_1]: 0.00318438 [recompute_prepare]: 9.8e-05 [updatestate_depend_eliminate]: 0.00021617 [updatestate_assign_eliminate]: 6.56899e-05 [updatestate_loads_eliminate]: 6.253e-05 [parameter_eliminate]: 3.48e-06 [a_2]: 0.00148755 [parallel_inline_pass]: 9.96201e-05 [parallel-infer-symbol]: 0.00016782 [pre_auto_parallel]: 0.00010396 [insert-virtual-dataset]: 0.00119091 [parallel-infer-symbol-second]: 2.55008e-06 [dataset_repeat_opt]: 0.00016053 [pipeline_split]: 9.805e-05 [optimize]: 0.599175, [52] [py_interpret_to_execute]: 0.00012236 [rewriter_before_opt_a]: 0.00026653 [opt_a]: 0.583246, [3] [Cycle 1]: 0.504052, [46] [expand_dump_flag]: 1.94006e-06 [switch_simplify]: 0.00010872 [loop_unroll]: 9.655e-05 [a_1]: 0.00332237 [recompute_prepare]: 0.00010422 [updatestate_depend_eliminate]: 0.00010721 [updatestate_assign_eliminate]: 6.33399e-05 [updatestate_loads_eliminate]: 6.84101e-05 [parameter_eliminate]: 3.46999e-06 [a_2]: 0.00161672 [accelerated_algorithm]: 0.00029179 [shard]: 2.31992e-06 [meta_shard_fg_expand]: 4.932e-05 [shard_inline]: 0.00010729 [auto_parallel]: 8.176e-05 [parallel]: 0.01564 [flash_sp]: 5.838e-05 [merge_comm]: 0.00013018 [allreduce_fusion]: 7.504e-05 [matmul_add_comm_reduction]: 9.833e-05 [allreduce_slice_to_reducescatter]: 6.3004e-07 [virtual_shard_identity]: 0.00012197 [virtual_dataset]: 0.00016881 [get_grad_eliminate_]: 0.00011368 [virtual_output]: 0.00011288 [merge_forward]: 7.897e-05 [cell_reuse_recompute_pass]: 2.42004e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020624 [before_grad]: 0.00020203 [inplace_validation]: 0.00015679 [parallel_renormalize]: 0.0206815 [update_top_fg]: 6.59958e-07 [cast_eliminate]: 0.00014619 [meta_fg_expand]: 0.269719 [inplace_validation_after_expand]: 0.00159343 [flash_sp_send_recv_attached]: 0.00122306 [receive_attached]: 8.176e-05 [after_resolve]: 0.00197178 [a_after_grad]: 0.0039103 [special_op_eliminate]: 0.00188291 [renormalize]: 0.148134 [add_forward_monad_depend]: 0.00035438 [auto_monad_grad]: 0.00020571 [auto_monad_eliminator]: 0.00177745 [cse]: 0.00412596 [a_3]: 0.0244804 [Cycle 2]: 0.0674737, [46] [expand_dump_flag]: 5.14201e-05 [switch_simplify]: 0.0017987 [loop_unroll]: 0.00150273 [a_1]: 0.0307857 [recompute_prepare]: 0.00017016 [updatestate_depend_eliminate]: 0.00023134 [updatestate_assign_eliminate]: 0.00010316 [updatestate_loads_eliminate]: 0.00016354 [parameter_eliminate]: 3.94997e-06 [a_2]: 0.00434193 [accelerated_algorithm]: 0.0002023 [shard]: 2.16998e-06 [meta_shard_fg_expand]: 7.141e-05 [shard_inline]: 0.00014281 [auto_parallel]: 0.00011558 [parallel]: 1.38601e-05 [flash_sp]: 0.00012144 [merge_comm]: 0.00011081 [allreduce_fusion]: 9.313e-05 [matmul_add_comm_reduction]: 0.00011034 [allreduce_slice_to_reducescatter]: 6.10016e-07 [virtual_shard_identity]: 0.00014028 [virtual_dataset]: 0.00013539 [get_grad_eliminate_]: 0.00013178 [virtual_output]: 0.00013446 [merge_forward]: 9.01399e-05 [cell_reuse_recompute_pass]: 2.50002e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024703 [before_grad]: 0.00023714 [inplace_validation]: 8.692e-05 [parallel_renormalize]: 7.0082e-08 [update_top_fg]: 6.50063e-07 [cast_eliminate]: 0.00015561 [meta_fg_expand]: 0.00026788 [inplace_validation_after_expand]: 0.00017888 [flash_sp_send_recv_attached]: 2.04006e-06 [receive_attached]: 1.82004e-06 [after_resolve]: 0.00015958 [a_after_grad]: 0.00022593 [special_op_eliminate]: 0.00013373 [renormalize]: 0.0170159 [add_forward_monad_depend]: 4.34008e-06 [auto_monad_grad]: 1.93994e-06 [auto_monad_eliminator]: 0.00029191 [cse]: 0.00628414 [a_3]: 0.00098385 [Cycle 3]: 0.0116987, [46] [expand_dump_flag]: 2.39001e-06 [switch_simplify]: 0.00013307 [loop_unroll]: 0.00012781 [a_1]: 0.00423407 [recompute_prepare]: 0.00013723 [updatestate_depend_eliminate]: 0.00015492 [updatestate_assign_eliminate]: 9.31501e-05 [updatestate_loads_eliminate]: 9.141e-05 [parameter_eliminate]: 2.85998e-06 [a_2]: 0.00205558 [accelerated_algorithm]: 0.00015602 [shard]: 1.34006e-06 [meta_shard_fg_expand]: 4.931e-05 [shard_inline]: 0.00013169 [auto_parallel]: 0.00010891 [parallel]: 8.05e-06 [flash_sp]: 1.97999e-06 [merge_comm]: 0.00010265 [allreduce_fusion]: 9.686e-05 [matmul_add_comm_reduction]: 0.00013277 [allreduce_slice_to_reducescatter]: 4.7998e-07 [virtual_shard_identity]: 0.0001389 [virtual_dataset]: 0.0001319 [get_grad_eliminate_]: 0.0001259 [virtual_output]: 0.00012858 [merge_forward]: 9.447e-05 [cell_reuse_recompute_pass]: 2.55997e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024449 [before_grad]: 0.00023107 [inplace_validation]: 9.208e-05 [parallel_renormalize]: 7.99773e-08 [update_top_fg]: 5.10016e-07 [cast_eliminate]: 0.00014719 [meta_fg_expand]: 0.0001132 [inplace_validation_after_expand]: 0.00011749 [flash_sp_send_recv_attached]: 1.42003e-06 [receive_attached]: 1.10001e-06 [after_resolve]: 0.00014748 [a_after_grad]: 0.00021763 [special_op_eliminate]: 0.00012837 [renormalize]: 7.0082e-08 [add_forward_monad_depend]: 2.36998e-06 [auto_monad_grad]: 1.91003e-06 [auto_monad_eliminator]: 0.00016929 [cse]: 0.00040886 [a_3]: 0.00093753 [py_interpret_to_execute_after_opt_a]: 0.00134237 [slice_cell_reuse_recomputed_activation]: 2.63005e-06 [rewriter_after_opt_a]: 0.00104199 [convert_after_rewriter]: 0.00011309 [order_py_execute_after_rewriter]: 8.25899e-05 [opt_b]: 0.00396742, [1] [Cycle 1]: 0.00395507, [7] [b_1]: 0.00307434 [b_2]: 0.00013557 [updatestate_depend_eliminate]: 0.0001036 [updatestate_assign_eliminate]: 8.66101e-05 [updatestate_loads_eliminate]: 8.98701e-05 [renormalize]: 4.39934e-07 [cse]: 0.00040902 [optimize_parallel_all_gather_comm]: 0.00013905 [overlap_param_gather]: 1.69e-06 [cconv]: 6.926e-05 [loop_unroll]: 0.00097195 [opt_after_cconv]: 0.00162204, [1] [Cycle 1]: 0.00161435, [7] [c_1]: 0.00082603 [parameter_eliminate]: 2.63995e-06 [updatestate_depend_eliminate]: 0.00014214 [updatestate_assign_eliminate]: 9.198e-05 [updatestate_loads_eliminate]: 9.209e-05 [cse]: 0.00040169 [renormalize]: 6.50063e-07 [remove_dup_value]: 0.00059312 [tuple_transform]: 0.00093859, [1] [Cycle 1]: 0.00093067, [2] [d_1]: 0.00091398 [renormalize]: 3.89991e-07 [partial_unused_args_eliminate]: 3.13995e-06 [add_cache_embedding]: 0.00015694 [add_recomputation]: 0.00071231 [cse_after_recomputation]: 0.00037462, [1] [Cycle 1]: 0.00036674, [1] [cse]: 0.00035054 [environ_conv]: 9.829e-05 [swap_dp_allreduce_reducescatter]: 0.00013262 [bias_add_comm_swap]: 2.68e-06 [label_micro_interleaved_index]: 1.9701e-06 [label_fine_grained_interleaved_index]: 0.00052068 [merge_cast_opt]: 1.33005e-06 [slice_recompute_activation]: 0.00015196 [micro_interleaved_order_control]: 2.16009e-06 [assign_add_opt]: 0.00039873 [ForceFp32Comm]: 1.25996e-06 [remove_cast_before_assign_add]: 0.00010922 [full_micro_interleaved_order_control]: 2.50002e-06 [reorder_send_recv_between_fp_bp]: 1.62993e-06 [comm_op_add_attrs]: 0.0001531 [add_comm_op_reuse_tag]: 0.00015152 [interleave_split_concat_branches]: 1.01002e-06 [interleave_parallel_branches]: 1.27999e-06 [overlap_opt_shard_in_pipeline]: 3.089e-05 [overlap_opt_shard_grad_in_pipeline]: 3.28e-06 [control_data_broadcast_order]: 1.02003e-06 [grouped_pairwise_exchange_alltoall]: 1.124e-05 [offloading_packed_experts]: 2.34006e-06 [overlap_recompute_and_grad_model_parallel]: 2.32004e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.19913e-07 [overlap_recompute_allgather_and_fa_grad]: 0.00010605 [overlap_grad_ring_attention]: 0.00016144 [overlap_grad_flash_sp]: 0.00011977 [begin_end_overlap_inline]: 8.49948e-07 [split_matmul_comm_elemetwise]: 1.92993e-06 [split_layernorm_comm]: 1.84996e-06 [handle_group_info]: 5.54998e-06 [symbol_engine_optimizer]: 0.00080492, [1] [Cycle 1]: 0.0007982, [6] [build]: 5.243e-05 [elim_shapecalc]: 0.00014328 [elim_not_effective]: 0.00022495 [opt_reshape]: 0.00012889 [fold_const_symbol]: 0.00020922 [renormalize]: 3.00002e-07 [pipeline_parallel_scheduler]: 3.74997e-06 [auto_monad_reorder]: 0.00035212 [get_jit_bprop_graph]: 5.30039e-07 [rewriter_after_jit_bprop_graph]: 6.39935e-07 [eliminate_special_op_node]: 0.00140405 [distribtued_split]: 0.0003866 [validate]: 0.00028978 [task_emit]: 12.8294 [execute]: 1.383e-05 Sums bootstrap : 0.001473s : 0.01% type_inference : 0.738033s : 5.19% auto_monad : 0.001998s : 0.01% graph_reusing : 0.000026s : 0.00% inline.rewriter_before_opt_a : 0.001529s : 0.01% inline.a1a2.expand_dump_flag : 0.000035s : 0.00% inline.a1a2.switch_simplify : 0.001168s : 0.01% inline.a1a2.loop_unroll : 0.000777s : 0.01% inline.a1a2.a_1 : 0.025138s : 0.18% inline.a1a2.recompute_prepare : 0.000253s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000567s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000176s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000266s : 0.00% inline.a1a2.parameter_eliminate : 0.000009s : 0.00% inline.a1a2.a_2 : 0.004804s : 0.03% inline.a1a2.parallel_inline_pass : 0.000213s : 0.00% parallel-infer-symbol : 0.000168s : 0.00% pre_auto_parallel : 0.000104s : 0.00% insert-virtual-dataset : 0.001191s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000161s : 0.00% pipeline_split : 0.000098s : 0.00% optimize.py_interpret_to_execute : 0.000122s : 0.00% optimize.rewriter_before_opt_a : 0.000267s : 0.00% optimize.opt_a.expand_dump_flag : 0.000056s : 0.00% optimize.opt_a.switch_simplify : 0.002040s : 0.01% optimize.opt_a.loop_unroll : 0.001727s : 0.01% optimize.opt_a.a_1 : 0.038342s : 0.27% optimize.opt_a.recompute_prepare : 0.000412s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000493s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000260s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000323s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.008014s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000650s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000170s : 0.00% optimize.opt_a.shard_inline : 0.000382s : 0.00% optimize.opt_a.auto_parallel : 0.000306s : 0.00% optimize.opt_a.parallel : 0.015662s : 0.11% optimize.opt_a.flash_sp : 0.000182s : 0.00% optimize.opt_a.merge_comm : 0.000344s : 0.00% optimize.opt_a.allreduce_fusion : 0.000265s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000341s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000401s : 0.00% optimize.opt_a.virtual_dataset : 0.000436s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000371s : 0.00% optimize.opt_a.virtual_output : 0.000376s : 0.00% optimize.opt_a.merge_forward : 0.000264s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000007s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000698s : 0.00% optimize.opt_a.before_grad : 0.000670s : 0.00% optimize.opt_a.inplace_validation : 0.000336s : 0.00% optimize.opt_a.parallel_renormalize : 0.020682s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000449s : 0.00% optimize.opt_a.meta_fg_expand : 0.270100s : 1.90% optimize.opt_a.inplace_validation_after_expand : 0.001890s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001227s : 0.01% optimize.opt_a.receive_attached : 0.000085s : 0.00% optimize.opt_a.after_resolve : 0.002279s : 0.02% optimize.opt_a.a_after_grad : 0.004354s : 0.03% optimize.opt_a.special_op_eliminate : 0.002145s : 0.02% optimize.opt_a.renormalize : 0.165150s : 1.16% optimize.opt_a.add_forward_monad_depend : 0.000361s : 0.00% optimize.opt_a.auto_monad_grad : 0.000210s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002239s : 0.02% optimize.opt_a.cse : 0.010819s : 0.08% optimize.opt_a.a_3 : 0.026402s : 0.19% optimize.py_interpret_to_execute_after_opt_a : 0.001342s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001042s : 0.01% optimize.convert_after_rewriter : 0.000113s : 0.00% optimize.order_py_execute_after_rewriter : 0.000083s : 0.00% optimize.opt_b.b_1 : 0.003074s : 0.02% optimize.opt_b.b_2 : 0.000136s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000104s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000087s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000090s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000409s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000139s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000069s : 0.00% optimize.loop_unroll : 0.000972s : 0.01% optimize.opt_after_cconv.c_1 : 0.000826s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000142s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000092s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000092s : 0.00% optimize.opt_after_cconv.cse : 0.000402s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000593s : 0.00% optimize.tuple_transform.d_1 : 0.000914s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000157s : 0.00% optimize.add_recomputation : 0.000712s : 0.01% optimize.cse_after_recomputation.cse : 0.000351s : 0.00% optimize.environ_conv : 0.000098s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000133s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000521s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000152s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000399s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000109s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000153s : 0.00% optimize.add_comm_op_reuse_tag : 0.000152s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000031s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000106s : 0.00% optimize.overlap_grad_ring_attention : 0.000161s : 0.00% optimize.overlap_grad_flash_sp : 0.000120s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000006s : 0.00% optimize.symbol_engine_optimizer.build : 0.000052s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000143s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000225s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000129s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000209s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000352s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.001404s : 0.01% distribtued_split : 0.000387s : 0.00% validate : 0.000290s : 0.00% task_emit : 12.829390s : 90.30% execute : 0.000014s : 0.00% Time group info: ------[substitution.] 0.048083 4298 0.04% : 0.000021s : 5: substitution.ad_related_special_op_eliminate 0.05% : 0.000022s : 9: substitution.addn_check_dump 0.11% : 0.000051s : 7: substitution.addn_zero_filter 0.03% : 0.000015s : 7: substitution.adjust_all_reduce_mul_add 0.65% : 0.000313s : 71: substitution.arithmetic_simplify 0.11% : 0.000052s : 10: substitution.cast_eliminate 0.11% : 0.000053s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000025s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000019s : 12: substitution.environ_get_depend_swap 0.06% : 0.000030s : 27: substitution.environ_get_eliminate 0.07% : 0.000035s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000018s : 23: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.03% : 0.000012s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000030s : 107: substitution.fold_const_symbol 64.47% : 0.031000s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000083s : 126: substitution.graph_param_transform 0.02% : 0.000008s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.75% : 0.011420s : 331: substitution.inline 1.40% : 0.000673s : 112: substitution.inline_without_move 0.25% : 0.000122s : 309: substitution.j_node_and_user_rematch 0.36% : 0.000172s : 40: substitution.less_batch_normalization 0.10% : 0.000046s : 90: substitution.load_eliminater 0.11% : 0.000052s : 10: substitution.merge_addn 0.23% : 0.000112s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.07% : 0.000033s : 1: substitution.partial_defer_inline 0.14% : 0.000068s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.07% : 0.000031s : 15: substitution.reduce_eliminate 0.33% : 0.000157s : 309: substitution.remove_not_recompute_node 2.02% : 0.000972s : 508: substitution.replace_applicator 0.22% : 0.000105s : 251: substitution.replace_old_param 0.08% : 0.000038s : 11: substitution.reshape_eliminate 0.03% : 0.000013s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000011s : 4: substitution.specialize_transform 0.03% : 0.000016s : 12: substitution.split_environ_get_set_with_tuple_value 0.17% : 0.000081s : 34: substitution.switch_simplify 0.06% : 0.000028s : 11: substitution.tile_eliminate 0.53% : 0.000254s : 101: substitution.tuple_list_convert_item_index_to_positive 0.28% : 0.000134s : 107: substitution.tuple_list_get_item_const_eliminator 0.42% : 0.000204s : 107: substitution.tuple_list_get_item_depend_reorder 1.60% : 0.000768s : 308: substitution.tuple_list_get_item_eliminator 0.37% : 0.000180s : 107: substitution.tuple_list_get_set_item_eliminator 0.40% : 0.000195s : 210: substitution.updatestate_pure_node_eliminater 0.70% : 0.000335s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000011s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.737578 2 96.62% : 0.712660s : 1: type_inference.infer 3.38% : 0.024917s : 1: type_inference.specialize ------[replace.] 0.009742 775 0.43% : 0.000042s : 5: replace.ad_related_special_op_eliminate 0.06% : 0.000006s : 1: replace.arithmetic_simplify 0.50% : 0.000049s : 7: replace.depend_value_elim 0.40% : 0.000039s : 3: replace.environ_get_set_eliminate 30.82% : 0.003003s : 183: replace.getattr_setattr_resolve 30.43% : 0.002964s : 310: replace.inline 0.22% : 0.000021s : 1: replace.merge_addn 1.15% : 0.000112s : 7: replace.partial_eliminate 3.86% : 0.000376s : 25: replace.replace_applicator 3.75% : 0.000365s : 34: replace.switch_simplify 0.72% : 0.000071s : 6: replace.tuple_list_get_item_depend_reorder 27.28% : 0.002658s : 191: replace.tuple_list_get_item_eliminator 0.17% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.21% : 0.000021s : 1: replace.virtual_dataset_eliminate ------[match.] 0.040752 775 0.04% : 0.000017s : 5: match.ad_related_special_op_eliminate 0.03% : 0.000011s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000016s : 3: match.environ_get_set_eliminate 70.73% : 0.028823s : 183: match.getattr_setattr_resolve 27.49% : 0.011202s : 310: match.inline 0.06% : 0.000024s : 1: match.merge_addn 0.09% : 0.000038s : 7: match.partial_eliminate 0.24% : 0.000098s : 25: match.replace_applicator 0.15% : 0.000061s : 34: match.switch_simplify 0.07% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 1.01% : 0.000410s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000008s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020704131318 0.79% : 0.000164s : 1198: predicate.accumulaten_eliminater 0.27% : 0.000055s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000113s : 835: predicate.addn_check_dump 0.77% : 0.000159s : 1198: predicate.addn_zero_filter 0.74% : 0.000153s : 1198: predicate.adjust_all_reduce_mul_add 1.88% : 0.000389s : 2034: predicate.arithmetic_simplify 1.12% : 0.000232s : 1586: predicate.cast_eliminate 3.22% : 0.000666s : 3484: predicate.check_bprop_eliminate 0.56% : 0.000116s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000035s : 242: predicate.convert_tensor_all_eliminate 1.13% : 0.000235s : 1399: predicate.convert_tensor_eliminate 0.57% : 0.000118s : 838: predicate.depend_value_elim 0.81% : 0.000168s : 1202: predicate.dict_get_item_const_eliminator 0.91% : 0.000188s : 1202: predicate.dict_get_item_eliminator 0.85% : 0.000176s : 1202: predicate.dict_set_item_eliminator 0.04% : 0.000009s : 126: predicate.elim_not_effective 0.10% : 0.000022s : 126: predicate.elim_shapecalc_of_broadcastargs 0.87% : 0.000181s : 1334: predicate.environ_add_const_eliminate 0.87% : 0.000181s : 1337: predicate.environ_get_add_eliminate 0.84% : 0.000175s : 1334: predicate.environ_get_depend_swap 1.48% : 0.000306s : 2172: predicate.environ_get_eliminate 0.83% : 0.000172s : 1337: predicate.environ_get_set_eliminate 1.20% : 0.000248s : 1717: predicate.exchange_switch_depend_value 1.40% : 0.000289s : 1717: predicate.float_depend_g_call 0.56% : 0.000115s : 835: predicate.float_environ_get_switch 0.64% : 0.000133s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.29% : 0.000059s : 395: predicate.get_grad_eliminate 2.38% : 0.000494s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000113s : 835: predicate.incorporate_call 0.54% : 0.000111s : 835: predicate.incorporate_call_switch 3.90% : 0.000808s : 4602: predicate.inline 2.35% : 0.000487s : 2203: predicate.inline_without_move 0.14% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.32% : 0.000066s : 388: predicate.less_batch_normalization 1.10% : 0.000229s : 1660: predicate.list_to_tuple_eliminator_ 1.87% : 0.000387s : 2874: predicate.load_eliminater 0.19% : 0.000039s : 135: predicate.loop_unroll_after_grad 2.39% : 0.000494s : 2640: predicate.loop_unroll_before_grad 0.98% : 0.000202s : 1478: predicate.make_slice_get_slice_eliminator 0.81% : 0.000169s : 837: predicate.merge_addn 3.12% : 0.000646s : 3380: predicate.micro_step_allgather_replace 3.14% : 0.000649s : 3380: predicate.mini_step_allgather_replace 0.83% : 0.000171s : 1199: predicate.minmaximum_grad 0.18% : 0.000037s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.11% : 0.000022s : 135: predicate.parallel_virtual_node 2.03% : 0.000421s : 1717: predicate.partial_defer_inline 1.09% : 0.000225s : 1541: predicate.partial_eliminate 0.77% : 0.000159s : 1198: predicate.print_const_string_wrapper 0.56% : 0.000115s : 824: predicate.reduce_all_const_elim 0.96% : 0.000198s : 1199: predicate.reduce_eliminate 0.14% : 0.000029s : 395: predicate.remove_not_recompute_node 1.92% : 0.000397s : 4829: predicate.replace_applicator 0.79% : 0.000163s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.76% : 0.000158s : 1199: predicate.reshape_eliminate 3.18% : 0.000658s : 3380: predicate.row_tensor_add_zeros_like 0.10% : 0.000022s : 135: predicate.row_tensor_eliminate 3.31% : 0.000685s : 3484: predicate.same_eliminate 0.24% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.29% : 0.000061s : 395: predicate.shard_identity_eliminate 2.15% : 0.000444s : 2338: predicate.special_op_eliminate 0.63% : 0.000130s : 837: predicate.specialize_transform 3.47% : 0.000719s : 3380: predicate.split_environ_get_set_with_tuple_value 1.58% : 0.000327s : 2203: predicate.stack_unstack_eliminate 1.83% : 0.000380s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.26% : 0.000261s : 1717: predicate.switch_defer_inline 4.48% : 0.000928s : 5201: predicate.switch_layer_defer_inline 4.34% : 0.000898s : 5262: predicate.switch_simplify 0.81% : 0.000167s : 1199: predicate.tile_eliminate 0.75% : 0.000155s : 1199: predicate.transpose_eliminate 1.07% : 0.000221s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.07% : 0.000222s : 1469: predicate.tuple_list_get_item_const_eliminator 0.94% : 0.000196s : 1469: predicate.tuple_list_get_item_depend_reorder 1.88% : 0.000389s : 2495: predicate.tuple_list_get_item_eliminator 0.97% : 0.000201s : 1469: predicate.tuple_list_get_set_item_eliminator 1.64% : 0.000339s : 2304: predicate.tuple_list_set_item_eliminator 1.13% : 0.000234s : 1660: predicate.tuple_to_list_eliminator_ 1.88% : 0.000389s : 2874: predicate.updatestate_pure_node_eliminater 2.50% : 0.000517s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000021s : 135: predicate.value_based_eliminate 0.29% : 0.000060s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000056s : 395: predicate.virtual_output_eliminate 0.11% : 0.000022s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.060758 747 69.99% : 0.042524s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.33% : 0.001419s : 22: func_graph_cloner_run.FuncGraphClonerNode 27.68% : 0.016815s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.212317 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.27% : 0.041490s : 1: a1a2 0.00% : 0.000165s : 1: add_cache_embedding 0.00% : 0.000159s : 1: add_comm_op_reuse_tag 0.00% : 0.000724s : 1: add_recomputation 0.00% : 0.000408s : 1: assign_add_opt 0.01% : 0.002022s : 1: auto_monad 0.00% : 0.000367s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001520s : 1: bootstrap 0.00% : 0.000076s : 1: cconv 0.00% : 0.000161s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000119s : 1: convert_after_rewriter 0.00% : 0.000380s : 1: cse_after_recomputation 0.00% : 0.000170s : 1: dataset_repeat_opt 0.00% : 0.000402s : 1: distribtued_split 0.01% : 0.001420s : 1: eliminate_special_op_node 0.00% : 0.000107s : 1: environ_conv 0.00% : 0.000023s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000035s : 1: graph_reusing 0.00% : 0.000015s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000008s : 1: handle_group_info 0.28% : 0.043092s : 1: inline 0.01% : 0.001213s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000529s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000982s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.032281s : 61: opt.transform.a1a2 0.00% : 0.000172s : 1: opt.transform.loop_unroll_optimizer 0.59% : 0.089905s : 148: opt.transform.opt_a 0.01% : 0.000823s : 1: opt.transform.opt_after_cconv 0.02% : 0.003178s : 27: opt.transform.opt_b 0.24% : 0.036919s : 16: opt.transform.opt_resolve 0.01% : 0.000911s : 1: opt.transform.opt_trans_graph 0.01% : 0.000824s : 6: opt.transform.special_op_eliminate 0.00% : 0.000697s : 4: opt.transform.symbol_engine_opt 3.83% : 0.583252s : 1: opt_a 0.01% : 0.001628s : 1: opt_after_cconv 0.03% : 0.003972s : 1: opt_b 3.94% : 0.599189s : 1: optimize 0.00% : 0.000147s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000087s : 1: order_py_execute_after_rewriter 0.00% : 0.000124s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000167s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000036s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000111s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000179s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000109s : 1: pipeline_split 0.00% : 0.000114s : 1: pre_auto_parallel 0.00% : 0.000130s : 1: py_interpret_to_execute 0.01% : 0.001357s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000115s : 1: remove_cast_before_assign_add 0.00% : 0.000607s : 1: remove_dup_value 0.88% : 0.133296s : 3: renormalize.infer 0.35% : 0.052494s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001053s : 1: rewriter_after_opt_a 0.01% : 0.001819s : 2: rewriter_before_opt_a 0.00% : 0.000008s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000159s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000139s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000809s : 1: symbol_engine_optimizer 84.34% : 12.829437s : 1: task_emit 0.01% : 0.000943s : 1: tuple_transform 4.85% : 0.738068s : 1: type_inference 0.01% : 0.001345s : 1: validate TotalTime = 14.2872, [21] [bootstrap]: 0.00136375 [type_inference]: 0.761799 [auto_monad]: 0.00188671 [graph_reusing]: 2.56799e-05 [inline]: 0.0433651, [2] [rewriter_before_opt_a]: 0.00154383 [a1a2]: 0.0417468, [2] [Cycle 1]: 0.0286564, [11] [expand_dump_flag]: 3.382e-05 [switch_simplify]: 0.0011172 [loop_unroll]: 0.00068103 [a_1]: 0.0221977 [recompute_prepare]: 0.00017089 [updatestate_depend_eliminate]: 0.00038634 [updatestate_assign_eliminate]: 9.88301e-05 [updatestate_loads_eliminate]: 0.00021237 [parameter_eliminate]: 5.48002e-06 [a_2]: 0.00336606 [parallel_inline_pass]: 0.00010418 [Cycle 2]: 0.00556126, [11] [expand_dump_flag]: 2.25997e-06 [switch_simplify]: 9.34401e-05 [loop_unroll]: 0.00010824 [a_1]: 0.00312413 [recompute_prepare]: 9.814e-05 [updatestate_depend_eliminate]: 0.00024151 [updatestate_assign_eliminate]: 6.726e-05 [updatestate_loads_eliminate]: 6.35401e-05 [parameter_eliminate]: 4.33996e-06 [a_2]: 0.00157392 [parallel_inline_pass]: 0.00010355 [parallel-infer-symbol]: 0.00019227 [pre_auto_parallel]: 0.00011321 [insert-virtual-dataset]: 0.00141896 [parallel-infer-symbol-second]: 2.80002e-06 [dataset_repeat_opt]: 0.00012463 [pipeline_split]: 0.00012611 [optimize]: 0.604288, [52] [py_interpret_to_execute]: 0.00015024 [rewriter_before_opt_a]: 0.00028924 [opt_a]: 0.589415, [3] [Cycle 1]: 0.506735, [46] [expand_dump_flag]: 2.29001e-06 [switch_simplify]: 0.00012867 [loop_unroll]: 9.788e-05 [a_1]: 0.00328088 [recompute_prepare]: 0.00010229 [updatestate_depend_eliminate]: 0.00010996 [updatestate_assign_eliminate]: 6.498e-05 [updatestate_loads_eliminate]: 6.81101e-05 [parameter_eliminate]: 3.40003e-06 [a_2]: 0.00166866 [accelerated_algorithm]: 0.00031012 [shard]: 2.22004e-06 [meta_shard_fg_expand]: 5.684e-05 [shard_inline]: 0.00010846 [auto_parallel]: 8.543e-05 [parallel]: 0.0160508 [flash_sp]: 5.756e-05 [merge_comm]: 0.00012656 [allreduce_fusion]: 7.37001e-05 [matmul_add_comm_reduction]: 0.00010091 [allreduce_slice_to_reducescatter]: 9.00007e-07 [virtual_shard_identity]: 0.00012439 [virtual_dataset]: 0.00016352 [get_grad_eliminate_]: 0.00011416 [virtual_output]: 0.00011137 [merge_forward]: 7.896e-05 [cell_reuse_recompute_pass]: 2.2701e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020813 [before_grad]: 0.00019379 [inplace_validation]: 0.00012023 [parallel_renormalize]: 0.0214737 [update_top_fg]: 8.40053e-07 [cast_eliminate]: 0.00017139 [meta_fg_expand]: 0.266451 [inplace_validation_after_expand]: 0.00158278 [flash_sp_send_recv_attached]: 0.00122147 [receive_attached]: 7.80501e-05 [after_resolve]: 0.00204149 [a_after_grad]: 0.00389454 [special_op_eliminate]: 0.00185977 [renormalize]: 0.152189 [add_forward_monad_depend]: 0.00036506 [auto_monad_grad]: 0.00022092 [auto_monad_eliminator]: 0.00184989 [cse]: 0.00424349 [a_3]: 0.0249133 [Cycle 2]: 0.0708551, [46] [expand_dump_flag]: 6.179e-05 [switch_simplify]: 0.00192097 [loop_unroll]: 0.00152877 [a_1]: 0.0317526 [recompute_prepare]: 0.00017989 [updatestate_depend_eliminate]: 0.00023817 [updatestate_assign_eliminate]: 0.00010777 [updatestate_loads_eliminate]: 0.00016843 [parameter_eliminate]: 4.00003e-06 [a_2]: 0.0043561 [accelerated_algorithm]: 0.00016847 [shard]: 2.41993e-06 [meta_shard_fg_expand]: 8.168e-05 [shard_inline]: 0.00014113 [auto_parallel]: 0.00016092 [parallel]: 1.364e-05 [flash_sp]: 0.00012679 [merge_comm]: 0.00015596 [allreduce_fusion]: 9.499e-05 [matmul_add_comm_reduction]: 0.00011687 [allreduce_slice_to_reducescatter]: 6.60075e-07 [virtual_shard_identity]: 0.00014793 [virtual_dataset]: 0.0001369 [get_grad_eliminate_]: 0.00013224 [virtual_output]: 0.00013534 [merge_forward]: 9.332e-05 [cell_reuse_recompute_pass]: 2.70992e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024984 [before_grad]: 0.00023911 [inplace_validation]: 8.551e-05 [parallel_renormalize]: 8.00937e-08 [update_top_fg]: 6.20028e-07 [cast_eliminate]: 0.00015471 [meta_fg_expand]: 0.00028654 [inplace_validation_after_expand]: 0.00017986 [flash_sp_send_recv_attached]: 2.10991e-06 [receive_attached]: 2.13005e-06 [after_resolve]: 0.00016082 [a_after_grad]: 0.00022605 [special_op_eliminate]: 0.00013423 [renormalize]: 0.0186531 [add_forward_monad_depend]: 4.59002e-06 [auto_monad_grad]: 2.27999e-06 [auto_monad_eliminator]: 0.00028682 [cse]: 0.00677532 [a_3]: 0.0009514 [Cycle 3]: 0.0117982, [46] [expand_dump_flag]: 2.09e-06 [switch_simplify]: 0.00013116 [loop_unroll]: 0.00012802 [a_1]: 0.00425069 [recompute_prepare]: 0.0001355 [updatestate_depend_eliminate]: 0.00015112 [updatestate_assign_eliminate]: 9.694e-05 [updatestate_loads_eliminate]: 9.19701e-05 [parameter_eliminate]: 2.73995e-06 [a_2]: 0.00211575 [accelerated_algorithm]: 0.00015634 [shard]: 1.69e-06 [meta_shard_fg_expand]: 5.343e-05 [shard_inline]: 0.00013431 [auto_parallel]: 0.00011461 [parallel]: 1.065e-05 [flash_sp]: 3.03006e-06 [merge_comm]: 0.00010644 [allreduce_fusion]: 9.494e-05 [matmul_add_comm_reduction]: 0.00012168 [allreduce_slice_to_reducescatter]: 6.70087e-07 [virtual_shard_identity]: 0.00013756 [virtual_dataset]: 0.00013012 [get_grad_eliminate_]: 0.00012691 [virtual_output]: 0.00012875 [merge_forward]: 9.387e-05 [cell_reuse_recompute_pass]: 2.50002e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024479 [before_grad]: 0.00022868 [inplace_validation]: 8.893e-05 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 5.00004e-07 [cast_eliminate]: 0.00014648 [meta_fg_expand]: 0.00011309 [inplace_validation_after_expand]: 0.00011724 [flash_sp_send_recv_attached]: 1.91992e-06 [receive_attached]: 1.64995e-06 [after_resolve]: 0.00014488 [a_after_grad]: 0.00021669 [special_op_eliminate]: 0.00012963 [renormalize]: 1.09896e-07 [add_forward_monad_depend]: 2.31003e-06 [auto_monad_grad]: 1.81992e-06 [auto_monad_eliminator]: 0.00017337 [cse]: 0.00044518 [a_3]: 0.00092995 [py_interpret_to_execute_after_opt_a]: 0.00014366 [slice_cell_reuse_recomputed_activation]: 2.2701e-06 [rewriter_after_opt_a]: 0.00107469 [convert_after_rewriter]: 0.00011379 [order_py_execute_after_rewriter]: 8.056e-05 [opt_b]: 0.00400746, [1] [Cycle 1]: 0.00399896, [7] [b_1]: 0.00311754 [b_2]: 0.00013817 [updatestate_depend_eliminate]: 9.79699e-05 [updatestate_assign_eliminate]: 8.804e-05 [updatestate_loads_eliminate]: 8.99399e-05 [renormalize]: 5.60074e-07 [cse]: 0.00040746 [optimize_parallel_all_gather_comm]: 0.00014268 [overlap_param_gather]: 1.13004e-06 [cconv]: 7.001e-05 [loop_unroll]: 0.00098721 [opt_after_cconv]: 0.00158007, [1] [Cycle 1]: 0.0015723, [7] [c_1]: 0.00078209 [parameter_eliminate]: 3.12005e-06 [updatestate_depend_eliminate]: 0.00013416 [updatestate_assign_eliminate]: 9.53e-05 [updatestate_loads_eliminate]: 9.39e-05 [cse]: 0.00040708 [renormalize]: 5.69969e-07 [remove_dup_value]: 0.00061823 [tuple_transform]: 0.00094946, [1] [Cycle 1]: 0.00094138, [2] [d_1]: 0.00092374 [renormalize]: 6.59958e-07 [partial_unused_args_eliminate]: 3.39001e-06 [add_cache_embedding]: 0.00015664 [add_recomputation]: 0.00075081 [cse_after_recomputation]: 0.00032408, [1] [Cycle 1]: 0.00031498, [1] [cse]: 0.00030046 [environ_conv]: 0.00010126 [swap_dp_allreduce_reducescatter]: 0.00013121 [bias_add_comm_swap]: 3.1899e-06 [label_micro_interleaved_index]: 2.58e-06 [label_fine_grained_interleaved_index]: 0.00053088 [merge_cast_opt]: 1.83005e-06 [slice_recompute_activation]: 0.0001537 [micro_interleaved_order_control]: 2.25997e-06 [assign_add_opt]: 0.00039794 [ForceFp32Comm]: 1.53005e-06 [remove_cast_before_assign_add]: 0.00010804 [full_micro_interleaved_order_control]: 2.14006e-06 [reorder_send_recv_between_fp_bp]: 1.63005e-06 [comm_op_add_attrs]: 0.00015335 [add_comm_op_reuse_tag]: 0.00014829 [interleave_split_concat_branches]: 8.79983e-07 [interleave_parallel_branches]: 8.19913e-07 [overlap_opt_shard_in_pipeline]: 3.014e-05 [overlap_opt_shard_grad_in_pipeline]: 3.24997e-06 [control_data_broadcast_order]: 1.05007e-06 [grouped_pairwise_exchange_alltoall]: 1.10901e-05 [offloading_packed_experts]: 2.32994e-06 [overlap_recompute_and_grad_model_parallel]: 2.63995e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.79983e-07 [overlap_recompute_allgather_and_fa_grad]: 0.00010732 [overlap_grad_ring_attention]: 0.00014713 [overlap_grad_flash_sp]: 0.00012041 [begin_end_overlap_inline]: 1.07998e-06 [split_matmul_comm_elemetwise]: 2.24006e-06 [split_layernorm_comm]: 2.04006e-06 [handle_group_info]: 5.68002e-06 [symbol_engine_optimizer]: 0.00081197, [1] [Cycle 1]: 0.0008048, [6] [build]: 5.843e-05 [elim_shapecalc]: 0.00014781 [elim_not_effective]: 0.0002221 [opt_reshape]: 0.00012898 [fold_const_symbol]: 0.00020915 [renormalize]: 4.89992e-07 [pipeline_parallel_scheduler]: 3.64007e-06 [auto_monad_reorder]: 0.00034395 [get_jit_bprop_graph]: 5.40051e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00144979 [distribtued_split]: 0.00038604 [validate]: 0.00029605 [task_emit]: 12.8685 [execute]: 1.212e-05 Sums bootstrap : 0.001364s : 0.01% type_inference : 0.761799s : 5.34% auto_monad : 0.001887s : 0.01% graph_reusing : 0.000026s : 0.00% inline.rewriter_before_opt_a : 0.001544s : 0.01% inline.a1a2.expand_dump_flag : 0.000036s : 0.00% inline.a1a2.switch_simplify : 0.001211s : 0.01% inline.a1a2.loop_unroll : 0.000789s : 0.01% inline.a1a2.a_1 : 0.025322s : 0.18% inline.a1a2.recompute_prepare : 0.000269s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000628s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000166s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000276s : 0.00% inline.a1a2.parameter_eliminate : 0.000010s : 0.00% inline.a1a2.a_2 : 0.004940s : 0.03% inline.a1a2.parallel_inline_pass : 0.000208s : 0.00% parallel-infer-symbol : 0.000192s : 0.00% pre_auto_parallel : 0.000113s : 0.00% insert-virtual-dataset : 0.001419s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000125s : 0.00% pipeline_split : 0.000126s : 0.00% optimize.py_interpret_to_execute : 0.000150s : 0.00% optimize.rewriter_before_opt_a : 0.000289s : 0.00% optimize.opt_a.expand_dump_flag : 0.000066s : 0.00% optimize.opt_a.switch_simplify : 0.002181s : 0.02% optimize.opt_a.loop_unroll : 0.001755s : 0.01% optimize.opt_a.a_1 : 0.039284s : 0.28% optimize.opt_a.recompute_prepare : 0.000418s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000499s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000270s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000329s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.008141s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000635s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000192s : 0.00% optimize.opt_a.shard_inline : 0.000384s : 0.00% optimize.opt_a.auto_parallel : 0.000361s : 0.00% optimize.opt_a.parallel : 0.016075s : 0.11% optimize.opt_a.flash_sp : 0.000187s : 0.00% optimize.opt_a.merge_comm : 0.000389s : 0.00% optimize.opt_a.allreduce_fusion : 0.000264s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000339s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000410s : 0.00% optimize.opt_a.virtual_dataset : 0.000431s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000373s : 0.00% optimize.opt_a.virtual_output : 0.000375s : 0.00% optimize.opt_a.merge_forward : 0.000266s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000007s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000703s : 0.00% optimize.opt_a.before_grad : 0.000662s : 0.00% optimize.opt_a.inplace_validation : 0.000295s : 0.00% optimize.opt_a.parallel_renormalize : 0.021474s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000473s : 0.00% optimize.opt_a.meta_fg_expand : 0.266850s : 1.87% optimize.opt_a.inplace_validation_after_expand : 0.001880s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001225s : 0.01% optimize.opt_a.receive_attached : 0.000082s : 0.00% optimize.opt_a.after_resolve : 0.002347s : 0.02% optimize.opt_a.a_after_grad : 0.004337s : 0.03% optimize.opt_a.special_op_eliminate : 0.002124s : 0.01% optimize.opt_a.renormalize : 0.170842s : 1.20% optimize.opt_a.add_forward_monad_depend : 0.000372s : 0.00% optimize.opt_a.auto_monad_grad : 0.000225s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002310s : 0.02% optimize.opt_a.cse : 0.011464s : 0.08% optimize.opt_a.a_3 : 0.026795s : 0.19% optimize.py_interpret_to_execute_after_opt_a : 0.000144s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.001075s : 0.01% optimize.convert_after_rewriter : 0.000114s : 0.00% optimize.order_py_execute_after_rewriter : 0.000081s : 0.00% optimize.opt_b.b_1 : 0.003118s : 0.02% optimize.opt_b.b_2 : 0.000138s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000098s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000088s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000090s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000407s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000143s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000070s : 0.00% optimize.loop_unroll : 0.000987s : 0.01% optimize.opt_after_cconv.c_1 : 0.000782s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000134s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000095s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000094s : 0.00% optimize.opt_after_cconv.cse : 0.000407s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000618s : 0.00% optimize.tuple_transform.d_1 : 0.000924s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000157s : 0.00% optimize.add_recomputation : 0.000751s : 0.01% optimize.cse_after_recomputation.cse : 0.000300s : 0.00% optimize.environ_conv : 0.000101s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000131s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000003s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000531s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000154s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000398s : 0.00% optimize.ForceFp32Comm : 0.000002s : 0.00% optimize.remove_cast_before_assign_add : 0.000108s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000153s : 0.00% optimize.add_comm_op_reuse_tag : 0.000148s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000030s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000003s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000107s : 0.00% optimize.overlap_grad_ring_attention : 0.000147s : 0.00% optimize.overlap_grad_flash_sp : 0.000120s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000006s : 0.00% optimize.symbol_engine_optimizer.build : 0.000058s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000148s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000222s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000129s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000209s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000344s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001450s : 0.01% distribtued_split : 0.000386s : 0.00% validate : 0.000296s : 0.00% task_emit : 12.868505s : 90.14% execute : 0.000012s : 0.00% Time group info: ------[substitution.] 0.049539 4298 0.04% : 0.000020s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000021s : 9: substitution.addn_check_dump 0.11% : 0.000052s : 7: substitution.addn_zero_filter 0.03% : 0.000015s : 7: substitution.adjust_all_reduce_mul_add 0.67% : 0.000330s : 71: substitution.arithmetic_simplify 0.11% : 0.000055s : 10: substitution.cast_eliminate 0.11% : 0.000055s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000026s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000019s : 12: substitution.environ_get_depend_swap 0.06% : 0.000030s : 27: substitution.environ_get_eliminate 0.07% : 0.000035s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000019s : 23: substitution.float_depend_g_call 0.02% : 0.000012s : 12: substitution.float_environ_get_switch 0.03% : 0.000014s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000030s : 107: substitution.fold_const_symbol 63.66% : 0.031538s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000085s : 126: substitution.graph_param_transform 0.02% : 0.000009s : 8: substitution.incorporate_call 0.01% : 0.000006s : 8: substitution.incorporate_call_switch 24.45% : 0.012112s : 331: substitution.inline 1.42% : 0.000702s : 112: substitution.inline_without_move 0.25% : 0.000123s : 309: substitution.j_node_and_user_rematch 0.37% : 0.000184s : 40: substitution.less_batch_normalization 0.09% : 0.000046s : 90: substitution.load_eliminater 0.11% : 0.000053s : 10: substitution.merge_addn 0.25% : 0.000121s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.08% : 0.000037s : 1: substitution.partial_defer_inline 0.15% : 0.000074s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.07% : 0.000033s : 15: substitution.reduce_eliminate 0.32% : 0.000156s : 309: substitution.remove_not_recompute_node 2.05% : 0.001016s : 508: substitution.replace_applicator 0.22% : 0.000110s : 251: substitution.replace_old_param 0.08% : 0.000038s : 11: substitution.reshape_eliminate 0.03% : 0.000013s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000012s : 4: substitution.specialize_transform 0.04% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.17% : 0.000085s : 34: substitution.switch_simplify 0.06% : 0.000030s : 11: substitution.tile_eliminate 0.52% : 0.000259s : 101: substitution.tuple_list_convert_item_index_to_positive 0.28% : 0.000139s : 107: substitution.tuple_list_get_item_const_eliminator 0.43% : 0.000215s : 107: substitution.tuple_list_get_item_depend_reorder 1.60% : 0.000790s : 308: substitution.tuple_list_get_item_eliminator 0.38% : 0.000189s : 107: substitution.tuple_list_get_set_item_eliminator 0.40% : 0.000200s : 210: substitution.updatestate_pure_node_eliminater 0.71% : 0.000350s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000011s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.761320 2 96.61% : 0.735500s : 1: type_inference.infer 3.39% : 0.025819s : 1: type_inference.specialize ------[replace.] 0.010150 775 0.43% : 0.000044s : 5: replace.ad_related_special_op_eliminate 0.06% : 0.000006s : 1: replace.arithmetic_simplify 0.53% : 0.000054s : 7: replace.depend_value_elim 0.43% : 0.000043s : 3: replace.environ_get_set_eliminate 30.30% : 0.003076s : 183: replace.getattr_setattr_resolve 29.56% : 0.003000s : 310: replace.inline 0.22% : 0.000022s : 1: replace.merge_addn 1.14% : 0.000116s : 7: replace.partial_eliminate 4.08% : 0.000414s : 25: replace.replace_applicator 3.93% : 0.000398s : 34: replace.switch_simplify 0.51% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 28.48% : 0.002890s : 191: replace.tuple_list_get_item_eliminator 0.17% : 0.000017s : 1: replace.updatestate_useless_node_eliminater 0.16% : 0.000017s : 1: replace.virtual_dataset_eliminate ------[match.] 0.041973 775 0.04% : 0.000016s : 5: match.ad_related_special_op_eliminate 0.03% : 0.000011s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000017s : 3: match.environ_get_set_eliminate 69.89% : 0.029335s : 183: match.getattr_setattr_resolve 28.33% : 0.011890s : 310: match.inline 0.06% : 0.000025s : 1: match.merge_addn 0.09% : 0.000037s : 7: match.partial_eliminate 0.24% : 0.000100s : 25: match.replace_applicator 0.15% : 0.000064s : 34: match.switch_simplify 0.07% : 0.000031s : 6: match.tuple_list_get_item_depend_reorder 1.01% : 0.000424s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.021058131318 0.75% : 0.000158s : 1198: predicate.accumulaten_eliminater 0.27% : 0.000057s : 254: predicate.ad_related_special_op_eliminate 0.55% : 0.000116s : 835: predicate.addn_check_dump 0.77% : 0.000162s : 1198: predicate.addn_zero_filter 0.75% : 0.000158s : 1198: predicate.adjust_all_reduce_mul_add 1.80% : 0.000379s : 2034: predicate.arithmetic_simplify 1.12% : 0.000235s : 1586: predicate.cast_eliminate 3.21% : 0.000677s : 3484: predicate.check_bprop_eliminate 0.56% : 0.000119s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.14% : 0.000241s : 1399: predicate.convert_tensor_eliminate 0.56% : 0.000119s : 838: predicate.depend_value_elim 0.80% : 0.000168s : 1202: predicate.dict_get_item_const_eliminator 0.89% : 0.000188s : 1202: predicate.dict_get_item_eliminator 0.81% : 0.000170s : 1202: predicate.dict_set_item_eliminator 0.05% : 0.000010s : 126: predicate.elim_not_effective 0.11% : 0.000022s : 126: predicate.elim_shapecalc_of_broadcastargs 0.83% : 0.000176s : 1334: predicate.environ_add_const_eliminate 0.83% : 0.000175s : 1337: predicate.environ_get_add_eliminate 0.83% : 0.000175s : 1334: predicate.environ_get_depend_swap 1.43% : 0.000302s : 2172: predicate.environ_get_eliminate 0.82% : 0.000174s : 1337: predicate.environ_get_set_eliminate 1.36% : 0.000286s : 1717: predicate.exchange_switch_depend_value 1.44% : 0.000302s : 1717: predicate.float_depend_g_call 0.56% : 0.000117s : 835: predicate.float_environ_get_switch 0.65% : 0.000136s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.29% : 0.000060s : 395: predicate.get_grad_eliminate 2.39% : 0.000503s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000115s : 835: predicate.incorporate_call 0.54% : 0.000113s : 835: predicate.incorporate_call_switch 3.85% : 0.000811s : 4602: predicate.inline 2.33% : 0.000490s : 2203: predicate.inline_without_move 0.14% : 0.000029s : 395: predicate.j_node_and_user_rematch 0.34% : 0.000072s : 388: predicate.less_batch_normalization 1.11% : 0.000234s : 1660: predicate.list_to_tuple_eliminator_ 1.85% : 0.000390s : 2874: predicate.load_eliminater 0.28% : 0.000059s : 135: predicate.loop_unroll_after_grad 2.49% : 0.000524s : 2640: predicate.loop_unroll_before_grad 1.02% : 0.000215s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000118s : 837: predicate.merge_addn 3.09% : 0.000651s : 3380: predicate.micro_step_allgather_replace 3.11% : 0.000655s : 3380: predicate.mini_step_allgather_replace 0.76% : 0.000160s : 1199: predicate.minmaximum_grad 0.17% : 0.000037s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.11% : 0.000024s : 135: predicate.parallel_virtual_node 2.10% : 0.000442s : 1717: predicate.partial_defer_inline 1.10% : 0.000231s : 1541: predicate.partial_eliminate 0.78% : 0.000164s : 1198: predicate.print_const_string_wrapper 0.55% : 0.000116s : 824: predicate.reduce_all_const_elim 1.01% : 0.000213s : 1199: predicate.reduce_eliminate 0.14% : 0.000029s : 395: predicate.remove_not_recompute_node 2.07% : 0.000436s : 4829: predicate.replace_applicator 1.06% : 0.000222s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.78% : 0.000165s : 1199: predicate.reshape_eliminate 3.17% : 0.000667s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000022s : 135: predicate.row_tensor_eliminate 3.31% : 0.000697s : 3484: predicate.same_eliminate 0.24% : 0.000050s : 633: predicate.set_cell_output_no_recompute 0.31% : 0.000064s : 395: predicate.shard_identity_eliminate 2.13% : 0.000448s : 2338: predicate.special_op_eliminate 0.63% : 0.000132s : 837: predicate.specialize_transform 3.45% : 0.000726s : 3380: predicate.split_environ_get_set_with_tuple_value 1.56% : 0.000328s : 2203: predicate.stack_unstack_eliminate 1.83% : 0.000386s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.23% : 0.000259s : 1717: predicate.switch_defer_inline 4.38% : 0.000922s : 5201: predicate.switch_layer_defer_inline 4.34% : 0.000913s : 5262: predicate.switch_simplify 0.80% : 0.000169s : 1199: predicate.tile_eliminate 0.75% : 0.000157s : 1199: predicate.transpose_eliminate 1.03% : 0.000217s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.05% : 0.000220s : 1469: predicate.tuple_list_get_item_const_eliminator 0.93% : 0.000196s : 1469: predicate.tuple_list_get_item_depend_reorder 1.88% : 0.000396s : 2495: predicate.tuple_list_get_item_eliminator 1.00% : 0.000210s : 1469: predicate.tuple_list_get_set_item_eliminator 1.64% : 0.000346s : 2304: predicate.tuple_list_set_item_eliminator 1.06% : 0.000224s : 1660: predicate.tuple_to_list_eliminator_ 1.87% : 0.000394s : 2874: predicate.updatestate_pure_node_eliminater 2.57% : 0.000541s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000021s : 135: predicate.value_based_eliminate 0.29% : 0.000060s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000058s : 395: predicate.virtual_output_eliminate 0.10% : 0.000021s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.063433 747 71.00% : 0.045037s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.31% : 0.001463s : 22: func_graph_cloner_run.FuncGraphClonerNode 26.70% : 0.016934s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.295356 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.27% : 0.041752s : 1: a1a2 0.00% : 0.000164s : 1: add_cache_embedding 0.00% : 0.000155s : 1: add_comm_op_reuse_tag 0.01% : 0.000765s : 1: add_recomputation 0.00% : 0.000407s : 1: assign_add_opt 0.01% : 0.001911s : 1: auto_monad 0.00% : 0.000358s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.001416s : 1: bootstrap 0.00% : 0.000077s : 1: cconv 0.00% : 0.000160s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000121s : 1: convert_after_rewriter 0.00% : 0.000329s : 1: cse_after_recomputation 0.00% : 0.000134s : 1: dataset_repeat_opt 0.00% : 0.000402s : 1: distribtued_split 0.01% : 0.001466s : 1: eliminate_special_op_node 0.00% : 0.000111s : 1: environ_conv 0.00% : 0.000021s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000034s : 1: graph_reusing 0.00% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000009s : 1: handle_group_info 0.28% : 0.043376s : 1: inline 0.01% : 0.001444s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000541s : 1: label_fine_grained_interleaved_index 0.00% : 0.000006s : 1: label_micro_interleaved_index 0.01% : 0.000999s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.032662s : 61: opt.transform.a1a2 0.00% : 0.000200s : 1: opt.transform.loop_unroll_optimizer 0.60% : 0.091578s : 148: opt.transform.opt_a 0.01% : 0.000779s : 1: opt.transform.opt_after_cconv 0.02% : 0.003225s : 27: opt.transform.opt_b 0.25% : 0.037593s : 16: opt.transform.opt_resolve 0.01% : 0.000920s : 1: opt.transform.opt_trans_graph 0.01% : 0.000834s : 6: opt.transform.special_op_eliminate 0.00% : 0.000702s : 4: opt.transform.symbol_engine_opt 3.85% : 0.589426s : 1: opt_a 0.01% : 0.001586s : 1: opt_after_cconv 0.03% : 0.004012s : 1: opt_b 3.95% : 0.604300s : 1: optimize 0.00% : 0.000151s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000086s : 1: order_py_execute_after_rewriter 0.00% : 0.000125s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000152s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000035s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000113s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000206s : 1: parallel-infer-symbol 0.00% : 0.000010s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000136s : 1: pipeline_split 0.00% : 0.000124s : 1: pre_auto_parallel 0.00% : 0.000160s : 1: py_interpret_to_execute 0.00% : 0.000151s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000113s : 1: remove_cast_before_assign_add 0.00% : 0.000633s : 1: remove_dup_value 0.91% : 0.138538s : 3: renormalize.infer 0.35% : 0.053734s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001085s : 1: rewriter_after_opt_a 0.01% : 0.001858s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000160s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000138s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000816s : 1: symbol_engine_optimizer 84.13% : 12.868553s : 1: task_emit 0.01% : 0.000954s : 1: tuple_transform 4.98% : 0.761832s : 1: type_inference 0.01% : 0.001383s : 1: validate TotalTime = 14.4241, [21] [bootstrap]: 0.00132267 [type_inference]: 0.762478 [auto_monad]: 0.00193893 [graph_reusing]: 2.54899e-05 [inline]: 0.044259, [2] [rewriter_before_opt_a]: 0.00157535 [a1a2]: 0.0426443, [2] [Cycle 1]: 0.0294763, [11] [expand_dump_flag]: 6.852e-05 [switch_simplify]: 0.00114262 [loop_unroll]: 0.00069439 [a_1]: 0.0229651 [recompute_prepare]: 0.00018737 [updatestate_depend_eliminate]: 0.00040748 [updatestate_assign_eliminate]: 0.00010333 [updatestate_loads_eliminate]: 0.00021867 [parameter_eliminate]: 6.42997e-06 [a_2]: 0.00338731 [parallel_inline_pass]: 0.00010422 [Cycle 2]: 0.00547427, [11] [expand_dump_flag]: 1.35996e-06 [switch_simplify]: 9.457e-05 [loop_unroll]: 9.23199e-05 [a_1]: 0.0031815 [recompute_prepare]: 0.0001013 [updatestate_depend_eliminate]: 0.00020789 [updatestate_assign_eliminate]: 6.613e-05 [updatestate_loads_eliminate]: 6.3e-05 [parameter_eliminate]: 3.10992e-06 [a_2]: 0.00148551 [parallel_inline_pass]: 0.00010101 [parallel-infer-symbol]: 0.00019006 [pre_auto_parallel]: 9.158e-05 [insert-virtual-dataset]: 0.00130603 [parallel-infer-symbol-second]: 2.55997e-06 [dataset_repeat_opt]: 0.0001041 [pipeline_split]: 0.00010782 [optimize]: 0.615891, [52] [py_interpret_to_execute]: 0.0001445 [rewriter_before_opt_a]: 0.0002819 [opt_a]: 0.600912, [3] [Cycle 1]: 0.517016, [46] [expand_dump_flag]: 1.74006e-06 [switch_simplify]: 0.00010868 [loop_unroll]: 9.619e-05 [a_1]: 0.00332977 [recompute_prepare]: 0.00010574 [updatestate_depend_eliminate]: 0.00010681 [updatestate_assign_eliminate]: 6.475e-05 [updatestate_loads_eliminate]: 6.559e-05 [parameter_eliminate]: 3.26999e-06 [a_2]: 0.00153725 [accelerated_algorithm]: 0.0002368 [shard]: 2.25997e-06 [meta_shard_fg_expand]: 5.083e-05 [shard_inline]: 0.00010595 [auto_parallel]: 8.063e-05 [parallel]: 0.0148035 [flash_sp]: 7.18901e-05 [merge_comm]: 0.00012911 [allreduce_fusion]: 7.45e-05 [matmul_add_comm_reduction]: 9.855e-05 [allreduce_slice_to_reducescatter]: 4.70085e-07 [virtual_shard_identity]: 0.00012308 [virtual_dataset]: 0.00016284 [get_grad_eliminate_]: 0.00011513 [virtual_output]: 0.00011129 [merge_forward]: 7.731e-05 [cell_reuse_recompute_pass]: 2.7999e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020498 [before_grad]: 0.00019296 [inplace_validation]: 0.0001207 [parallel_renormalize]: 0.0254264 [update_top_fg]: 1.02003e-06 [cast_eliminate]: 0.00014234 [meta_fg_expand]: 0.270096 [inplace_validation_after_expand]: 0.00166725 [flash_sp_send_recv_attached]: 0.0012592 [receive_attached]: 8.18401e-05 [after_resolve]: 0.00466705 [a_after_grad]: 0.00422398 [special_op_eliminate]: 0.00200419 [renormalize]: 0.150077 [add_forward_monad_depend]: 0.00036804 [auto_monad_grad]: 0.00238347 [auto_monad_eliminator]: 0.00208578 [cse]: 0.00434307 [a_3]: 0.0254343 [Cycle 2]: 0.0720147, [46] [expand_dump_flag]: 5.882e-05 [switch_simplify]: 0.0018916 [loop_unroll]: 0.00154827 [a_1]: 0.0318181 [recompute_prepare]: 0.00017696 [updatestate_depend_eliminate]: 0.00023585 [updatestate_assign_eliminate]: 0.00010603 [updatestate_loads_eliminate]: 0.00016785 [parameter_eliminate]: 4.59002e-06 [a_2]: 0.004319 [accelerated_algorithm]: 0.00016649 [shard]: 2.21003e-06 [meta_shard_fg_expand]: 9.019e-05 [shard_inline]: 0.00013965 [auto_parallel]: 0.00012356 [parallel]: 1.453e-05 [flash_sp]: 0.00012807 [merge_comm]: 0.00011953 [allreduce_fusion]: 9.22399e-05 [matmul_add_comm_reduction]: 0.00011568 [allreduce_slice_to_reducescatter]: 5.50062e-07 [virtual_shard_identity]: 0.00014521 [virtual_dataset]: 0.00013912 [get_grad_eliminate_]: 0.00013178 [virtual_output]: 0.00013471 [merge_forward]: 9.64e-05 [cell_reuse_recompute_pass]: 2.44007e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025077 [before_grad]: 0.00023778 [inplace_validation]: 8.59699e-05 [parallel_renormalize]: 6.00703e-08 [update_top_fg]: 7.60076e-07 [cast_eliminate]: 0.00015262 [meta_fg_expand]: 0.00029358 [inplace_validation_after_expand]: 0.00018098 [flash_sp_send_recv_attached]: 1.90001e-06 [receive_attached]: 2.23995e-06 [after_resolve]: 0.00016075 [a_after_grad]: 0.00022666 [special_op_eliminate]: 0.0001338 [renormalize]: 0.0193078 [add_forward_monad_depend]: 4.25999e-06 [auto_monad_grad]: 2.14996e-06 [auto_monad_eliminator]: 0.0002801 [cse]: 0.00729064 [a_3]: 0.00096167 [Cycle 3]: 0.0118566, [46] [expand_dump_flag]: 3.22994e-06 [switch_simplify]: 0.00013113 [loop_unroll]: 0.00012802 [a_1]: 0.00429902 [recompute_prepare]: 0.00013609 [updatestate_depend_eliminate]: 0.00015908 [updatestate_assign_eliminate]: 9.465e-05 [updatestate_loads_eliminate]: 9.196e-05 [parameter_eliminate]: 3.00992e-06 [a_2]: 0.00206662 [accelerated_algorithm]: 0.00015554 [shard]: 1.51002e-06 [meta_shard_fg_expand]: 5.802e-05 [shard_inline]: 0.00013316 [auto_parallel]: 0.00011332 [parallel]: 1.082e-05 [flash_sp]: 2.64996e-06 [merge_comm]: 0.00010748 [allreduce_fusion]: 9.537e-05 [matmul_add_comm_reduction]: 0.00011988 [allreduce_slice_to_reducescatter]: 4.69969e-07 [virtual_shard_identity]: 0.0001378 [virtual_dataset]: 0.00013144 [get_grad_eliminate_]: 0.00012575 [virtual_output]: 0.00012872 [merge_forward]: 9.46401e-05 [cell_reuse_recompute_pass]: 2.73995e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024671 [before_grad]: 0.00022885 [inplace_validation]: 8.872e-05 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 6.00005e-07 [cast_eliminate]: 0.00014709 [meta_fg_expand]: 0.00011357 [inplace_validation_after_expand]: 0.00015971 [flash_sp_send_recv_attached]: 2.04006e-06 [receive_attached]: 1.56998e-06 [after_resolve]: 0.00015033 [a_after_grad]: 0.00021802 [special_op_eliminate]: 0.00012971 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 3.00992e-06 [auto_monad_grad]: 2.17999e-06 [auto_monad_eliminator]: 0.000179 [cse]: 0.00042472 [a_3]: 0.00093061 [py_interpret_to_execute_after_opt_a]: 0.00015047 [slice_cell_reuse_recomputed_activation]: 2.54007e-06 [rewriter_after_opt_a]: 0.00104816 [convert_after_rewriter]: 0.00011689 [order_py_execute_after_rewriter]: 8.246e-05 [opt_b]: 0.00395896, [1] [Cycle 1]: 0.00394692, [7] [b_1]: 0.00305462 [b_2]: 0.00013649 [updatestate_depend_eliminate]: 0.0001008 [updatestate_assign_eliminate]: 8.776e-05 [updatestate_loads_eliminate]: 9.141e-05 [renormalize]: 6.89994e-07 [cse]: 0.0004171 [optimize_parallel_all_gather_comm]: 0.00014364 [overlap_param_gather]: 2.61003e-06 [cconv]: 7.551e-05 [loop_unroll]: 0.00107334 [opt_after_cconv]: 0.00159198, [1] [Cycle 1]: 0.00158404, [7] [c_1]: 0.00079375 [parameter_eliminate]: 2.88e-06 [updatestate_depend_eliminate]: 0.00013728 [updatestate_assign_eliminate]: 9.425e-05 [updatestate_loads_eliminate]: 9.242e-05 [cse]: 0.00040596 [renormalize]: 6.20028e-07 [remove_dup_value]: 0.0006438 [tuple_transform]: 0.00099818, [1] [Cycle 1]: 0.00099056, [2] [d_1]: 0.00096997 [renormalize]: 5.30039e-07 [partial_unused_args_eliminate]: 3.54997e-06 [add_cache_embedding]: 0.00016245 [add_recomputation]: 0.00073667 [cse_after_recomputation]: 0.00031667, [1] [Cycle 1]: 0.00030876, [1] [cse]: 0.00029722 [environ_conv]: 9.93899e-05 [swap_dp_allreduce_reducescatter]: 0.00013357 [bias_add_comm_swap]: 3.12005e-06 [label_micro_interleaved_index]: 2.07999e-06 [label_fine_grained_interleaved_index]: 0.00053576 [merge_cast_opt]: 1.79e-06 [slice_recompute_activation]: 0.00014876 [micro_interleaved_order_control]: 2.45997e-06 [assign_add_opt]: 0.00040538 [ForceFp32Comm]: 1.31002e-06 [remove_cast_before_assign_add]: 0.00010912 [full_micro_interleaved_order_control]: 2.49001e-06 [reorder_send_recv_between_fp_bp]: 2.11003e-06 [comm_op_add_attrs]: 0.00015444 [add_comm_op_reuse_tag]: 0.00014679 [interleave_split_concat_branches]: 1.04005e-06 [interleave_parallel_branches]: 8.60076e-07 [overlap_opt_shard_in_pipeline]: 9.25001e-06 [overlap_opt_shard_grad_in_pipeline]: 3.99002e-06 [control_data_broadcast_order]: 1.21992e-06 [grouped_pairwise_exchange_alltoall]: 1.10699e-05 [offloading_packed_experts]: 2.44996e-06 [overlap_recompute_and_grad_model_parallel]: 2.22004e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.49948e-07 [overlap_recompute_allgather_and_fa_grad]: 0.00012269 [overlap_grad_ring_attention]: 0.0001455 [overlap_grad_flash_sp]: 0.0001213 [begin_end_overlap_inline]: 1.13004e-06 [split_matmul_comm_elemetwise]: 2.23005e-06 [split_layernorm_comm]: 2.11003e-06 [handle_group_info]: 8.09005e-06 [symbol_engine_optimizer]: 0.00087328, [1] [Cycle 1]: 0.00086631, [6] [build]: 5.566e-05 [elim_shapecalc]: 0.00021025 [elim_not_effective]: 0.00022143 [opt_reshape]: 0.00013007 [fold_const_symbol]: 0.00021051 [renormalize]: 5.29923e-07 [pipeline_parallel_scheduler]: 3.84997e-06 [auto_monad_reorder]: 0.0003127 [get_jit_bprop_graph]: 5.59958e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00144068 [distribtued_split]: 0.00036627 [validate]: 0.00029246 [task_emit]: 12.9925 [execute]: 1.32299e-05 Sums bootstrap : 0.001323s : 0.01% type_inference : 0.762478s : 5.29% auto_monad : 0.001939s : 0.01% graph_reusing : 0.000025s : 0.00% inline.rewriter_before_opt_a : 0.001575s : 0.01% inline.a1a2.expand_dump_flag : 0.000070s : 0.00% inline.a1a2.switch_simplify : 0.001237s : 0.01% inline.a1a2.loop_unroll : 0.000787s : 0.01% inline.a1a2.a_1 : 0.026147s : 0.18% inline.a1a2.recompute_prepare : 0.000289s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000615s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000169s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000282s : 0.00% inline.a1a2.parameter_eliminate : 0.000010s : 0.00% inline.a1a2.a_2 : 0.004873s : 0.03% inline.a1a2.parallel_inline_pass : 0.000205s : 0.00% parallel-infer-symbol : 0.000190s : 0.00% pre_auto_parallel : 0.000092s : 0.00% insert-virtual-dataset : 0.001306s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000104s : 0.00% pipeline_split : 0.000108s : 0.00% optimize.py_interpret_to_execute : 0.000145s : 0.00% optimize.rewriter_before_opt_a : 0.000282s : 0.00% optimize.opt_a.expand_dump_flag : 0.000064s : 0.00% optimize.opt_a.switch_simplify : 0.002131s : 0.01% optimize.opt_a.loop_unroll : 0.001772s : 0.01% optimize.opt_a.a_1 : 0.039447s : 0.27% optimize.opt_a.recompute_prepare : 0.000419s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000502s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000265s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000325s : 0.00% optimize.opt_a.parameter_eliminate : 0.000011s : 0.00% optimize.opt_a.a_2 : 0.007923s : 0.05% optimize.opt_a.accelerated_algorithm : 0.000559s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000199s : 0.00% optimize.opt_a.shard_inline : 0.000379s : 0.00% optimize.opt_a.auto_parallel : 0.000318s : 0.00% optimize.opt_a.parallel : 0.014829s : 0.10% optimize.opt_a.flash_sp : 0.000203s : 0.00% optimize.opt_a.merge_comm : 0.000356s : 0.00% optimize.opt_a.allreduce_fusion : 0.000262s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000334s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000406s : 0.00% optimize.opt_a.virtual_dataset : 0.000433s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000373s : 0.00% optimize.opt_a.virtual_output : 0.000375s : 0.00% optimize.opt_a.merge_forward : 0.000268s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000702s : 0.00% optimize.opt_a.before_grad : 0.000660s : 0.00% optimize.opt_a.inplace_validation : 0.000295s : 0.00% optimize.opt_a.parallel_renormalize : 0.025427s : 0.18% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000442s : 0.00% optimize.opt_a.meta_fg_expand : 0.270503s : 1.88% optimize.opt_a.inplace_validation_after_expand : 0.002008s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001263s : 0.01% optimize.opt_a.receive_attached : 0.000086s : 0.00% optimize.opt_a.after_resolve : 0.004978s : 0.03% optimize.opt_a.a_after_grad : 0.004669s : 0.03% optimize.opt_a.special_op_eliminate : 0.002268s : 0.02% optimize.opt_a.renormalize : 0.169385s : 1.18% optimize.opt_a.add_forward_monad_depend : 0.000375s : 0.00% optimize.opt_a.auto_monad_grad : 0.002388s : 0.02% optimize.opt_a.auto_monad_eliminator : 0.002545s : 0.02% optimize.opt_a.cse : 0.012058s : 0.08% optimize.opt_a.a_3 : 0.027327s : 0.19% optimize.py_interpret_to_execute_after_opt_a : 0.000150s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001048s : 0.01% optimize.convert_after_rewriter : 0.000117s : 0.00% optimize.order_py_execute_after_rewriter : 0.000082s : 0.00% optimize.opt_b.b_1 : 0.003055s : 0.02% optimize.opt_b.b_2 : 0.000136s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000101s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000088s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000091s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000417s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000144s : 0.00% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000076s : 0.00% optimize.loop_unroll : 0.001073s : 0.01% optimize.opt_after_cconv.c_1 : 0.000794s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000137s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000094s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000092s : 0.00% optimize.opt_after_cconv.cse : 0.000406s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000644s : 0.00% optimize.tuple_transform.d_1 : 0.000970s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000004s : 0.00% optimize.add_cache_embedding : 0.000162s : 0.00% optimize.add_recomputation : 0.000737s : 0.01% optimize.cse_after_recomputation.cse : 0.000297s : 0.00% optimize.environ_conv : 0.000099s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000134s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000536s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000149s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000405s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000109s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000154s : 0.00% optimize.add_comm_op_reuse_tag : 0.000147s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000009s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000123s : 0.00% optimize.overlap_grad_ring_attention : 0.000145s : 0.00% optimize.overlap_grad_flash_sp : 0.000121s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000008s : 0.00% optimize.symbol_engine_optimizer.build : 0.000056s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000210s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000221s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000130s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000211s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000313s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001441s : 0.01% distribtued_split : 0.000366s : 0.00% validate : 0.000292s : 0.00% task_emit : 12.992464s : 90.15% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.052829 4298 0.04% : 0.000020s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000023s : 9: substitution.addn_check_dump 0.11% : 0.000057s : 7: substitution.addn_zero_filter 0.03% : 0.000017s : 7: substitution.adjust_all_reduce_mul_add 0.62% : 0.000328s : 71: substitution.arithmetic_simplify 0.11% : 0.000056s : 10: substitution.cast_eliminate 0.10% : 0.000055s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000026s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000021s : 12: substitution.environ_get_depend_swap 0.06% : 0.000031s : 27: substitution.environ_get_eliminate 0.07% : 0.000035s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000020s : 23: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.02% : 0.000012s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000031s : 107: substitution.fold_const_symbol 59.98% : 0.031685s : 257: substitution.getattr_setattr_resolve 0.16% : 0.000086s : 126: substitution.graph_param_transform 0.02% : 0.000009s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.83% : 0.012590s : 331: substitution.inline 1.45% : 0.000765s : 112: substitution.inline_without_move 0.23% : 0.000122s : 309: substitution.j_node_and_user_rematch 0.23% : 0.000122s : 40: substitution.less_batch_normalization 0.09% : 0.000047s : 90: substitution.load_eliminater 0.10% : 0.000055s : 10: substitution.merge_addn 0.23% : 0.000119s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.03% : 0.000015s : 1: substitution.partial_defer_inline 0.14% : 0.000076s : 23: substitution.partial_eliminate 0.03% : 0.000017s : 26: substitution.reduce_all_const_elim 0.06% : 0.000034s : 15: substitution.reduce_eliminate 0.30% : 0.000157s : 309: substitution.remove_not_recompute_node 2.02% : 0.001068s : 508: substitution.replace_applicator 5.10% : 0.002695s : 251: substitution.replace_old_param 0.07% : 0.000040s : 11: substitution.reshape_eliminate 0.02% : 0.000013s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000011s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.17% : 0.000087s : 34: substitution.switch_simplify 0.06% : 0.000031s : 11: substitution.tile_eliminate 0.51% : 0.000269s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000142s : 107: substitution.tuple_list_get_item_const_eliminator 0.42% : 0.000220s : 107: substitution.tuple_list_get_item_depend_reorder 1.53% : 0.000806s : 308: substitution.tuple_list_get_item_eliminator 0.36% : 0.000192s : 107: substitution.tuple_list_get_set_item_eliminator 0.38% : 0.000202s : 210: substitution.updatestate_pure_node_eliminater 0.66% : 0.000347s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000011s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.761995 2 96.40% : 0.734586s : 1: type_inference.infer 3.60% : 0.027409s : 1: type_inference.specialize ------[replace.] 0.010352 775 0.41% : 0.000043s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000007s : 1: replace.arithmetic_simplify 0.49% : 0.000050s : 7: replace.depend_value_elim 0.41% : 0.000043s : 3: replace.environ_get_set_eliminate 29.72% : 0.003077s : 183: replace.getattr_setattr_resolve 29.66% : 0.003070s : 310: replace.inline 0.20% : 0.000021s : 1: replace.merge_addn 1.16% : 0.000120s : 7: replace.partial_eliminate 4.59% : 0.000476s : 25: replace.replace_applicator 4.02% : 0.000416s : 34: replace.switch_simplify 0.51% : 0.000053s : 6: replace.tuple_list_get_item_depend_reorder 28.40% : 0.002941s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.20% : 0.000020s : 1: replace.virtual_dataset_eliminate ------[match.] 0.042610 775 0.04% : 0.000016s : 5: match.ad_related_special_op_eliminate 0.03% : 0.000014s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000017s : 3: match.environ_get_set_eliminate 69.16% : 0.029470s : 183: match.getattr_setattr_resolve 29.03% : 0.012368s : 310: match.inline 0.06% : 0.000026s : 1: match.merge_addn 0.09% : 0.000040s : 7: match.partial_eliminate 0.25% : 0.000106s : 25: match.replace_applicator 0.16% : 0.000067s : 34: match.switch_simplify 0.08% : 0.000033s : 6: match.tuple_list_get_item_depend_reorder 1.01% : 0.000432s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000008s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.021301131318 0.75% : 0.000160s : 1198: predicate.accumulaten_eliminater 0.28% : 0.000059s : 254: predicate.ad_related_special_op_eliminate 0.54% : 0.000114s : 835: predicate.addn_check_dump 0.83% : 0.000178s : 1198: predicate.addn_zero_filter 0.72% : 0.000154s : 1198: predicate.adjust_all_reduce_mul_add 1.77% : 0.000377s : 2034: predicate.arithmetic_simplify 1.08% : 0.000230s : 1586: predicate.cast_eliminate 3.25% : 0.000692s : 3484: predicate.check_bprop_eliminate 0.55% : 0.000117s : 835: predicate.compare_switch_simplify 0.06% : 0.000012s : 135: predicate.const_output_eliminate 0.17% : 0.000035s : 242: predicate.convert_tensor_all_eliminate 1.23% : 0.000261s : 1399: predicate.convert_tensor_eliminate 0.56% : 0.000118s : 838: predicate.depend_value_elim 0.81% : 0.000172s : 1202: predicate.dict_get_item_const_eliminator 0.83% : 0.000178s : 1202: predicate.dict_get_item_eliminator 0.80% : 0.000169s : 1202: predicate.dict_set_item_eliminator 0.04% : 0.000009s : 126: predicate.elim_not_effective 0.41% : 0.000088s : 126: predicate.elim_shapecalc_of_broadcastargs 0.82% : 0.000174s : 1334: predicate.environ_add_const_eliminate 0.85% : 0.000181s : 1337: predicate.environ_get_add_eliminate 0.89% : 0.000189s : 1334: predicate.environ_get_depend_swap 1.41% : 0.000300s : 2172: predicate.environ_get_eliminate 0.84% : 0.000180s : 1337: predicate.environ_get_set_eliminate 1.10% : 0.000234s : 1717: predicate.exchange_switch_depend_value 1.39% : 0.000296s : 1717: predicate.float_depend_g_call 0.54% : 0.000115s : 835: predicate.float_environ_get_switch 0.63% : 0.000134s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000060s : 395: predicate.get_grad_eliminate 2.28% : 0.000486s : 1893: predicate.getattr_setattr_resolve 0.25% : 0.000054s : 126: predicate.graph_param_transform 0.53% : 0.000114s : 835: predicate.incorporate_call 0.52% : 0.000111s : 835: predicate.incorporate_call_switch 3.80% : 0.000810s : 4602: predicate.inline 2.71% : 0.000576s : 2203: predicate.inline_without_move 0.14% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.33% : 0.000070s : 388: predicate.less_batch_normalization 1.11% : 0.000236s : 1660: predicate.list_to_tuple_eliminator_ 1.94% : 0.000413s : 2874: predicate.load_eliminater 0.20% : 0.000043s : 135: predicate.loop_unroll_after_grad 2.44% : 0.000520s : 2640: predicate.loop_unroll_before_grad 0.95% : 0.000203s : 1478: predicate.make_slice_get_slice_eliminator 0.55% : 0.000117s : 837: predicate.merge_addn 3.18% : 0.000677s : 3380: predicate.micro_step_allgather_replace 3.19% : 0.000680s : 3380: predicate.mini_step_allgather_replace 0.75% : 0.000161s : 1199: predicate.minmaximum_grad 0.17% : 0.000037s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.11% : 0.000024s : 135: predicate.parallel_virtual_node 2.08% : 0.000444s : 1717: predicate.partial_defer_inline 1.06% : 0.000226s : 1541: predicate.partial_eliminate 0.76% : 0.000161s : 1198: predicate.print_const_string_wrapper 0.55% : 0.000116s : 824: predicate.reduce_all_const_elim 0.94% : 0.000200s : 1199: predicate.reduce_eliminate 0.14% : 0.000030s : 395: predicate.remove_not_recompute_node 1.90% : 0.000406s : 4829: predicate.replace_applicator 0.78% : 0.000166s : 2203: predicate.replace_old_param 0.05% : 0.000011s : 135: predicate.reset_defer_inline 0.77% : 0.000164s : 1199: predicate.reshape_eliminate 3.25% : 0.000692s : 3380: predicate.row_tensor_add_zeros_like 0.10% : 0.000022s : 135: predicate.row_tensor_eliminate 3.34% : 0.000712s : 3484: predicate.same_eliminate 0.23% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.30% : 0.000063s : 395: predicate.shard_identity_eliminate 2.32% : 0.000495s : 2338: predicate.special_op_eliminate 0.62% : 0.000132s : 837: predicate.specialize_transform 3.50% : 0.000746s : 3380: predicate.split_environ_get_set_with_tuple_value 1.56% : 0.000331s : 2203: predicate.stack_unstack_eliminate 1.79% : 0.000380s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.20% : 0.000255s : 1717: predicate.switch_defer_inline 4.45% : 0.000948s : 5201: predicate.switch_layer_defer_inline 4.41% : 0.000940s : 5262: predicate.switch_simplify 0.75% : 0.000160s : 1199: predicate.tile_eliminate 0.75% : 0.000160s : 1199: predicate.transpose_eliminate 1.03% : 0.000219s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.05% : 0.000223s : 1469: predicate.tuple_list_get_item_const_eliminator 0.94% : 0.000200s : 1469: predicate.tuple_list_get_item_depend_reorder 1.85% : 0.000394s : 2495: predicate.tuple_list_get_item_eliminator 0.98% : 0.000210s : 1469: predicate.tuple_list_get_set_item_eliminator 1.62% : 0.000346s : 2304: predicate.tuple_list_set_item_eliminator 1.06% : 0.000225s : 1660: predicate.tuple_to_list_eliminator_ 1.84% : 0.000392s : 2874: predicate.updatestate_pure_node_eliminater 2.46% : 0.000524s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000021s : 135: predicate.value_based_eliminate 0.29% : 0.000061s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000058s : 395: predicate.virtual_output_eliminate 0.10% : 0.000022s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.060754 747 68.98% : 0.041910s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.50% : 0.001522s : 22: func_graph_cloner_run.FuncGraphClonerNode 28.51% : 0.017322s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.451649 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.28% : 0.042650s : 1: a1a2 0.00% : 0.000169s : 1: add_cache_embedding 0.00% : 0.000153s : 1: add_comm_op_reuse_tag 0.00% : 0.000748s : 1: add_recomputation 0.00% : 0.000413s : 1: assign_add_opt 0.01% : 0.001963s : 1: auto_monad 0.00% : 0.000325s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001381s : 1: bootstrap 0.00% : 0.000082s : 1: cconv 0.00% : 0.000161s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000125s : 1: convert_after_rewriter 0.00% : 0.000321s : 1: cse_after_recomputation 0.00% : 0.000113s : 1: dataset_repeat_opt 0.00% : 0.000379s : 1: distribtued_split 0.01% : 0.001456s : 1: eliminate_special_op_node 0.00% : 0.000107s : 1: environ_conv 0.00% : 0.000023s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000034s : 1: graph_reusing 0.00% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000011s : 1: handle_group_info 0.29% : 0.044269s : 1: inline 0.01% : 0.001329s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000543s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.001085s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.22% : 0.033458s : 61: opt.transform.a1a2 0.00% : 0.000181s : 1: opt.transform.loop_unroll_optimizer 0.61% : 0.095008s : 148: opt.transform.opt_a 0.01% : 0.000791s : 1: opt.transform.opt_after_cconv 0.02% : 0.003159s : 27: opt.transform.opt_b 0.24% : 0.037650s : 16: opt.transform.opt_resolve 0.01% : 0.000966s : 1: opt.transform.opt_trans_graph 0.01% : 0.000832s : 6: opt.transform.special_op_eliminate 0.00% : 0.000766s : 4: opt.transform.symbol_engine_opt 3.89% : 0.600924s : 1: opt_a 0.01% : 0.001598s : 1: opt_after_cconv 0.03% : 0.003963s : 1: opt_b 3.99% : 0.615906s : 1: optimize 0.00% : 0.000152s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000088s : 1: order_py_execute_after_rewriter 0.00% : 0.000126s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000151s : 1: overlap_grad_ring_attention 0.00% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000013s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000128s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000202s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000118s : 1: pipeline_split 0.00% : 0.000101s : 1: pre_auto_parallel 0.00% : 0.000152s : 1: py_interpret_to_execute 0.00% : 0.000160s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000114s : 1: remove_cast_before_assign_add 0.00% : 0.000659s : 1: remove_dup_value 0.90% : 0.139591s : 3: renormalize.infer 0.36% : 0.055173s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001060s : 1: rewriter_after_opt_a 0.01% : 0.001881s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000154s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000140s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000877s : 1: symbol_engine_optimizer 84.08% : 12.992510s : 1: task_emit 0.01% : 0.001003s : 1: tuple_transform 4.93% : 0.762515s : 1: type_inference 0.01% : 0.001388s : 1: validate TotalTime = 14.2999, [21] [bootstrap]: 0.00161838 [type_inference]: 0.758897 [auto_monad]: 0.00207706 [graph_reusing]: 2.68799e-05 [inline]: 0.0429892, [2] [rewriter_before_opt_a]: 0.00148523 [a1a2]: 0.0414337, [2] [Cycle 1]: 0.0282156, [11] [expand_dump_flag]: 3.259e-05 [switch_simplify]: 0.00104197 [loop_unroll]: 0.00066707 [a_1]: 0.0219208 [recompute_prepare]: 0.00015999 [updatestate_depend_eliminate]: 0.00035402 [updatestate_assign_eliminate]: 0.00011193 [updatestate_loads_eliminate]: 0.00019754 [parameter_eliminate]: 5.91995e-06 [a_2]: 0.00341313 [parallel_inline_pass]: 0.00010357 [Cycle 2]: 0.00544375, [11] [expand_dump_flag]: 1.17009e-06 [switch_simplify]: 9.276e-05 [loop_unroll]: 9.14399e-05 [a_1]: 0.00313737 [recompute_prepare]: 9.772e-05 [updatestate_depend_eliminate]: 0.00022324 [updatestate_assign_eliminate]: 6.345e-05 [updatestate_loads_eliminate]: 6.267e-05 [parameter_eliminate]: 3.28e-06 [a_2]: 0.00149518 [parallel_inline_pass]: 0.00010076 [parallel-infer-symbol]: 0.0001997 [pre_auto_parallel]: 9.024e-05 [insert-virtual-dataset]: 0.00132063 [parallel-infer-symbol-second]: 2.49001e-06 [dataset_repeat_opt]: 0.00012075 [pipeline_split]: 0.00010169 [optimize]: 0.587612, [52] [py_interpret_to_execute]: 0.00011868 [rewriter_before_opt_a]: 0.00027329 [opt_a]: 0.573114, [3] [Cycle 1]: 0.494397, [46] [expand_dump_flag]: 1.79e-06 [switch_simplify]: 0.00010993 [loop_unroll]: 9.76501e-05 [a_1]: 0.00327313 [recompute_prepare]: 0.00010341 [updatestate_depend_eliminate]: 0.00010403 [updatestate_assign_eliminate]: 6.154e-05 [updatestate_loads_eliminate]: 6.77999e-05 [parameter_eliminate]: 3.12994e-06 [a_2]: 0.0016213 [accelerated_algorithm]: 0.00029687 [shard]: 2.42994e-06 [meta_shard_fg_expand]: 4.912e-05 [shard_inline]: 0.00010772 [auto_parallel]: 8.168e-05 [parallel]: 0.0156225 [flash_sp]: 7.441e-05 [merge_comm]: 0.0001302 [allreduce_fusion]: 7.499e-05 [matmul_add_comm_reduction]: 9.76099e-05 [allreduce_slice_to_reducescatter]: 4.89992e-07 [virtual_shard_identity]: 0.00015787 [virtual_dataset]: 0.00016866 [get_grad_eliminate_]: 0.00011508 [virtual_output]: 0.00011207 [merge_forward]: 8.04299e-05 [cell_reuse_recompute_pass]: 2.64996e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020464 [before_grad]: 0.00020245 [inplace_validation]: 0.00012787 [parallel_renormalize]: 0.0214238 [update_top_fg]: 7.89994e-07 [cast_eliminate]: 0.00014463 [meta_fg_expand]: 0.260849 [inplace_validation_after_expand]: 0.00159722 [flash_sp_send_recv_attached]: 0.00120356 [receive_attached]: 6.63201e-05 [after_resolve]: 0.00195996 [a_after_grad]: 0.00395382 [special_op_eliminate]: 0.00182902 [renormalize]: 0.146705 [add_forward_monad_depend]: 0.0003473 [auto_monad_grad]: 0.0002044 [auto_monad_eliminator]: 0.00177355 [cse]: 0.00417331 [a_3]: 0.0244782 [Cycle 2]: 0.0670522, [46] [expand_dump_flag]: 5.113e-05 [switch_simplify]: 0.00180065 [loop_unroll]: 0.00149928 [a_1]: 0.0305968 [recompute_prepare]: 0.00016906 [updatestate_depend_eliminate]: 0.00022161 [updatestate_assign_eliminate]: 0.00010157 [updatestate_loads_eliminate]: 0.00016101 [parameter_eliminate]: 3.99002e-06 [a_2]: 0.00428084 [accelerated_algorithm]: 0.00016017 [shard]: 2.29001e-06 [meta_shard_fg_expand]: 7.018e-05 [shard_inline]: 0.00013823 [auto_parallel]: 0.00011513 [parallel]: 1.341e-05 [flash_sp]: 0.00011845 [merge_comm]: 0.00012317 [allreduce_fusion]: 9.54199e-05 [matmul_add_comm_reduction]: 0.00011197 [allreduce_slice_to_reducescatter]: 4.69969e-07 [virtual_shard_identity]: 0.00014115 [virtual_dataset]: 0.00013643 [get_grad_eliminate_]: 0.00013186 [virtual_output]: 0.00013423 [merge_forward]: 8.90699e-05 [cell_reuse_recompute_pass]: 2.44996e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024835 [before_grad]: 0.00023511 [inplace_validation]: 8.45301e-05 [parallel_renormalize]: 6.99656e-08 [update_top_fg]: 5.70086e-07 [cast_eliminate]: 0.00015197 [meta_fg_expand]: 0.00025875 [inplace_validation_after_expand]: 0.00017818 [flash_sp_send_recv_attached]: 2.25997e-06 [receive_attached]: 1.25007e-06 [after_resolve]: 0.00015776 [a_after_grad]: 0.000224 [special_op_eliminate]: 0.00013374 [renormalize]: 0.0170024 [add_forward_monad_depend]: 5.13997e-06 [auto_monad_grad]: 1.76998e-06 [auto_monad_eliminator]: 0.00030429 [cse]: 0.00623882 [a_3]: 0.00094333 [Cycle 3]: 0.0116437, [46] [expand_dump_flag]: 1.86998e-06 [switch_simplify]: 0.00013131 [loop_unroll]: 0.00012774 [a_1]: 0.00422329 [recompute_prepare]: 0.00013204 [updatestate_depend_eliminate]: 0.00016633 [updatestate_assign_eliminate]: 9.269e-05 [updatestate_loads_eliminate]: 9.1e-05 [parameter_eliminate]: 3.12005e-06 [a_2]: 0.00204742 [accelerated_algorithm]: 0.00015495 [shard]: 1.23004e-06 [meta_shard_fg_expand]: 4.75501e-05 [shard_inline]: 0.00013109 [auto_parallel]: 0.0001086 [parallel]: 9.22999e-06 [flash_sp]: 2.11003e-06 [merge_comm]: 0.00010531 [allreduce_fusion]: 9.427e-05 [matmul_add_comm_reduction]: 0.00011579 [allreduce_slice_to_reducescatter]: 4.09898e-07 [virtual_shard_identity]: 0.00013656 [virtual_dataset]: 0.00013174 [get_grad_eliminate_]: 0.00012605 [virtual_output]: 0.00012835 [merge_forward]: 9.251e-05 [cell_reuse_recompute_pass]: 2.71003e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025699 [before_grad]: 0.00023246 [inplace_validation]: 9.28401e-05 [parallel_renormalize]: 1.10012e-07 [update_top_fg]: 4.89992e-07 [cast_eliminate]: 0.00014583 [meta_fg_expand]: 0.00011145 [inplace_validation_after_expand]: 0.00011478 [flash_sp_send_recv_attached]: 1.45007e-06 [receive_attached]: 1.20001e-06 [after_resolve]: 0.00014509 [a_after_grad]: 0.00021746 [special_op_eliminate]: 0.00012844 [renormalize]: 1.10012e-07 [add_forward_monad_depend]: 2.59001e-06 [auto_monad_grad]: 1.73994e-06 [auto_monad_eliminator]: 0.0001697 [cse]: 0.00039916 [a_3]: 0.00092896 [py_interpret_to_execute_after_opt_a]: 0.00012643 [slice_cell_reuse_recomputed_activation]: 2.74007e-06 [rewriter_after_opt_a]: 0.00109608 [convert_after_rewriter]: 0.00011615 [order_py_execute_after_rewriter]: 8.196e-05 [opt_b]: 0.00392933, [1] [Cycle 1]: 0.00392134, [7] [b_1]: 0.00301449 [b_2]: 0.00013616 [updatestate_depend_eliminate]: 9.591e-05 [updatestate_assign_eliminate]: 8.54499e-05 [updatestate_loads_eliminate]: 8.834e-05 [renormalize]: 3.69968e-07 [cse]: 0.00044643 [optimize_parallel_all_gather_comm]: 0.00013531 [overlap_param_gather]: 1.16997e-06 [cconv]: 6.38401e-05 [loop_unroll]: 0.0009442 [opt_after_cconv]: 0.00154322, [1] [Cycle 1]: 0.00153575, [7] [c_1]: 0.00077416 [parameter_eliminate]: 2.69001e-06 [updatestate_depend_eliminate]: 0.00012874 [updatestate_assign_eliminate]: 9.142e-05 [updatestate_loads_eliminate]: 9.12599e-05 [cse]: 0.00039081 [renormalize]: 5.89993e-07 [remove_dup_value]: 0.00057433 [tuple_transform]: 0.00097694, [1] [Cycle 1]: 0.00096978, [2] [d_1]: 0.0009502 [renormalize]: 5.10016e-07 [partial_unused_args_eliminate]: 3.25998e-06 [add_cache_embedding]: 0.00014689 [add_recomputation]: 0.00070098 [cse_after_recomputation]: 0.00030929, [1] [Cycle 1]: 0.00030161, [1] [cse]: 0.00028799 [environ_conv]: 9.263e-05 [swap_dp_allreduce_reducescatter]: 0.00012545 [bias_add_comm_swap]: 3.06999e-06 [label_micro_interleaved_index]: 1.80001e-06 [label_fine_grained_interleaved_index]: 0.00052497 [merge_cast_opt]: 1.55997e-06 [slice_recompute_activation]: 0.00014453 [micro_interleaved_order_control]: 1.56998e-06 [assign_add_opt]: 0.00038306 [ForceFp32Comm]: 1.51002e-06 [remove_cast_before_assign_add]: 0.00010518 [full_micro_interleaved_order_control]: 2.52004e-06 [reorder_send_recv_between_fp_bp]: 1.69e-06 [comm_op_add_attrs]: 0.00014437 [add_comm_op_reuse_tag]: 0.00014666 [interleave_split_concat_branches]: 9.80101e-07 [interleave_parallel_branches]: 1.17009e-06 [overlap_opt_shard_in_pipeline]: 3.369e-05 [overlap_opt_shard_grad_in_pipeline]: 3.64997e-06 [control_data_broadcast_order]: 1.13004e-06 [grouped_pairwise_exchange_alltoall]: 1.09e-05 [offloading_packed_experts]: 2.53995e-06 [overlap_recompute_and_grad_model_parallel]: 2.11003e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.69971e-07 [overlap_recompute_allgather_and_fa_grad]: 9.862e-05 [overlap_grad_ring_attention]: 0.00014405 [overlap_grad_flash_sp]: 0.00014461 [begin_end_overlap_inline]: 1.0801e-06 [split_matmul_comm_elemetwise]: 2.10002e-06 [split_layernorm_comm]: 2.02004e-06 [handle_group_info]: 5.53997e-06 [symbol_engine_optimizer]: 0.00079461, [1] [Cycle 1]: 0.00078776, [6] [build]: 5.02899e-05 [elim_shapecalc]: 0.0001443 [elim_not_effective]: 0.00021866 [opt_reshape]: 0.00012868 [fold_const_symbol]: 0.00020953 [renormalize]: 4.49945e-07 [pipeline_parallel_scheduler]: 3.49001e-06 [auto_monad_reorder]: 0.00034743 [get_jit_bprop_graph]: 5.50062e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.00137225 [distribtued_split]: 0.00038982 [validate]: 0.00028675 [task_emit]: 12.9009 [execute]: 1.26901e-05 Sums bootstrap : 0.001618s : 0.01% type_inference : 0.758897s : 5.31% auto_monad : 0.002077s : 0.01% graph_reusing : 0.000027s : 0.00% inline.rewriter_before_opt_a : 0.001485s : 0.01% inline.a1a2.expand_dump_flag : 0.000034s : 0.00% inline.a1a2.switch_simplify : 0.001135s : 0.01% inline.a1a2.loop_unroll : 0.000759s : 0.01% inline.a1a2.a_1 : 0.025058s : 0.18% inline.a1a2.recompute_prepare : 0.000258s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000577s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000175s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000260s : 0.00% inline.a1a2.parameter_eliminate : 0.000009s : 0.00% inline.a1a2.a_2 : 0.004908s : 0.03% inline.a1a2.parallel_inline_pass : 0.000204s : 0.00% parallel-infer-symbol : 0.000200s : 0.00% pre_auto_parallel : 0.000090s : 0.00% insert-virtual-dataset : 0.001321s : 0.01% parallel-infer-symbol-second : 0.000002s : 0.00% dataset_repeat_opt : 0.000121s : 0.00% pipeline_split : 0.000102s : 0.00% optimize.py_interpret_to_execute : 0.000119s : 0.00% optimize.rewriter_before_opt_a : 0.000273s : 0.00% optimize.opt_a.expand_dump_flag : 0.000055s : 0.00% optimize.opt_a.switch_simplify : 0.002042s : 0.01% optimize.opt_a.loop_unroll : 0.001725s : 0.01% optimize.opt_a.a_1 : 0.038093s : 0.27% optimize.opt_a.recompute_prepare : 0.000405s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000492s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000256s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000320s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.007950s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000612s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000167s : 0.00% optimize.opt_a.shard_inline : 0.000377s : 0.00% optimize.opt_a.auto_parallel : 0.000305s : 0.00% optimize.opt_a.parallel : 0.015645s : 0.11% optimize.opt_a.flash_sp : 0.000195s : 0.00% optimize.opt_a.merge_comm : 0.000359s : 0.00% optimize.opt_a.allreduce_fusion : 0.000265s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000325s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000436s : 0.00% optimize.opt_a.virtual_dataset : 0.000437s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000373s : 0.00% optimize.opt_a.virtual_output : 0.000375s : 0.00% optimize.opt_a.merge_forward : 0.000262s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000710s : 0.00% optimize.opt_a.before_grad : 0.000670s : 0.00% optimize.opt_a.inplace_validation : 0.000305s : 0.00% optimize.opt_a.parallel_renormalize : 0.021424s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000442s : 0.00% optimize.opt_a.meta_fg_expand : 0.261219s : 1.83% optimize.opt_a.inplace_validation_after_expand : 0.001890s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001207s : 0.01% optimize.opt_a.receive_attached : 0.000069s : 0.00% optimize.opt_a.after_resolve : 0.002263s : 0.02% optimize.opt_a.a_after_grad : 0.004395s : 0.03% optimize.opt_a.special_op_eliminate : 0.002091s : 0.01% optimize.opt_a.renormalize : 0.163707s : 1.15% optimize.opt_a.add_forward_monad_depend : 0.000355s : 0.00% optimize.opt_a.auto_monad_grad : 0.000208s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002248s : 0.02% optimize.opt_a.cse : 0.010811s : 0.08% optimize.opt_a.a_3 : 0.026350s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000126s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.001096s : 0.01% optimize.convert_after_rewriter : 0.000116s : 0.00% optimize.order_py_execute_after_rewriter : 0.000082s : 0.00% optimize.opt_b.b_1 : 0.003014s : 0.02% optimize.opt_b.b_2 : 0.000136s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000096s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000085s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000088s : 0.00% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000446s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000135s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000064s : 0.00% optimize.loop_unroll : 0.000944s : 0.01% optimize.opt_after_cconv.c_1 : 0.000774s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000129s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000091s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000091s : 0.00% optimize.opt_after_cconv.cse : 0.000391s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000574s : 0.00% optimize.tuple_transform.d_1 : 0.000950s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000147s : 0.00% optimize.add_recomputation : 0.000701s : 0.00% optimize.cse_after_recomputation.cse : 0.000288s : 0.00% optimize.environ_conv : 0.000093s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000125s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000525s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000145s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000383s : 0.00% optimize.ForceFp32Comm : 0.000002s : 0.00% optimize.remove_cast_before_assign_add : 0.000105s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000144s : 0.00% optimize.add_comm_op_reuse_tag : 0.000147s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000034s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000004s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.00% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000099s : 0.00% optimize.overlap_grad_ring_attention : 0.000144s : 0.00% optimize.overlap_grad_flash_sp : 0.000145s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000006s : 0.00% optimize.symbol_engine_optimizer.build : 0.000050s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000144s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000219s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000129s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000210s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000347s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001372s : 0.01% distribtued_split : 0.000390s : 0.00% validate : 0.000287s : 0.00% task_emit : 12.900926s : 90.29% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.047658 4298 0.04% : 0.000020s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000020s : 9: substitution.addn_check_dump 0.11% : 0.000050s : 7: substitution.addn_zero_filter 0.03% : 0.000015s : 7: substitution.adjust_all_reduce_mul_add 0.66% : 0.000316s : 71: substitution.arithmetic_simplify 0.11% : 0.000052s : 10: substitution.cast_eliminate 0.11% : 0.000054s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000001s : 1: substitution.elim_shapecalc_of_broadcastargs 0.09% : 0.000045s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000019s : 12: substitution.environ_get_depend_swap 0.06% : 0.000030s : 27: substitution.environ_get_eliminate 0.07% : 0.000034s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000019s : 23: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.03% : 0.000012s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000030s : 107: substitution.fold_const_symbol 64.08% : 0.030538s : 257: substitution.getattr_setattr_resolve 0.18% : 0.000084s : 126: substitution.graph_param_transform 0.02% : 0.000009s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 23.96% : 0.011418s : 331: substitution.inline 1.41% : 0.000670s : 112: substitution.inline_without_move 0.26% : 0.000123s : 309: substitution.j_node_and_user_rematch 0.37% : 0.000174s : 40: substitution.less_batch_normalization 0.10% : 0.000046s : 90: substitution.load_eliminater 0.11% : 0.000050s : 10: substitution.merge_addn 0.24% : 0.000114s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.08% : 0.000040s : 1: substitution.partial_defer_inline 0.12% : 0.000058s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.07% : 0.000032s : 15: substitution.reduce_eliminate 0.33% : 0.000156s : 309: substitution.remove_not_recompute_node 2.05% : 0.000975s : 508: substitution.replace_applicator 0.22% : 0.000106s : 251: substitution.replace_old_param 0.08% : 0.000037s : 11: substitution.reshape_eliminate 0.03% : 0.000013s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000011s : 4: substitution.specialize_transform 0.03% : 0.000016s : 12: substitution.split_environ_get_set_with_tuple_value 0.17% : 0.000080s : 34: substitution.switch_simplify 0.06% : 0.000029s : 11: substitution.tile_eliminate 0.54% : 0.000255s : 101: substitution.tuple_list_convert_item_index_to_positive 0.29% : 0.000136s : 107: substitution.tuple_list_get_item_const_eliminator 0.44% : 0.000210s : 107: substitution.tuple_list_get_item_depend_reorder 1.62% : 0.000771s : 308: substitution.tuple_list_get_item_eliminator 0.39% : 0.000184s : 107: substitution.tuple_list_get_set_item_eliminator 0.41% : 0.000193s : 210: substitution.updatestate_pure_node_eliminater 0.71% : 0.000338s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000011s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.758433 2 96.66% : 0.733098s : 1: type_inference.infer 3.34% : 0.025335s : 1: type_inference.specialize ------[replace.] 0.009693 775 0.42% : 0.000041s : 5: replace.ad_related_special_op_eliminate 0.08% : 0.000007s : 1: replace.arithmetic_simplify 0.52% : 0.000050s : 7: replace.depend_value_elim 0.42% : 0.000040s : 3: replace.environ_get_set_eliminate 30.99% : 0.003004s : 183: replace.getattr_setattr_resolve 29.57% : 0.002866s : 310: replace.inline 0.22% : 0.000021s : 1: replace.merge_addn 1.16% : 0.000112s : 7: replace.partial_eliminate 3.85% : 0.000373s : 25: replace.replace_applicator 3.71% : 0.000360s : 34: replace.switch_simplify 0.52% : 0.000051s : 6: replace.tuple_list_get_item_depend_reorder 28.17% : 0.002731s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.20% : 0.000020s : 1: replace.virtual_dataset_eliminate ------[match.] 0.040327 775 0.04% : 0.000016s : 5: match.ad_related_special_op_eliminate 0.03% : 0.000013s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000016s : 3: match.environ_get_set_eliminate 70.45% : 0.028411s : 183: match.getattr_setattr_resolve 27.75% : 0.011190s : 310: match.inline 0.05% : 0.000022s : 1: match.merge_addn 0.09% : 0.000038s : 7: match.partial_eliminate 0.24% : 0.000098s : 25: match.replace_applicator 0.15% : 0.000061s : 34: match.switch_simplify 0.07% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 1.02% : 0.000411s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000008s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020691131318 0.79% : 0.000163s : 1198: predicate.accumulaten_eliminater 0.27% : 0.000056s : 254: predicate.ad_related_special_op_eliminate 0.54% : 0.000113s : 835: predicate.addn_check_dump 0.76% : 0.000156s : 1198: predicate.addn_zero_filter 0.74% : 0.000154s : 1198: predicate.adjust_all_reduce_mul_add 1.84% : 0.000380s : 2034: predicate.arithmetic_simplify 1.12% : 0.000231s : 1586: predicate.cast_eliminate 3.25% : 0.000672s : 3484: predicate.check_bprop_eliminate 0.55% : 0.000115s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.18% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.16% : 0.000241s : 1399: predicate.convert_tensor_eliminate 0.57% : 0.000117s : 838: predicate.depend_value_elim 0.84% : 0.000173s : 1202: predicate.dict_get_item_const_eliminator 0.85% : 0.000176s : 1202: predicate.dict_get_item_eliminator 0.84% : 0.000175s : 1202: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 126: predicate.elim_not_effective 0.10% : 0.000021s : 126: predicate.elim_shapecalc_of_broadcastargs 0.85% : 0.000176s : 1334: predicate.environ_add_const_eliminate 0.85% : 0.000176s : 1337: predicate.environ_get_add_eliminate 0.85% : 0.000175s : 1334: predicate.environ_get_depend_swap 1.47% : 0.000305s : 2172: predicate.environ_get_eliminate 0.83% : 0.000173s : 1337: predicate.environ_get_set_eliminate 1.12% : 0.000232s : 1717: predicate.exchange_switch_depend_value 1.55% : 0.000320s : 1717: predicate.float_depend_g_call 0.56% : 0.000116s : 835: predicate.float_environ_get_switch 0.65% : 0.000134s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000059s : 395: predicate.get_grad_eliminate 2.30% : 0.000476s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000115s : 835: predicate.incorporate_call 0.54% : 0.000112s : 835: predicate.incorporate_call_switch 4.17% : 0.000864s : 4602: predicate.inline 2.33% : 0.000483s : 2203: predicate.inline_without_move 0.14% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.32% : 0.000066s : 388: predicate.less_batch_normalization 1.10% : 0.000227s : 1660: predicate.list_to_tuple_eliminator_ 1.85% : 0.000383s : 2874: predicate.load_eliminater 0.19% : 0.000038s : 135: predicate.loop_unroll_after_grad 2.39% : 0.000495s : 2640: predicate.loop_unroll_before_grad 0.98% : 0.000202s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000116s : 837: predicate.merge_addn 3.14% : 0.000650s : 3380: predicate.micro_step_allgather_replace 3.20% : 0.000662s : 3380: predicate.mini_step_allgather_replace 0.80% : 0.000166s : 1199: predicate.minmaximum_grad 0.17% : 0.000036s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000021s : 135: predicate.parallel_virtual_node 2.00% : 0.000413s : 1717: predicate.partial_defer_inline 1.10% : 0.000228s : 1541: predicate.partial_eliminate 0.79% : 0.000163s : 1198: predicate.print_const_string_wrapper 0.56% : 0.000116s : 824: predicate.reduce_all_const_elim 0.94% : 0.000195s : 1199: predicate.reduce_eliminate 0.14% : 0.000029s : 395: predicate.remove_not_recompute_node 1.93% : 0.000399s : 4829: predicate.replace_applicator 0.80% : 0.000165s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.79% : 0.000164s : 1199: predicate.reshape_eliminate 3.17% : 0.000657s : 3380: predicate.row_tensor_add_zeros_like 0.10% : 0.000022s : 135: predicate.row_tensor_eliminate 3.33% : 0.000690s : 3484: predicate.same_eliminate 0.24% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.29% : 0.000061s : 395: predicate.shard_identity_eliminate 2.13% : 0.000441s : 2338: predicate.special_op_eliminate 0.63% : 0.000131s : 837: predicate.specialize_transform 3.49% : 0.000722s : 3380: predicate.split_environ_get_set_with_tuple_value 1.57% : 0.000325s : 2203: predicate.stack_unstack_eliminate 1.85% : 0.000384s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.29% : 0.000267s : 1717: predicate.switch_defer_inline 4.45% : 0.000920s : 5201: predicate.switch_layer_defer_inline 4.36% : 0.000903s : 5262: predicate.switch_simplify 0.77% : 0.000160s : 1199: predicate.tile_eliminate 0.77% : 0.000159s : 1199: predicate.transpose_eliminate 1.06% : 0.000220s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.06% : 0.000218s : 1469: predicate.tuple_list_get_item_const_eliminator 0.93% : 0.000192s : 1469: predicate.tuple_list_get_item_depend_reorder 1.90% : 0.000393s : 2495: predicate.tuple_list_get_item_eliminator 1.03% : 0.000214s : 1469: predicate.tuple_list_get_set_item_eliminator 1.63% : 0.000338s : 2304: predicate.tuple_list_set_item_eliminator 1.07% : 0.000221s : 1660: predicate.tuple_to_list_eliminator_ 1.86% : 0.000384s : 2874: predicate.updatestate_pure_node_eliminater 2.50% : 0.000518s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000020s : 135: predicate.value_based_eliminate 0.29% : 0.000060s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000057s : 395: predicate.virtual_output_eliminate 0.10% : 0.000021s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.058917 747 69.62% : 0.041017s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.40% : 0.001416s : 22: func_graph_cloner_run.FuncGraphClonerNode 27.98% : 0.016484s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.280170 346 0.00% : 0.000006s : 1: ForceFp32Comm 0.27% : 0.041438s : 1: a1a2 0.00% : 0.000153s : 1: add_cache_embedding 0.00% : 0.000154s : 1: add_comm_op_reuse_tag 0.00% : 0.000713s : 1: add_recomputation 0.00% : 0.000393s : 1: assign_add_opt 0.01% : 0.002101s : 1: auto_monad 0.00% : 0.000362s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.01% : 0.001675s : 1: bootstrap 0.00% : 0.000070s : 1: cconv 0.00% : 0.000151s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000124s : 1: convert_after_rewriter 0.00% : 0.000314s : 1: cse_after_recomputation 0.00% : 0.000130s : 1: dataset_repeat_opt 0.00% : 0.000406s : 1: distribtued_split 0.01% : 0.001388s : 1: eliminate_special_op_node 0.00% : 0.000102s : 1: environ_conv 0.00% : 0.000023s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000035s : 1: graph_reusing 0.00% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000008s : 1: handle_group_info 0.28% : 0.042998s : 1: inline 0.01% : 0.001344s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000533s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000956s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.032250s : 61: opt.transform.a1a2 0.00% : 0.000171s : 1: opt.transform.loop_unroll_optimizer 0.59% : 0.089511s : 148: opt.transform.opt_a 0.01% : 0.000772s : 1: opt.transform.opt_after_cconv 0.02% : 0.003123s : 27: opt.transform.opt_b 0.24% : 0.036569s : 16: opt.transform.opt_resolve 0.01% : 0.000947s : 1: opt.transform.opt_trans_graph 0.01% : 0.000819s : 6: opt.transform.special_op_eliminate 0.00% : 0.000695s : 4: opt.transform.symbol_engine_opt 3.75% : 0.573120s : 1: opt_a 0.01% : 0.001549s : 1: opt_after_cconv 0.03% : 0.003934s : 1: opt_b 3.85% : 0.587624s : 1: optimize 0.00% : 0.000143s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000088s : 1: order_py_execute_after_rewriter 0.00% : 0.000150s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000149s : 1: overlap_grad_ring_attention 0.00% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000039s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000104s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000212s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000112s : 1: pipeline_split 0.00% : 0.000100s : 1: pre_auto_parallel 0.00% : 0.000127s : 1: py_interpret_to_execute 0.00% : 0.000134s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000111s : 1: remove_cast_before_assign_add 0.00% : 0.000589s : 1: remove_dup_value 0.87% : 0.133092s : 3: renormalize.infer 0.34% : 0.052001s : 3: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001109s : 1: rewriter_after_opt_a 0.01% : 0.001782s : 2: rewriter_before_opt_a 0.00% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000151s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000133s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000798s : 1: symbol_engine_optimizer 84.43% : 12.900977s : 1: task_emit 0.01% : 0.000982s : 1: tuple_transform 4.97% : 0.758939s : 1: type_inference 0.01% : 0.001350s : 1: validate TotalTime = 14.4672, [21] [bootstrap]: 0.00131044 [type_inference]: 0.776087 [auto_monad]: 0.00185633 [graph_reusing]: 2.80701e-05 [inline]: 0.043296, [2] [rewriter_before_opt_a]: 0.0015023 [a1a2]: 0.0417584, [2] [Cycle 1]: 0.028615, [11] [expand_dump_flag]: 3.38299e-05 [switch_simplify]: 0.00113999 [loop_unroll]: 0.0006814 [a_1]: 0.0220951 [recompute_prepare]: 0.00017137 [updatestate_depend_eliminate]: 0.00044734 [updatestate_assign_eliminate]: 9.01601e-05 [updatestate_loads_eliminate]: 0.00020238 [parameter_eliminate]: 5.20993e-06 [a_2]: 0.0034692 [parallel_inline_pass]: 0.0001025 [Cycle 2]: 0.00552172, [11] [expand_dump_flag]: 1.65997e-06 [switch_simplify]: 9.298e-05 [loop_unroll]: 9.27501e-05 [a_1]: 0.00312162 [recompute_prepare]: 9.80799e-05 [updatestate_depend_eliminate]: 0.00030241 [updatestate_assign_eliminate]: 6.707e-05 [updatestate_loads_eliminate]: 6.429e-05 [parameter_eliminate]: 3.43996e-06 [a_2]: 0.00149823 [parallel_inline_pass]: 0.00010056 [parallel-infer-symbol]: 0.00017561 [pre_auto_parallel]: 8.417e-05 [insert-virtual-dataset]: 0.00122702 [parallel-infer-symbol-second]: 2.65997e-06 [dataset_repeat_opt]: 0.00010494 [pipeline_split]: 9.995e-05 [optimize]: 0.597429, [52] [py_interpret_to_execute]: 0.00014494 [rewriter_before_opt_a]: 0.00031777 [opt_a]: 0.582793, [3] [Cycle 1]: 0.500889, [46] [expand_dump_flag]: 2.14996e-06 [switch_simplify]: 0.00011196 [loop_unroll]: 9.685e-05 [a_1]: 0.00328721 [recompute_prepare]: 0.00010206 [updatestate_depend_eliminate]: 0.00011011 [updatestate_assign_eliminate]: 0.00012752 [updatestate_loads_eliminate]: 6.988e-05 [parameter_eliminate]: 3.72995e-06 [a_2]: 0.00163416 [accelerated_algorithm]: 0.00030179 [shard]: 1.73005e-06 [meta_shard_fg_expand]: 5.526e-05 [shard_inline]: 0.00011556 [auto_parallel]: 8.482e-05 [parallel]: 0.0150502 [flash_sp]: 7.22001e-05 [merge_comm]: 0.00013159 [allreduce_fusion]: 7.573e-05 [matmul_add_comm_reduction]: 9.836e-05 [allreduce_slice_to_reducescatter]: 4.80097e-07 [virtual_shard_identity]: 0.00012333 [virtual_dataset]: 0.00016679 [get_grad_eliminate_]: 0.00011477 [virtual_output]: 0.00011684 [merge_forward]: 8.01301e-05 [cell_reuse_recompute_pass]: 2.45008e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00022258 [before_grad]: 0.00019994 [inplace_validation]: 0.00012688 [parallel_renormalize]: 0.0216004 [update_top_fg]: 9.00007e-07 [cast_eliminate]: 0.00014842 [meta_fg_expand]: 0.265715 [inplace_validation_after_expand]: 0.00173081 [flash_sp_send_recv_attached]: 0.00128217 [receive_attached]: 8.12201e-05 [after_resolve]: 0.00200064 [a_after_grad]: 0.00407216 [special_op_eliminate]: 0.00201496 [renormalize]: 0.14769 [add_forward_monad_depend]: 0.00036616 [auto_monad_grad]: 0.00021923 [auto_monad_eliminator]: 0.0018678 [cse]: 0.00422199 [a_3]: 0.0246396 [Cycle 2]: 0.0701213, [46] [expand_dump_flag]: 5.254e-05 [switch_simplify]: 0.00181352 [loop_unroll]: 0.00155628 [a_1]: 0.0317769 [recompute_prepare]: 0.00018041 [updatestate_depend_eliminate]: 0.00023539 [updatestate_assign_eliminate]: 0.00010398 [updatestate_loads_eliminate]: 0.00016497 [parameter_eliminate]: 3.33996e-06 [a_2]: 0.00429884 [accelerated_algorithm]: 0.00015993 [shard]: 1.56998e-06 [meta_shard_fg_expand]: 8.65599e-05 [shard_inline]: 0.00013922 [auto_parallel]: 0.00011112 [parallel]: 8.09005e-06 [flash_sp]: 0.00012285 [merge_comm]: 0.00010986 [allreduce_fusion]: 9.24e-05 [matmul_add_comm_reduction]: 0.0001141 [allreduce_slice_to_reducescatter]: 4.10015e-07 [virtual_shard_identity]: 0.00014277 [virtual_dataset]: 0.00013582 [get_grad_eliminate_]: 0.00013066 [virtual_output]: 0.00013474 [merge_forward]: 9.055e-05 [cell_reuse_recompute_pass]: 2.35997e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024929 [before_grad]: 0.00023793 [inplace_validation]: 8.447e-05 [parallel_renormalize]: 7.99773e-08 [update_top_fg]: 4.80097e-07 [cast_eliminate]: 0.00015185 [meta_fg_expand]: 0.00035298 [inplace_validation_after_expand]: 0.00018147 [flash_sp_send_recv_attached]: 1.56998e-06 [receive_attached]: 1.12003e-06 [after_resolve]: 0.00016154 [a_after_grad]: 0.00022632 [special_op_eliminate]: 0.00013384 [renormalize]: 0.0180221 [add_forward_monad_depend]: 4.60004e-06 [auto_monad_grad]: 2.10002e-06 [auto_monad_eliminator]: 0.00028571 [cse]: 0.00690835 [a_3]: 0.0009493 [Cycle 3]: 0.0117608, [46] [expand_dump_flag]: 3.04997e-06 [switch_simplify]: 0.00013119 [loop_unroll]: 0.00012684 [a_1]: 0.00426365 [recompute_prepare]: 0.00013508 [updatestate_depend_eliminate]: 0.00018278 [updatestate_assign_eliminate]: 9.358e-05 [updatestate_loads_eliminate]: 8.919e-05 [parameter_eliminate]: 3.16999e-06 [a_2]: 0.00204725 [accelerated_algorithm]: 0.00015477 [shard]: 1.79e-06 [meta_shard_fg_expand]: 5.343e-05 [shard_inline]: 0.00013178 [auto_parallel]: 0.00011185 [parallel]: 1.007e-05 [flash_sp]: 2.64996e-06 [merge_comm]: 0.00010745 [allreduce_fusion]: 9.57099e-05 [matmul_add_comm_reduction]: 0.00011898 [allreduce_slice_to_reducescatter]: 5.60074e-07 [virtual_shard_identity]: 0.00013668 [virtual_dataset]: 0.00013084 [get_grad_eliminate_]: 0.00012719 [virtual_output]: 0.00012894 [merge_forward]: 9.37601e-05 [cell_reuse_recompute_pass]: 2.80002e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.0002615 [before_grad]: 0.00023348 [inplace_validation]: 9.509e-05 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 6.29923e-07 [cast_eliminate]: 0.00014701 [meta_fg_expand]: 0.00011588 [inplace_validation_after_expand]: 0.00012106 [flash_sp_send_recv_attached]: 2.23995e-06 [receive_attached]: 1.34995e-06 [after_resolve]: 0.0001459 [a_after_grad]: 0.0002171 [special_op_eliminate]: 0.00012738 [renormalize]: 7.0082e-08 [add_forward_monad_depend]: 2.98e-06 [auto_monad_grad]: 1.91992e-06 [auto_monad_eliminator]: 0.00017791 [cse]: 0.00041187 [a_3]: 0.00092316 [py_interpret_to_execute_after_opt_a]: 0.00014117 [slice_cell_reuse_recomputed_activation]: 2.30002e-06 [rewriter_after_opt_a]: 0.00100188 [convert_after_rewriter]: 0.00011901 [order_py_execute_after_rewriter]: 8.31899e-05 [opt_b]: 0.00396559, [1] [Cycle 1]: 0.0039541, [7] [b_1]: 0.00301178 [b_2]: 0.00013512 [updatestate_depend_eliminate]: 0.0001008 [updatestate_assign_eliminate]: 8.78599e-05 [updatestate_loads_eliminate]: 9.10199e-05 [renormalize]: 5.10016e-07 [cse]: 0.00047091 [optimize_parallel_all_gather_comm]: 0.00014064 [overlap_param_gather]: 3.95998e-06 [cconv]: 6.763e-05 [loop_unroll]: 0.00097983 [opt_after_cconv]: 0.00156314, [1] [Cycle 1]: 0.0015557, [7] [c_1]: 0.00077482 [parameter_eliminate]: 3.23006e-06 [updatestate_depend_eliminate]: 0.00013631 [updatestate_assign_eliminate]: 9.381e-05 [updatestate_loads_eliminate]: 9.15701e-05 [cse]: 0.00039918 [renormalize]: 6.19912e-07 [remove_dup_value]: 0.000603 [tuple_transform]: 0.00101642, [1] [Cycle 1]: 0.00100924, [2] [d_1]: 0.00099052 [renormalize]: 4.60073e-07 [partial_unused_args_eliminate]: 3.01993e-06 [add_cache_embedding]: 0.00015535 [add_recomputation]: 0.00071106 [cse_after_recomputation]: 0.00032138, [1] [Cycle 1]: 0.0003132, [1] [cse]: 0.00029956 [environ_conv]: 9.01e-05 [swap_dp_allreduce_reducescatter]: 0.00013113 [bias_add_comm_swap]: 2.70002e-06 [label_micro_interleaved_index]: 1.70001e-06 [label_fine_grained_interleaved_index]: 0.00051458 [merge_cast_opt]: 1.33994e-06 [slice_recompute_activation]: 0.00014853 [micro_interleaved_order_control]: 1.87999e-06 [assign_add_opt]: 0.00038273 [ForceFp32Comm]: 1.25007e-06 [remove_cast_before_assign_add]: 0.00010576 [full_micro_interleaved_order_control]: 1.76998e-06 [reorder_send_recv_between_fp_bp]: 1.09e-06 [comm_op_add_attrs]: 0.0001464 [add_comm_op_reuse_tag]: 0.00014448 [interleave_split_concat_branches]: 7.3004e-07 [interleave_parallel_branches]: 5.19911e-07 [overlap_opt_shard_in_pipeline]: 1.08599e-05 [overlap_opt_shard_grad_in_pipeline]: 2.49001e-06 [control_data_broadcast_order]: 9.50065e-07 [grouped_pairwise_exchange_alltoall]: 1.001e-05 [offloading_packed_experts]: 1.62004e-06 [overlap_recompute_and_grad_model_parallel]: 1.73005e-06 [overlap_grad_matmul_and_grad_allreduce]: 6.40051e-07 [overlap_recompute_allgather_and_fa_grad]: 7.47e-05 [overlap_grad_ring_attention]: 0.00017855 [overlap_grad_flash_sp]: 0.0001208 [begin_end_overlap_inline]: 6.29923e-07 [split_matmul_comm_elemetwise]: 1.54995e-06 [split_layernorm_comm]: 1.15996e-06 [handle_group_info]: 7.18003e-06 [symbol_engine_optimizer]: 0.00080434, [1] [Cycle 1]: 0.00079715, [6] [build]: 5.35999e-05 [elim_shapecalc]: 0.00014583 [elim_not_effective]: 0.00022213 [opt_reshape]: 0.00012814 [fold_const_symbol]: 0.00020947 [renormalize]: 4.70085e-07 [pipeline_parallel_scheduler]: 2.88e-06 [auto_monad_reorder]: 0.00029962 [get_jit_bprop_graph]: 4.80097e-07 [rewriter_after_jit_bprop_graph]: 3.89991e-07 [eliminate_special_op_node]: 0.0014095 [distribtued_split]: 0.0003522 [validate]: 0.0002813 [task_emit]: 13.0416 [execute]: 1.143e-05 Sums bootstrap : 0.001310s : 0.01% type_inference : 0.776087s : 5.37% auto_monad : 0.001856s : 0.01% graph_reusing : 0.000028s : 0.00% inline.rewriter_before_opt_a : 0.001502s : 0.01% inline.a1a2.expand_dump_flag : 0.000035s : 0.00% inline.a1a2.switch_simplify : 0.001233s : 0.01% inline.a1a2.loop_unroll : 0.000774s : 0.01% inline.a1a2.a_1 : 0.025217s : 0.17% inline.a1a2.recompute_prepare : 0.000269s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000750s : 0.01% inline.a1a2.updatestate_assign_eliminate : 0.000157s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000267s : 0.00% inline.a1a2.parameter_eliminate : 0.000009s : 0.00% inline.a1a2.a_2 : 0.004967s : 0.03% inline.a1a2.parallel_inline_pass : 0.000203s : 0.00% parallel-infer-symbol : 0.000176s : 0.00% pre_auto_parallel : 0.000084s : 0.00% insert-virtual-dataset : 0.001227s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000105s : 0.00% pipeline_split : 0.000100s : 0.00% optimize.py_interpret_to_execute : 0.000145s : 0.00% optimize.rewriter_before_opt_a : 0.000318s : 0.00% optimize.opt_a.expand_dump_flag : 0.000058s : 0.00% optimize.opt_a.switch_simplify : 0.002057s : 0.01% optimize.opt_a.loop_unroll : 0.001780s : 0.01% optimize.opt_a.a_1 : 0.039328s : 0.27% optimize.opt_a.recompute_prepare : 0.000418s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000528s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000325s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000324s : 0.00% optimize.opt_a.parameter_eliminate : 0.000010s : 0.00% optimize.opt_a.a_2 : 0.007980s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000616s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000195s : 0.00% optimize.opt_a.shard_inline : 0.000387s : 0.00% optimize.opt_a.auto_parallel : 0.000308s : 0.00% optimize.opt_a.parallel : 0.015068s : 0.10% optimize.opt_a.flash_sp : 0.000198s : 0.00% optimize.opt_a.merge_comm : 0.000349s : 0.00% optimize.opt_a.allreduce_fusion : 0.000264s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000331s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000403s : 0.00% optimize.opt_a.virtual_dataset : 0.000433s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000373s : 0.00% optimize.opt_a.virtual_output : 0.000381s : 0.00% optimize.opt_a.merge_forward : 0.000264s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000008s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000733s : 0.01% optimize.opt_a.before_grad : 0.000671s : 0.00% optimize.opt_a.inplace_validation : 0.000306s : 0.00% optimize.opt_a.parallel_renormalize : 0.021601s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000447s : 0.00% optimize.opt_a.meta_fg_expand : 0.266184s : 1.84% optimize.opt_a.inplace_validation_after_expand : 0.002033s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001286s : 0.01% optimize.opt_a.receive_attached : 0.000084s : 0.00% optimize.opt_a.after_resolve : 0.002308s : 0.02% optimize.opt_a.a_after_grad : 0.004516s : 0.03% optimize.opt_a.special_op_eliminate : 0.002276s : 0.02% optimize.opt_a.renormalize : 0.165712s : 1.15% optimize.opt_a.add_forward_monad_depend : 0.000374s : 0.00% optimize.opt_a.auto_monad_grad : 0.000223s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002331s : 0.02% optimize.opt_a.cse : 0.011542s : 0.08% optimize.opt_a.a_3 : 0.026512s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000141s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.001002s : 0.01% optimize.convert_after_rewriter : 0.000119s : 0.00% optimize.order_py_execute_after_rewriter : 0.000083s : 0.00% optimize.opt_b.b_1 : 0.003012s : 0.02% optimize.opt_b.b_2 : 0.000135s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000101s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000088s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000091s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000471s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000141s : 0.00% optimize.overlap_param_gather : 0.000004s : 0.00% optimize.cconv : 0.000068s : 0.00% optimize.loop_unroll : 0.000980s : 0.01% optimize.opt_after_cconv.c_1 : 0.000775s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000136s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000094s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000092s : 0.00% optimize.opt_after_cconv.cse : 0.000399s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000603s : 0.00% optimize.tuple_transform.d_1 : 0.000991s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000155s : 0.00% optimize.add_recomputation : 0.000711s : 0.00% optimize.cse_after_recomputation.cse : 0.000300s : 0.00% optimize.environ_conv : 0.000090s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000131s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000515s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000149s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000383s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000106s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000146s : 0.00% optimize.add_comm_op_reuse_tag : 0.000144s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000011s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000075s : 0.00% optimize.overlap_grad_ring_attention : 0.000179s : 0.00% optimize.overlap_grad_flash_sp : 0.000121s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000007s : 0.00% optimize.symbol_engine_optimizer.build : 0.000054s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000146s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000222s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000128s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000209s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000300s : 0.00% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001410s : 0.01% distribtued_split : 0.000352s : 0.00% validate : 0.000281s : 0.00% task_emit : 13.041638s : 90.22% execute : 0.000011s : 0.00% Time group info: ------[substitution.] 0.049772 4298 0.04% : 0.000020s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000020s : 9: substitution.addn_check_dump 0.11% : 0.000054s : 7: substitution.addn_zero_filter 0.03% : 0.000015s : 7: substitution.adjust_all_reduce_mul_add 0.65% : 0.000322s : 71: substitution.arithmetic_simplify 0.11% : 0.000055s : 10: substitution.cast_eliminate 0.11% : 0.000055s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000024s : 15: substitution.environ_get_add_eliminate 0.03% : 0.000017s : 12: substitution.environ_get_depend_swap 0.06% : 0.000029s : 27: substitution.environ_get_eliminate 0.07% : 0.000033s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000019s : 23: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.02% : 0.000012s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000031s : 107: substitution.fold_const_symbol 63.75% : 0.031729s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000084s : 126: substitution.graph_param_transform 0.02% : 0.000008s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 24.42% : 0.012153s : 331: substitution.inline 1.54% : 0.000764s : 112: substitution.inline_without_move 0.25% : 0.000124s : 309: substitution.j_node_and_user_rematch 0.35% : 0.000175s : 40: substitution.less_batch_normalization 0.09% : 0.000046s : 90: substitution.load_eliminater 0.10% : 0.000050s : 10: substitution.merge_addn 0.23% : 0.000112s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.05% : 0.000024s : 1: substitution.partial_defer_inline 0.13% : 0.000065s : 23: substitution.partial_eliminate 0.03% : 0.000016s : 26: substitution.reduce_all_const_elim 0.07% : 0.000033s : 15: substitution.reduce_eliminate 0.32% : 0.000159s : 309: substitution.remove_not_recompute_node 2.00% : 0.000993s : 508: substitution.replace_applicator 0.22% : 0.000108s : 251: substitution.replace_old_param 0.07% : 0.000037s : 11: substitution.reshape_eliminate 0.03% : 0.000012s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000011s : 4: substitution.specialize_transform 0.03% : 0.000016s : 12: substitution.split_environ_get_set_with_tuple_value 0.16% : 0.000077s : 34: substitution.switch_simplify 0.06% : 0.000029s : 11: substitution.tile_eliminate 0.51% : 0.000253s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000134s : 107: substitution.tuple_list_get_item_const_eliminator 0.42% : 0.000209s : 107: substitution.tuple_list_get_item_depend_reorder 1.73% : 0.000863s : 308: substitution.tuple_list_get_item_eliminator 0.36% : 0.000181s : 107: substitution.tuple_list_get_set_item_eliminator 0.39% : 0.000193s : 210: substitution.updatestate_pure_node_eliminater 0.69% : 0.000343s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000012s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.775643 2 96.70% : 0.750065s : 1: type_inference.infer 3.30% : 0.025578s : 1: type_inference.specialize ------[replace.] 0.010368 775 0.41% : 0.000043s : 5: replace.ad_related_special_op_eliminate 0.06% : 0.000007s : 1: replace.arithmetic_simplify 0.52% : 0.000054s : 7: replace.depend_value_elim 0.42% : 0.000044s : 3: replace.environ_get_set_eliminate 29.96% : 0.003106s : 183: replace.getattr_setattr_resolve 29.27% : 0.003035s : 310: replace.inline 0.22% : 0.000022s : 1: replace.merge_addn 1.14% : 0.000118s : 7: replace.partial_eliminate 3.72% : 0.000386s : 25: replace.replace_applicator 3.66% : 0.000379s : 34: replace.switch_simplify 0.50% : 0.000052s : 6: replace.tuple_list_get_item_depend_reorder 29.78% : 0.003088s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.18% : 0.000018s : 1: replace.virtual_dataset_eliminate ------[match.] 0.042084 775 0.04% : 0.000017s : 5: match.ad_related_special_op_eliminate 0.02% : 0.000010s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000017s : 3: match.environ_get_set_eliminate 69.78% : 0.029368s : 183: match.getattr_setattr_resolve 28.31% : 0.011916s : 310: match.inline 0.05% : 0.000023s : 1: match.merge_addn 0.09% : 0.000037s : 7: match.partial_eliminate 0.23% : 0.000098s : 25: match.replace_applicator 0.14% : 0.000057s : 34: match.switch_simplify 0.08% : 0.000032s : 6: match.tuple_list_get_item_depend_reorder 1.16% : 0.000490s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020853131318 0.75% : 0.000157s : 1198: predicate.accumulaten_eliminater 0.28% : 0.000059s : 254: predicate.ad_related_special_op_eliminate 0.54% : 0.000114s : 835: predicate.addn_check_dump 0.76% : 0.000158s : 1198: predicate.addn_zero_filter 0.74% : 0.000154s : 1198: predicate.adjust_all_reduce_mul_add 1.79% : 0.000374s : 2034: predicate.arithmetic_simplify 1.15% : 0.000241s : 1586: predicate.cast_eliminate 3.25% : 0.000678s : 3484: predicate.check_bprop_eliminate 0.56% : 0.000116s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.13% : 0.000236s : 1399: predicate.convert_tensor_eliminate 0.57% : 0.000118s : 838: predicate.depend_value_elim 0.84% : 0.000176s : 1202: predicate.dict_get_item_const_eliminator 0.83% : 0.000174s : 1202: predicate.dict_get_item_eliminator 0.88% : 0.000184s : 1202: predicate.dict_set_item_eliminator 0.05% : 0.000010s : 126: predicate.elim_not_effective 0.11% : 0.000022s : 126: predicate.elim_shapecalc_of_broadcastargs 0.85% : 0.000178s : 1334: predicate.environ_add_const_eliminate 0.86% : 0.000179s : 1337: predicate.environ_get_add_eliminate 0.84% : 0.000176s : 1334: predicate.environ_get_depend_swap 1.42% : 0.000297s : 2172: predicate.environ_get_eliminate 0.85% : 0.000177s : 1337: predicate.environ_get_set_eliminate 1.14% : 0.000238s : 1717: predicate.exchange_switch_depend_value 1.39% : 0.000289s : 1717: predicate.float_depend_g_call 0.55% : 0.000115s : 835: predicate.float_environ_get_switch 0.64% : 0.000134s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000059s : 395: predicate.get_grad_eliminate 2.34% : 0.000489s : 1893: predicate.getattr_setattr_resolve 0.06% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000115s : 835: predicate.incorporate_call 0.53% : 0.000111s : 835: predicate.incorporate_call_switch 3.83% : 0.000798s : 4602: predicate.inline 2.60% : 0.000543s : 2203: predicate.inline_without_move 0.14% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.32% : 0.000067s : 388: predicate.less_batch_normalization 1.10% : 0.000229s : 1660: predicate.list_to_tuple_eliminator_ 1.94% : 0.000405s : 2874: predicate.load_eliminater 0.20% : 0.000042s : 135: predicate.loop_unroll_after_grad 2.40% : 0.000501s : 2640: predicate.loop_unroll_before_grad 0.99% : 0.000206s : 1478: predicate.make_slice_get_slice_eliminator 0.56% : 0.000117s : 837: predicate.merge_addn 3.07% : 0.000641s : 3380: predicate.micro_step_allgather_replace 3.10% : 0.000646s : 3380: predicate.mini_step_allgather_replace 0.78% : 0.000163s : 1199: predicate.minmaximum_grad 0.18% : 0.000037s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.10% : 0.000020s : 135: predicate.parallel_virtual_node 2.09% : 0.000437s : 1717: predicate.partial_defer_inline 1.07% : 0.000224s : 1541: predicate.partial_eliminate 0.75% : 0.000156s : 1198: predicate.print_const_string_wrapper 0.55% : 0.000115s : 824: predicate.reduce_all_const_elim 0.97% : 0.000202s : 1199: predicate.reduce_eliminate 0.14% : 0.000030s : 395: predicate.remove_not_recompute_node 1.92% : 0.000400s : 4829: predicate.replace_applicator 0.78% : 0.000163s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.79% : 0.000164s : 1199: predicate.reshape_eliminate 3.40% : 0.000710s : 3380: predicate.row_tensor_add_zeros_like 0.10% : 0.000022s : 135: predicate.row_tensor_eliminate 3.30% : 0.000688s : 3484: predicate.same_eliminate 0.23% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.28% : 0.000059s : 395: predicate.shard_identity_eliminate 2.42% : 0.000504s : 2338: predicate.special_op_eliminate 0.64% : 0.000133s : 837: predicate.specialize_transform 3.61% : 0.000754s : 3380: predicate.split_environ_get_set_with_tuple_value 1.56% : 0.000325s : 2203: predicate.stack_unstack_eliminate 1.81% : 0.000378s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.27% : 0.000266s : 1717: predicate.switch_defer_inline 4.42% : 0.000921s : 5201: predicate.switch_layer_defer_inline 4.31% : 0.000898s : 5262: predicate.switch_simplify 0.77% : 0.000160s : 1199: predicate.tile_eliminate 0.76% : 0.000158s : 1199: predicate.transpose_eliminate 1.05% : 0.000219s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.04% : 0.000217s : 1469: predicate.tuple_list_get_item_const_eliminator 0.92% : 0.000192s : 1469: predicate.tuple_list_get_item_depend_reorder 1.88% : 0.000392s : 2495: predicate.tuple_list_get_item_eliminator 0.99% : 0.000206s : 1469: predicate.tuple_list_get_set_item_eliminator 1.61% : 0.000335s : 2304: predicate.tuple_list_set_item_eliminator 1.11% : 0.000231s : 1660: predicate.tuple_to_list_eliminator_ 1.84% : 0.000383s : 2874: predicate.updatestate_pure_node_eliminater 2.46% : 0.000514s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000020s : 135: predicate.value_based_eliminate 0.28% : 0.000059s : 397: predicate.virtual_dataset_eliminate 0.27% : 0.000057s : 395: predicate.virtual_output_eliminate 0.11% : 0.000022s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.060196 747 69.20% : 0.041656s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.53% : 0.001525s : 22: func_graph_cloner_run.FuncGraphClonerNode 28.27% : 0.017015s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.463355 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.27% : 0.041763s : 1: a1a2 0.00% : 0.000163s : 1: add_cache_embedding 0.00% : 0.000152s : 1: add_comm_op_reuse_tag 0.00% : 0.000724s : 1: add_recomputation 0.00% : 0.000393s : 1: assign_add_opt 0.01% : 0.001876s : 1: auto_monad 0.00% : 0.000313s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001356s : 1: bootstrap 0.00% : 0.000074s : 1: cconv 0.00% : 0.000154s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000127s : 1: convert_after_rewriter 0.00% : 0.000326s : 1: cse_after_recomputation 0.00% : 0.000115s : 1: dataset_repeat_opt 0.00% : 0.000367s : 1: distribtued_split 0.01% : 0.001426s : 1: eliminate_special_op_node 0.00% : 0.000099s : 1: environ_conv 0.00% : 0.000021s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000036s : 1: graph_reusing 0.00% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000010s : 1: handle_group_info 0.28% : 0.043307s : 1: inline 0.01% : 0.001251s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000524s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000991s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.032592s : 61: opt.transform.a1a2 0.00% : 0.000178s : 1: opt.transform.loop_unroll_optimizer 0.59% : 0.091380s : 148: opt.transform.opt_a 0.00% : 0.000772s : 1: opt.transform.opt_after_cconv 0.02% : 0.003117s : 27: opt.transform.opt_b 0.24% : 0.037804s : 16: opt.transform.opt_resolve 0.01% : 0.000987s : 1: opt.transform.opt_trans_graph 0.01% : 0.000841s : 6: opt.transform.special_op_eliminate 0.00% : 0.000700s : 4: opt.transform.symbol_engine_opt 3.77% : 0.582799s : 1: opt_a 0.01% : 0.001569s : 1: opt_after_cconv 0.03% : 0.003970s : 1: opt_b 3.86% : 0.597442s : 1: optimize 0.00% : 0.000149s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000089s : 1: order_py_execute_after_rewriter 0.00% : 0.000125s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000185s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000015s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000008s : 1: overlap_param_gather 0.00% : 0.000080s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000188s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000111s : 1: pipeline_split 0.00% : 0.000093s : 1: pre_auto_parallel 0.00% : 0.000153s : 1: py_interpret_to_execute 0.00% : 0.000149s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000112s : 1: remove_cast_before_assign_add 0.00% : 0.000617s : 1: remove_dup_value 0.86% : 0.133448s : 3: renormalize.infer 0.35% : 0.053825s : 3: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001014s : 1: rewriter_after_opt_a 0.01% : 0.001842s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000156s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.00% : 0.000138s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000808s : 1: symbol_engine_optimizer 84.34% : 13.041682s : 1: task_emit 0.01% : 0.001022s : 1: tuple_transform 5.02% : 0.776108s : 1: type_inference 0.01% : 0.001385s : 1: validate TotalTime = 14.4486, [21] [bootstrap]: 0.00146717 [type_inference]: 0.752272 [auto_monad]: 0.00192215 [graph_reusing]: 2.437e-05 [inline]: 0.0441014, [2] [rewriter_before_opt_a]: 0.00148607 [a1a2]: 0.0425733, [2] [Cycle 1]: 0.0289445, [11] [expand_dump_flag]: 3.543e-05 [switch_simplify]: 0.00111237 [loop_unroll]: 0.00073818 [a_1]: 0.0224464 [recompute_prepare]: 0.00016469 [updatestate_depend_eliminate]: 0.00037488 [updatestate_assign_eliminate]: 9.355e-05 [updatestate_loads_eliminate]: 0.00025333 [parameter_eliminate]: 6.56e-06 [a_2]: 0.00342531 [parallel_inline_pass]: 0.0001041 [Cycle 2]: 0.00560544, [11] [expand_dump_flag]: 2.09e-06 [switch_simplify]: 9.36501e-05 [loop_unroll]: 9.4e-05 [a_1]: 0.00322083 [recompute_prepare]: 9.995e-05 [updatestate_depend_eliminate]: 0.00020786 [updatestate_assign_eliminate]: 6.399e-05 [updatestate_loads_eliminate]: 6.301e-05 [parameter_eliminate]: 3.66999e-06 [a_2]: 0.00157754 [parallel_inline_pass]: 0.00010309 [parallel-infer-symbol]: 0.00019529 [pre_auto_parallel]: 0.00010695 [insert-virtual-dataset]: 0.00124602 [parallel-infer-symbol-second]: 2.71003e-06 [dataset_repeat_opt]: 0.0001378 [pipeline_split]: 0.0001014 [optimize]: 0.599665, [52] [py_interpret_to_execute]: 0.00012653 [rewriter_before_opt_a]: 0.00028034 [opt_a]: 0.585024, [3] [Cycle 1]: 0.502206, [46] [expand_dump_flag]: 2.05997e-06 [switch_simplify]: 0.00011003 [loop_unroll]: 9.69301e-05 [a_1]: 0.003304 [recompute_prepare]: 0.00010352 [updatestate_depend_eliminate]: 0.00010455 [updatestate_assign_eliminate]: 6.379e-05 [updatestate_loads_eliminate]: 6.942e-05 [parameter_eliminate]: 3.36999e-06 [a_2]: 0.00167742 [accelerated_algorithm]: 0.0002886 [shard]: 2.32004e-06 [meta_shard_fg_expand]: 4.891e-05 [shard_inline]: 0.00010974 [auto_parallel]: 8.196e-05 [parallel]: 0.0149376 [flash_sp]: 5.874e-05 [merge_comm]: 0.00013191 [allreduce_fusion]: 7.499e-05 [matmul_add_comm_reduction]: 9.802e-05 [allreduce_slice_to_reducescatter]: 5.20027e-07 [virtual_shard_identity]: 0.00012491 [virtual_dataset]: 0.00016772 [get_grad_eliminate_]: 0.00011506 [virtual_output]: 0.00011389 [merge_forward]: 7.89299e-05 [cell_reuse_recompute_pass]: 3.18e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00020773 [before_grad]: 0.00020565 [inplace_validation]: 0.00012958 [parallel_renormalize]: 0.022071 [update_top_fg]: 9.89996e-07 [cast_eliminate]: 0.00014653 [meta_fg_expand]: 0.264763 [inplace_validation_after_expand]: 0.0015371 [flash_sp_send_recv_attached]: 0.00118909 [receive_attached]: 9.94301e-05 [after_resolve]: 0.00194705 [a_after_grad]: 0.00387278 [special_op_eliminate]: 0.00179595 [renormalize]: 0.151153 [add_forward_monad_depend]: 0.00036016 [auto_monad_grad]: 0.00021806 [auto_monad_eliminator]: 0.00183829 [cse]: 0.00418705 [a_3]: 0.0239487 [Cycle 2]: 0.0709072, [46] [expand_dump_flag]: 4.83e-05 [switch_simplify]: 0.00178604 [loop_unroll]: 0.00151142 [a_1]: 0.0314617 [recompute_prepare]: 0.00017242 [updatestate_depend_eliminate]: 0.0002291 [updatestate_assign_eliminate]: 0.00010162 [updatestate_loads_eliminate]: 0.00016643 [parameter_eliminate]: 3.32994e-06 [a_2]: 0.0043101 TotalTime = 14.5502, [21] [bootstrap]: 0.0015196 [type_inference]: 0.777004 [auto_monad]: 0.001992 [graph_reusing]: 2.566e-05 [inline]: 0.0431653, [2] [rewriter_before_opt_a]: 0.00149294 [a1a2]: 0.0416326, [2] [Cycle 1]: 0.0284594, [11] [expand_dump_flag]: 3.723e-05 [switch_simplify]: 0.00108428 [loop_unroll]: 0.00068096 [a_1]: 0.0221261 [recompute_prepare]: 0.0001655 [updatestate_depend_eliminate]: 0.00035422 [updatestate_assign_eliminate]: 8.90699e-05 [updatestate_loads_eliminate]: 0.00019718 [parameter_eliminate]: 5.20004e-06 [a_2]: 0.00344296 [parallel_inline_pass]: 0.00010346 [Cycle 2]: 0.00549404, [11] [expand_dump_flag]: 1.32993e-06 [switch_simplify]: 9.338e-05 [loop_unroll]: 9.276e-05 [a_1]: 0.00315693 [recompute_prepare]: 9.988e-05 [updatestate_depend_eliminate]: 0.00022878 [updatestate_assign_eliminate]: 6.467e-05 [updatestate_loads_eliminate]: 6.227e-05 [parameter_eliminate]: 4.18001e-06 [a_2]: 0.00150801 [parallel_inline_pass]: 0.00010087 [parallel-infer-symbol]: 0.00016808 [pre_auto_parallel]: 0.00010017 [insert-virtual-dataset]: 0.00131599 [parallel-infer-symbol-second]: 3.03995e-06 [dataset_repeat_opt]: 9.77e-05 [pipeline_split]: 0.00010979 [optimize]: 0.613661, [52] [py_interpret_to_execute]: 0.00013898 [rewriter_before_opt_a]: 0.00028558 [opt_a]: 0.599118, [3] [Cycle 1]: 0.514719, [46] [expand_dump_flag]: 2.07999e-06 [switch_simplify]: 0.00011215 [loop_unroll]: 9.86201e-05 [a_1]: 0.00338299 [recompute_prepare]: 0.00011063 [updatestate_depend_eliminate]: 0.00012062 [updatestate_assign_eliminate]: 6.64e-05 [updatestate_loads_eliminate]: 7.48199e-05 [parameter_eliminate]: 3.99002e-06 [a_2]: 0.00168465 [accelerated_algorithm]: 0.00032833 [shard]: 1.85997e-06 [meta_shard_fg_expand]: 6.179e-05 [shard_inline]: 0.00011164 [auto_parallel]: 8.832e-05 [parallel]: 0.0154757 [flash_sp]: 5.84499e-05 [merge_comm]: 0.00016371 [allreduce_fusion]: 7.71501e-05 [matmul_add_comm_reduction]: 0.00010324 [allreduce_slice_to_reducescatter]: 7.09901e-07 [virtual_shard_identity]: 0.00013003 [virtual_dataset]: 0.00016142 [get_grad_eliminate_]: 0.00011565 [virtual_output]: 0.00011298 [merge_forward]: 7.304e-05 [cell_reuse_recompute_pass]: 3.12994e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00021058 [before_grad]: 0.00021058 [inplace_validation]: 0.00013243 [parallel_renormalize]: 0.0233289 [update_top_fg]: 1.73994e-06 [cast_eliminate]: 0.00014734 [meta_fg_expand]: 0.269691 [inplace_validation_after_expand]: 0.00169733 [flash_sp_send_recv_attached]: 0.00125645 [receive_attached]: 0.00010713 [after_resolve]: 0.00199931 [a_after_grad]: 0.00395942 [special_op_eliminate]: 0.00192829 [renormalize]: 0.155792 [add_forward_monad_depend]: 0.00035889 [auto_monad_grad]: 0.00022162 [auto_monad_eliminator]: 0.00182614 [cse]: 0.00415059 [a_3]: 0.0243789 [Cycle 2]: 0.0724642, [46] [expand_dump_flag]: 5.729e-05 [switch_simplify]: 0.0017959 [loop_unroll]: 0.0015146 [a_1]: 0.0319834 [recompute_prepare]: 0.00019205 [updatestate_depend_eliminate]: 0.00024515 [updatestate_assign_eliminate]: 0.00010602 [updatestate_loads_eliminate]: 0.00017224 [parameter_eliminate]: 4.07e-06 [a_9 [accelerated_algorithm]: 0.0001652 [shard]: 1.73994e-06 [meta_shard_fg_expand]: 8.315e-05 [shard_inline]: 0.00013846 [auto_parallel]: 0.00011411 [parallel]: 1.12e-05 [flash_sp]: 0.00012585 [merge_comm]: 0.00011169 [allreduce_fusion]: 9.111e-05 [matmul_add_comm_reduction]: 0.00011157 [allreduce_slice_to_reducescatter]: 6.20028e-07 [virtual_shard_identity]: 0.00014214 [virtual_dataset]: 0.0001353 [get_grad_eliminate_]: 0.00012944 [virtual_output]: 0.00013435 [merge_forward]: 9.332e-05 [cell_reuse_recompute_pass]: 2.78e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00026078 [before_grad]: 0.00024366 [inplace_validation]: 8.64899e-05 [parallel_renormalize]: 7.99773e-08 [update_top_fg]: 7.10017e-07 [cast_eliminate]: 0.00015703 [meta_fg_expand]: 0.00028607 [inplace_validation_after_expand]: 0.00017988 [flash_sp_send_recv_attached]: 2.11992e-06 [receive_attached]: 2.00002e-06 [after_resolve]: 0.00016094 [a_after_grad]: 0.00022603 [special_op_eliminate]: 0.00013311 [renormalize]: 0.0190057 [add_forward_monad_depend]: 7.30995e-06 [auto_monad_grad]: 3.13995e-06 [auto_monad_eliminator]: 0.00029085 [cse]: 0.00699729 [a_3]: 0.00101595 [Cycle 3]: 0.0118888, [46] [expand_dump_flag]: 3.19991e-06 [switch_simplify]: 0.00013304 [loop_unroll]: 0.00012816 [a_1]: 0.0043093 [recompute_prepare]: 0.00013664 [updatestate_depend_eliminate]: 0.00015226 [updatestate_assign_eliminate]: 9.64401e-05 [updatestate_loads_eliminate]: 9.53301e-05 [parameter_eliminate]: 4.10003e-06 [a_2]: 0.00207676 [accelerated_algorithm]: 0.00018791 [shard]: 1.97999e-06 [meta_shard_fg_expand]: 5.575e-05 [shard_inline]: 0.00013443 [auto_parallel]: 0.00011551 [parallel]: 1.218e-05 [flash_sp]: 2.59001e-06 [merge_comm]: 0.00010783 [allreduce_fusion]: 9.51e-05 [matmul_add_comm_reduction]: 0.00011887 [allreduce_slice_to_reducescatter]: 5.69969e-07 [virtual_shard_identity]: 0.00013862 [virtual_dataset]: 0.00013158 [get_grad_eliminate_]: 0.00012707 [virtual_output]: 0.00012894 [merge_forward]: 9.20601e-05 [cell_reuse_recompute_pass]: 3.45998e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024445 [before_grad]: 0.00023205 [inplace_validation]: 8.852e-05 [parallel_renormalize]: 6.99656e-08 [update_top_fg]: 6.20028e-07 [cast_eliminate]: 0.00015032 [meta_fg_expand]: 0.00011511 [inplace_validation_after_expand]: 0.00011741 [flash_sp_send_recv_attached]: 1.79e-06 [receive_attached]: 1.96008e-06 [after_resolve]: 0.00014755 [a_after_grad]: 0.00021825 [special_op_eliminate]: 0.00012997 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 2.41003e-06 [auto_monad_grad]: 2.63995e-06 [auto_monad_eliminator]: 0.00017561 [cse]: 0.00040248 [a_3]: 0.00097233 [py_interpret_to_execute_after_opt_a]: 0.00014671 [slice_cell_reuse_recomputed_activation]: 2.48e-06 [rewriter_after_opt_a]: 0.00098905 [convert_after_rewriter]: 0.0001123 [order_py_execute_after_rewriter]: 8.036e-05 [opt_b]: 0.00396252, [1] [Cycle 1]: 0.00395394, [7] [b_1]: 0.0030847 [b_2]: 0.00013655 [updatestate_depend_eliminate]: 0.00010124 [updatestate_assign_eliminate]: 8.672]: 0.00441268 [accelerated_algorithm]: 0.00017134 [shard]: 2.37999e-06 [meta_shard_fg_expand]: 0.00010189 [shard_inline]: 0.00014158 [auto_parallel]: 0.0001235 [parallel]: 1.284e-05 [flash_sp]: 0.00012738 [merge_comm]: 0.00012112 [allreduce_fusion]: 9.506e-05 [matmul_add_comm_reduction]: 0.00011637 [allreduce_slice_to_reducescatter]: 4.7998e-07 [virtual_shard_identity]: 0.00014743 [virtual_dataset]: 0.00013914 [get_grad_eliminate_]: 0.00013411 [virtual_output]: 0.00013741 [merge_forward]: 9.286e-05 [cell_reuse_recompute_pass]: 2.75008e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00025491 [before_grad]: 0.00023821 [inplace_validation]: 8.696e-05 [parallel_renormalize]: 1.10012e-07 [update_top_fg]: 7.69971e-07 [cast_eliminate]: 0.00016084 [meta_fg_expand]: 0.00036051 [inplace_validation_after_expand]: 0.00018791 [flash_sp_send_recv_attached]: 2.27999e-06 [receive_attached]: 2.20002e-06 [after_resolve]: 0.00016659 [a_after_grad]: 0.00023633 [special_op_eliminate]: 0.0001366 [renormalize]: 0.0196143 [add_forward_monad_depend]: 4.54998e-06 [auto_monad_grad]: 2.15007e-06 [auto_monad_eliminator]: 0.00027846 [cse]: 0.00717661 [a_3]: 0.00096936 [Cycle 3]: 0.0119147, [46] [expand_dump_flag]: 1.82004e-06 [switch_simplify]: 0.00013224 [loop_unroll]: 0.0001294 [a_1]: 0.00429251 [recompute_prepare]: 0.00013708 [updatestate_depend_eliminate]: 0.00015879 [updatestate_assign_eliminate]: 9.51e-05 [updatestate_loads_eliminate]: 9.24e-05 [parameter_eliminate]: 2.86009e-06 [a_2]: 0.0020948 [accelerated_algorithm]: 0.00015739 [shard]: 1.75997e-06 [meta_shard_fg_expand]: 5.572e-05 [shard_inline]: 0.00013327 [auto_parallel]: 0.00011307 [parallel]: 1.024e-05 [flash_sp]: 2.71003e-06 [merge_comm]: 0.000106 [allreduce_fusion]: 9.689e-05 [matmul_add_comm_reduction]: 0.00012121 [allreduce_slice_to_reducescatter]: 8.39937e-07 [virtual_shard_identity]: 0.00013967 [virtual_dataset]: 0.00013402 [get_grad_eliminate_]: 0.00012839 [virtual_output]: 0.0001304 [merge_forward]: 9.515e-05 [cell_reuse_recompute_pass]: 3.02005e-06 [cell_reuse_handle_not_recompute_node_pass]: 0.00024876 [before_grad]: 0.00023305 [inplace_validation]: 9.127e-05 [parallel_renormalize]: 1.00001e-07 [update_top_fg]: 7.10017e-07 [cast_eliminate]: 0.00015032 [meta_fg_expand]: 0.00011256 [inplace_validation_after_expand]: 0.00012495 [flash_sp_send_recv_attached]: 1.75007e-06 [receive_attached]: 1.88011e-06 [after_resolve]: 0.00014839 [a_after_grad]: 0.00022058 [special_op_eliminate]: 0.00013073 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 2.58989e-06 [auto_monad_grad]: 1.82004e-06 [auto_monad_eliminator]: 0.0001765 [cse]: 0.00045509 [a_3]: 0.000947 [py_interpret_to_execute_after_opt_a]: 0.00014611 [slice_cell_reuse_recomputed_activation]: 2.33995e-06 [rewriter_after_opt_a]: 0.00097322 [convert_after_rewriter]: 0.00011584 [order_py_execute_after_rewriter]: 8.04099e-05 [opt_b]: 0.00400134, [1] [Cycle 1]: 0.00399344, [7] [b_1]: 0.0031131 [b_2]: 0.00013809 [updatestate_depend_eliminate]: 9.87001e-05 [updatestate_assign_elim3e-05 [updatestate_loads_eliminate]: 9.03901e-05 [renormalize]: 5.39934e-07 [cse]: 0.00039871 [optimize_parallel_all_gather_comm]: 0.00014016 [overlap_param_gather]: 1.52003e-06 [cconv]: 7.127e-05 [loop_unroll]: 0.00096196 [opt_after_cconv]: 0.0016044, [1] [Cycle 1]: 0.00159634, [7] [c_1]: 0.00082147 [parameter_eliminate]: 2.91993e-06 [updatestate_depend_eliminate]: 0.0001376 [updatestate_assign_eliminate]: 9.35299e-05 [updatestate_loads_eliminate]: 9.54199e-05 [cse]: 0.00038979 [renormalize]: 5.40051e-07 [remove_dup_value]: 0.00060655 [tuple_transform]: 0.00099269, [1] [Cycle 1]: 0.00098466, [2] [d_1]: 0.00096762 [renormalize]: 3.40049e-07 [partial_unused_args_eliminate]: 2.91003e-06 [add_cache_embedding]: 0.00015145 [add_recomputation]: 0.0007155 [cse_after_recomputation]: 0.00031684, [1] [Cycle 1]: 0.00030875, [1] [cse]: 0.00029554 [environ_conv]: 9.719e-05 [swap_dp_allreduce_reducescatter]: 0.00012977 [bias_add_comm_swap]: 2.84007e-06 [label_micro_interleaved_index]: 2.00002e-06 [label_fine_grained_interleaved_index]: 0.00055626 [merge_cast_opt]: 1.63994e-06 [slice_recompute_activation]: 0.00014648 [micro_interleaved_order_control]: 1.90001e-06 [assign_add_opt]: 0.00038978 [ForceFp32Comm]: 1.32003e-06 [remove_cast_before_assign_add]: 0.00010363 [full_micro_interleaved_order_control]: 2.29001e-06 [reorder_send_recv_between_fp_bp]: 1.75997e-06 [comm_op_add_attrs]: 0.00014745 [add_comm_op_reuse_tag]: 0.00014898 [interleave_split_concat_branches]: 1.05007e-06 [interleave_parallel_branches]: 9.79984e-07 [overlap_opt_shard_in_pipeline]: 1.35599e-05 [overlap_opt_shard_grad_in_pipeline]: 3.37989e-06 [control_data_broadcast_order]: 1.06997e-06 [grouped_pairwise_exchange_alltoall]: 1.13901e-05 [offloading_packed_experts]: 2.54996e-06 [overlap_recompute_and_grad_model_parallel]: 1.91992e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.49948e-07 [overlap_recompute_allgather_and_fa_grad]: 0.00010354 [overlap_grad_ring_attention]: 0.0001648 [overlap_grad_flash_sp]: 0.00012201 [begin_end_overlap_inline]: 1.02003e-06 [split_matmul_comm_elemetwise]: 2.09e-06 [split_layernorm_comm]: 2.2999e-06 [handle_group_info]: 5.53997e-06 [symbol_engine_optimizer]: 0.00081148, [1] [Cycle 1]: 0.00080425, [6] [build]: 5.374e-05 [elim_shapecalc]: 0.00014803 [elim_not_effective]: 0.00022142 [opt_reshape]: 0.00013106 [fold_const_symbol]: 0.00021079 [renormalize]: 4.1991e-07 [pipeline_parallel_scheduler]: 3.90992e-06 [auto_monad_reorder]: 0.00031364 [get_jit_bprop_graph]: 5.89993e-07 [rewriter_after_jit_bprop_graph]: 4.89992e-07 [eliminate_special_op_node]: 0.00145782 [distribtued_split]: 0.00037438 [validate]: 0.00028958 [task_emit]: 13.0435 [execute]: 1.282e-05 Sums bootstrap : 0.001467s : 0.01% type_inference : 0.752272s : 5.21% auto_monad : 0.001922s : 0.01% graph_reusing : 0.000024s : 0.00% inline.rewriter_before_opt_a : 0.001486s : 0.01% inline.a1a2.expand_dump_flag : 0.000038s : 0.00% inline.a1a2.switch_simplify : 0.001206s : 0.01% inline.a1a2.loop_unroll : 0.000832s : 0.01% inline.a1a2.a_1 : 0.025667s : 0.18% inline.a1a2.recompute_prepare : 0.000265s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000583s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000158s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000316s : 0.00% inline.a1a2.parameter_eliminate : 0.000010s : 0.00% inline.a1a2.a_2 : inate]: 8.674e-05 [updatestate_loads_eliminate]: 8.972e-05 [renormalize]: 6.10016e-07 [cse]: 0.00040509 [optimize_parallel_all_gather_comm]: 0.00014213 [overlap_param_gather]: 1.00001e-06 [cconv]: 6.85101e-05 [loop_unroll]: 0.00087921 [opt_after_cconv]: 0.00157452, [1] [Cycle 1]: 0.00156645, [7] [c_1]: 0.0007939 [parameter_eliminate]: 3.14997e-06 [updatestate_depend_eliminate]: 0.00013279 [updatestate_assign_eliminate]: 9.45099e-05 [updatestate_loads_eliminate]: 9.06e-05 [cse]: 0.00039342 [renormalize]: 5.60074e-07 [remove_dup_value]: 0.00063031 [tuple_transform]: 0.00093694, [1] [Cycle 1]: 0.00092974, [2] [d_1]: 0.00091159 [renormalize]: 5.69969e-07 [partial_unused_args_eliminate]: 2.93006e-06 [add_cache_embedding]: 0.0001539 [add_recomputation]: 0.00073389 [cse_after_recomputation]: 0.00034661, [1] [Cycle 1]: 0.00033821, [1] [cse]: 0.00032271 [environ_conv]: 9.604e-05 [swap_dp_allreduce_reducescatter]: 0.00013505 [bias_add_comm_swap]: 2.81003e-06 [label_micro_interleaved_index]: 1.46998e-06 [label_fine_grained_interleaved_index]: 0.00052992 [merge_cast_opt]: 1.46998e-06 [slice_recompute_activation]: 0.00014647 [micro_interleaved_order_control]: 1.59e-06 [assign_add_opt]: 0.00038572 [ForceFp32Comm]: 1.34006e-06 [remove_cast_before_assign_add]: 0.00011045 [full_micro_interleaved_order_control]: 1.79e-06 [reorder_send_recv_between_fp_bp]: 1.04995e-06 [comm_op_add_attrs]: 0.00014439 [add_comm_op_reuse_tag]: 0.00014844 [interleave_split_concat_branches]: 7.20029e-07 [interleave_parallel_branches]: 4.39934e-07 [overlap_opt_shard_in_pipeline]: 1.69501e-05 [overlap_opt_shard_grad_in_pipeline]: 2.48e-06 [control_data_broadcast_order]: 1.06997e-06 [grouped_pairwise_exchange_alltoall]: 9.39006e-06 [offloading_packed_experts]: 1.51002e-06 [overlap_recompute_and_grad_model_parallel]: 1.42003e-06 [overlap_grad_matmul_and_grad_allreduce]: 6.10016e-07 [overlap_recompute_allgather_and_fa_grad]: 7.682e-05 [overlap_grad_ring_attention]: 0.00016288 [overlap_grad_flash_sp]: 0.00011909 [begin_end_overlap_inline]: 6.20028e-07 [split_matmul_comm_elemetwise]: 1.41992e-06 [split_layernorm_comm]: 1.46008e-06 [handle_group_info]: 3.62005e-06 [symbol_engine_optimizer]: 0.00081475, [1] [Cycle 1]: 0.00080734, [6] [build]: 5.528e-05 [elim_shapecalc]: 0.00014528 [elim_not_effective]: 0.00022358 [opt_reshape]: 0.00013125 [fold_const_symbol]: 0.00021183 [renormalize]: 5.30039e-07 [pipeline_parallel_scheduler]: 3.15008e-06 [auto_monad_reorder]: 0.000313 [get_jit_bprop_graph]: 6.20028e-07 [rewriter_after_jit_bprop_graph]: 4.50062e-07 [eliminate_special_op_node]: 0.00143294 [distribtued_split]: 0.00037488 [validate]: 0.00030015 [task_emit]: 13.1071 [execute]: 1.28001e-05 Sums bootstrap : 0.001520s : 0.01% type_inference : 0.777004s : 5.34% auto_monad : 0.001992s : 0.01% graph_reusing : 0.000026s : 0.00% inline.rewriter_before_opt_a : 0.001493s : 0.01% inline.a1a2.expand_dump_flag : 0.000039s : 0.00% inline.a1a2.switch_simplify : 0.001178s : 0.01% inline.a1a2.loop_unroll : 0.000774s : 0.01% inline.a1a2.a_1 : 0.025283s : 0.17% inline.a1a2.recompute_prepare : 0.000265s : 0.00% inline.a1a2.updatestate_depend_eliminate : 0.000583s : 0.00% inline.a1a2.updatestate_assign_eliminate : 0.000154s : 0.00% inline.a1a2.updatestate_loads_eliminate : 0.000259s : 0.00% inline.a1a2.parameter_eliminate : 0.000009s : 0.00% inline.a1a2.a_2 0.005003s : 0.03% inline.a1a2.parallel_inline_pass : 0.000207s : 0.00% parallel-infer-symbol : 0.000195s : 0.00% pre_auto_parallel : 0.000107s : 0.00% insert-virtual-dataset : 0.001246s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000138s : 0.00% pipeline_split : 0.000101s : 0.00% optimize.py_interpret_to_execute : 0.000127s : 0.00% optimize.rewriter_before_opt_a : 0.000280s : 0.00% optimize.opt_a.expand_dump_flag : 0.000054s : 0.00% optimize.opt_a.switch_simplify : 0.002029s : 0.01% optimize.opt_a.loop_unroll : 0.001737s : 0.01% optimize.opt_a.a_1 : 0.039075s : 0.27% optimize.opt_a.recompute_prepare : 0.000413s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000486s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000262s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000331s : 0.00% optimize.opt_a.parameter_eliminate : 0.000011s : 0.00% optimize.opt_a.a_2 : 0.008064s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000642s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000188s : 0.00% optimize.opt_a.shard_inline : 0.000383s : 0.00% optimize.opt_a.auto_parallel : 0.000312s : 0.00% optimize.opt_a.parallel : 0.014961s : 0.10% optimize.opt_a.flash_sp : 0.000187s : 0.00% optimize.opt_a.merge_comm : 0.000351s : 0.00% optimize.opt_a.allreduce_fusion : 0.000261s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000328s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000406s : 0.00% optimize.opt_a.virtual_dataset : 0.000435s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000372s : 0.00% optimize.opt_a.virtual_output : 0.000377s : 0.00% optimize.opt_a.merge_forward : 0.000264s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000713s : 0.00% optimize.opt_a.before_grad : 0.000681s : 0.00% optimize.opt_a.inplace_validation : 0.000305s : 0.00% optimize.opt_a.parallel_renormalize : 0.022071s : 0.15% optimize.opt_a.update_top_fg : 0.000002s : 0.00% optimize.opt_a.cast_eliminate : 0.000454s : 0.00% optimize.opt_a.meta_fg_expand : 0.265164s : 1.84% optimize.opt_a.inplace_validation_after_expand : 0.001834s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001193s : 0.01% optimize.opt_a.receive_attached : 0.000103s : 0.00% optimize.opt_a.after_resolve : 0.002256s : 0.02% optimize.opt_a.a_after_grad : 0.004317s : 0.03% optimize.opt_a.special_op_eliminate : 0.002059s : 0.01% optimize.opt_a.renormalize : 0.170159s : 1.18% optimize.opt_a.add_forward_monad_depend : 0.000370s : 0.00% optimize.opt_a.auto_monad_grad : 0.000224s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002305s : 0.02% optimize.opt_a.cse : 0.011587s : 0.08% optimize.opt_a.a_3 : 0.025937s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000147s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000989s : 0.01% optimize.convert_after_rewriter : 0.000112s : 0.00% optimize.order_py_execute_after_rewriter : 0.000080s : 0.00% optimize.o : 0.004951s : 0.03% inline.a1a2.parallel_inline_pass : 0.000204s : 0.00% parallel-infer-symbol : 0.000168s : 0.00% pre_auto_parallel : 0.000100s : 0.00% insert-virtual-dataset : 0.001316s : 0.01% parallel-infer-symbol-second : 0.000003s : 0.00% dataset_repeat_opt : 0.000098s : 0.00% pipeline_split : 0.000110s : 0.00% optimize.py_interpret_to_execute : 0.000139s : 0.00% optimize.rewriter_before_opt_a : 0.000286s : 0.00% optimize.opt_a.expand_dump_flag : 0.000061s : 0.00% optimize.opt_a.switch_simplify : 0.002040s : 0.01% optimize.opt_a.loop_unroll : 0.001743s : 0.01% optimize.opt_a.a_1 : 0.039659s : 0.27% optimize.opt_a.recompute_prepare : 0.000440s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000525s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000268s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000339s : 0.00% optimize.opt_a.parameter_eliminate : 0.000011s : 0.00% optimize.opt_a.a_2 : 0.008192s : 0.06% optimize.opt_a.accelerated_algorithm : 0.000657s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000219s : 0.00% optimize.opt_a.shard_inline : 0.000386s : 0.00% optimize.opt_a.auto_parallel : 0.000325s : 0.00% optimize.opt_a.parallel : 0.015499s : 0.11% optimize.opt_a.flash_sp : 0.000189s : 0.00% optimize.opt_a.merge_comm : 0.000391s : 0.00% optimize.opt_a.allreduce_fusion : 0.000269s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000341s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000417s : 0.00% optimize.opt_a.virtual_dataset : 0.000435s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000378s : 0.00% optimize.opt_a.virtual_output : 0.000381s : 0.00% optimize.opt_a.merge_forward : 0.000261s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000009s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000714s : 0.00% optimize.opt_a.before_grad : 0.000682s : 0.00% optimize.opt_a.inplace_validation : 0.000311s : 0.00% optimize.opt_a.parallel_renormalize : 0.023329s : 0.16% optimize.opt_a.update_top_fg : 0.000003s : 0.00% optimize.opt_a.cast_eliminate : 0.000458s : 0.00% optimize.opt_a.meta_fg_expand : 0.270164s : 1.86% optimize.opt_a.inplace_validation_after_expand : 0.002010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.001260s : 0.01% optimize.opt_a.receive_attached : 0.000111s : 0.00% optimize.opt_a.after_resolve : 0.002314s : 0.02% optimize.opt_a.a_after_grad : 0.004416s : 0.03% optimize.opt_a.special_op_eliminate : 0.002196s : 0.02% optimize.opt_a.renormalize : 0.175406s : 1.21% optimize.opt_a.add_forward_monad_depend : 0.000366s : 0.00% optimize.opt_a.auto_monad_grad : 0.000226s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.002281s : 0.02% optimize.opt_a.cse : 0.011782s : 0.08% optimize.opt_a.a_3 : 0.026295s : 0.18% optimize.py_interpret_to_execute_after_opt_a : 0.000146s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000973s : 0.01% optimize.convert_after_rewriter : 0.000116s : 0.00% optimize.order_py_execute_after_rewriter : 0.000080s : 0.00% optipt_b.b_1 : 0.003085s : 0.02% optimize.opt_b.b_2 : 0.000137s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000101s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000087s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000090s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000399s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000140s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000071s : 0.00% optimize.loop_unroll : 0.000962s : 0.01% optimize.opt_after_cconv.c_1 : 0.000821s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000138s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000094s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000095s : 0.00% optimize.opt_after_cconv.cse : 0.000390s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000607s : 0.00% optimize.tuple_transform.d_1 : 0.000968s : 0.01% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000151s : 0.00% optimize.add_recomputation : 0.000715s : 0.00% optimize.cse_after_recomputation.cse : 0.000296s : 0.00% optimize.environ_conv : 0.000097s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000130s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000556s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000146s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000390s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000104s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000147s : 0.00% optimize.add_comm_op_reuse_tag : 0.000149s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000014s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.00% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000104s : 0.00% optimize.overlap_grad_ring_attention : 0.000165s : 0.00% optimize.overlap_grad_flash_sp : 0.000122s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000006s : 0.00% optimize.symbol_engine_optimizer.build : 0.000054s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000148s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000221s : 0.00% optmize.opt_b.b_1 : 0.003113s : 0.02% optimize.opt_b.b_2 : 0.000138s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000099s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000087s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000090s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000405s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000142s : 0.00% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000069s : 0.00% optimize.loop_unroll : 0.000879s : 0.01% optimize.opt_after_cconv.c_1 : 0.000794s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000133s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000095s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000091s : 0.00% optimize.opt_after_cconv.cse : 0.000393s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000630s : 0.00% optimize.tuple_transform.d_1 : 0.000912s : 0.01% optimize.tuple_transform.renormalize : 0.000001s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000154s : 0.00% optimize.add_recomputation : 0.000734s : 0.01% optimize.cse_after_recomputation.cse : 0.000323s : 0.00% optimize.environ_conv : 0.000096s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000135s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000530s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000146s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000386s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000110s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000144s : 0.00% optimize.add_comm_op_reuse_tag : 0.000148s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000017s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.00% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000077s : 0.00% optimize.overlap_grad_ring_attention : 0.000163s : 0.00% optimize.overlap_grad_flash_sp : 0.000119s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000004s : 0.00% optimize.symbol_engine_optimizer.build : 0.000055s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000145s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000224s : 0.00% imize.symbol_engine_optimizer.opt_reshape : 0.000131s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000211s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000004s : 0.00% auto_monad_reorder : 0.000314s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001458s : 0.01% distribtued_split : 0.000374s : 0.00% validate : 0.000290s : 0.00% task_emit : 13.043483s : 90.35% execute : 0.000013s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000131s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000212s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% pipeline_parallel_scheduler : 0.000003s : 0.00% auto_monad_reorder : 0.000313s : 0.00% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.001433s : 0.01% distribtued_split : 0.000375s : 0.00% validate : 0.000300s : 0.00% task_emit : 13.107145s : 90.15% execute : 0.000013s : 0.00% Time group info: ------[substitution.] 0.049298 4298 0.04% : 0.000021s : 5: substitution.ad_related_special_op_eliminate 0.05% : 0.000023s : 9: substitution.addn_check_dump 0.11% : 0.000054s : 7: substitution.addn_zero_filter 0.03% : 0.000015s : 7: substitution.adjust_all_reduce_mul_add 0.65% : 0.000319s : 71: substitution.arithmetic_simplify 0.12% : 0.000057s : 10: substitution.cast_eliminate 0.11% : 0.000056s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.08% : 0.000040s : 15: substitution.environ_get_add_eliminate 0.04% : 0.000019s : 12: substitution.environ_get_depend_swap 0.06% : 0.000030s : 27: substitution.environ_get_eliminate 0.07% : 0.000037s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000019s : 23: substitution.float_depend_g_call 0.02% : 0.000011s : 12: substitution.float_environ_get_switch 0.03% : 0.000012s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000031s : 107: substitution.fold_const_symbol 63.95% : 0.031525s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000084s : 126: substitution.graph_param_transform 0.02% : 0.000009s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 24.37% : 0.012013s : 331: substitution.inline 1.35% : 0.000667s : 112: substitution.inline_without_move 0.25% : 0.000123s : 309: substitution.j_node_and_user_rematch 0.34% : 0.000167s : 40: substitution.less_batch_normalization 0.09% : 0.000045s : 90: substitution.load_eliminater 0.11% : 0.000053s : 10: substitution.merge_addn 0.23% : 0.000112s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.04% : 0.000019s : 1: substitution.partial_defer_inline 0.12% : 0.000059s : 23: substitution.partial_eliminate 0.03% : 0.000017s : 26: substitution.reduce_all_const_elim 0.07% : 0.000033s : 15: substitution.reduce_eliminate 0.32% : 0.000158s : 309: substitution.remove_not_recompute_node 1.98% : 0.000976s : 508: substitution.replace_applicator 0.22% : 0.000110s : 251: substitution.replace_old_param 0.08% : 0.000038s : 11: substitution.reshape_eliminate 0.03% : 0.000013s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000011s : 4: substitution.specialize_transform 0.03% : 0.000017s : 12: substitution.split_environ_get_set_with_tuple_value 0.20% : 0.000100s : 34: substitution.switch_simplify 0.06% : 0.000030s : 11: substitution.tile_eliminate 0.52% : 0.000258s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000134s : 107: substitution.tuple_list_get_item_const_eliminator 0.43% : 0.000213s : 107: substitution.tuple_list_get_item_depend_reorder 1.61% : 0.000792s : 308: substitution.tuple_list_get_item_eliminator 0.37% : 0.000185s : 107: substitution.tuple_list_get_set_item_eliminator 0.40% : 0.000196s : 210: substitution.updatestate_pure_node_eliminater 0.70% : 0.000347s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000012s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.751807 2 96.54% : 0.725770s : 1: type_inference.infer 3.46% : 0.026037s : 1: type_inference.specialize ------[replace.] 0.009999 775 0.41% : 0.000041s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000007s : 1: replace.arithmetic_simplify 0.52% : 0.000052s : 7: replace.depend_value_elim 0.45% : 0.000045s : 3: replace.environ_get_set_eliminate 29.13% : 0.002913s : 183: replace.getattr_setattr_resolve 29.79% : 0.002978s : 310: replace.inline 0.21% : 0.000021s Time group info: ------[substitution.] 0.049952 4298 0.04% : 0.000020s : 5: substitution.ad_related_special_op_eliminate 0.04% : 0.000021s : 9: substitution.addn_check_dump 0.10% : 0.000052s : 7: substitution.addn_zero_filter 0.03% : 0.000016s : 7: substitution.adjust_all_reduce_mul_add 0.64% : 0.000318s : 71: substitution.arithmetic_simplify 0.11% : 0.000055s : 10: substitution.cast_eliminate 0.11% : 0.000054s : 47: substitution.depend_value_elim 0.06% : 0.000030s : 107: substitution.elim_not_effective 0.00% : 0.000002s : 1: substitution.elim_shapecalc_of_broadcastargs 0.05% : 0.000024s : 15: substitution.environ_get_add_eliminate 0.03% : 0.000016s : 12: substitution.environ_get_depend_swap 0.06% : 0.000029s : 27: substitution.environ_get_eliminate 0.07% : 0.000033s : 15: substitution.environ_get_set_eliminate 0.04% : 0.000019s : 23: substitution.float_depend_g_call 0.02% : 0.000010s : 12: substitution.float_environ_get_switch 0.02% : 0.000011s : 10: substitution.float_tuple_getitem_switch 0.06% : 0.000031s : 107: substitution.fold_const_symbol 64.06% : 0.032000s : 257: substitution.getattr_setattr_resolve 0.17% : 0.000083s : 126: substitution.graph_param_transform 0.02% : 0.000008s : 8: substitution.incorporate_call 0.01% : 0.000005s : 8: substitution.incorporate_call_switch 24.32% : 0.012146s : 331: substitution.inline 1.51% : 0.000752s : 112: substitution.inline_without_move 0.25% : 0.000124s : 309: substitution.j_node_and_user_rematch 0.37% : 0.000185s : 40: substitution.less_batch_normalization 0.09% : 0.000045s : 90: substitution.load_eliminater 0.11% : 0.000054s : 10: substitution.merge_addn 0.23% : 0.000115s : 101: substitution.minmaximum_grad 0.00% : 0.000002s : 4: substitution.opt_reshape 0.04% : 0.000022s : 1: substitution.partial_defer_inline 0.13% : 0.000066s : 23: substitution.partial_eliminate 0.03% : 0.000017s : 26: substitution.reduce_all_const_elim 0.07% : 0.000034s : 15: substitution.reduce_eliminate 0.32% : 0.000158s : 309: substitution.remove_not_recompute_node 1.99% : 0.000993s : 508: substitution.replace_applicator 0.22% : 0.000109s : 251: substitution.replace_old_param 0.08% : 0.000039s : 11: substitution.reshape_eliminate 0.03% : 0.000013s : 6: substitution.set_cell_output_no_recompute 0.02% : 0.000011s : 4: substitution.specialize_transform 0.03% : 0.000015s : 12: substitution.split_environ_get_set_with_tuple_value 0.16% : 0.000078s : 34: substitution.switch_simplify 0.06% : 0.000031s : 11: substitution.tile_eliminate 0.51% : 0.000256s : 101: substitution.tuple_list_convert_item_index_to_positive 0.27% : 0.000134s : 107: substitution.tuple_list_get_item_const_eliminator 0.43% : 0.000213s : 107: substitution.tuple_list_get_item_depend_reorder 1.55% : 0.000775s : 308: substitution.tuple_list_get_item_eliminator 0.37% : 0.000183s : 107: substitution.tuple_list_get_set_item_eliminator 0.39% : 0.000196s : 210: substitution.updatestate_pure_node_eliminater 0.67% : 0.000337s : 265: substitution.updatestate_useless_node_eliminater 0.02% : 0.000012s : 1: substitution.virtual_dataset_eliminate ------[type_inference.] 0.776538 2 96.49% : 0.749284s : 1: type_inference.infer 3.51% : 0.027255s : 1: type_inference.specialize ------[replace.] 0.010049 775 0.44% : 0.000044s : 5: replace.ad_related_special_op_eliminate 0.07% : 0.000007s : 1: replace.arithmetic_simplify 0.51% : 0.000052s : 7: replace.depend_value_elim 0.43% : 0.000043s : 3: replace.environ_get_set_eliminate 29.40% : 0.002954s : 183: replace.getattr_setattr_resolve 29.52% : 0.002967s : 310: replace.inline 0.23% : 0.000024s : 1: replace.merge_addn 1.16% : 0.000116s : 7: replace.partial_eliminate 4.13% : 0.000413s : 25: replace.replace_applicator 3.95% : 0.000395s : 34: replace.switch_simplify 0.49% : 0.000049s : 6: replace.tuple_list_get_item_depend_reorder 29.31% : 0.002930s : 191: replace.tuple_list_get_item_eliminator 0.17% : 0.000017s : 1: replace.updatestate_useless_node_eliminater 0.20% : 0.000020s : 1: replace.virtual_dataset_eliminate ------[match.] 0.041806 775 0.04% : 0.000017s : 5: match.ad_related_special_op_eliminate 0.03% : 0.000012s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.05% : 0.000019s : 3: match.environ_get_set_eliminate 70.01% : 0.029270s : 183: match.getattr_setattr_resolve 28.13% : 0.011759s : 310: match.inline 0.06% : 0.000023s : 1: match.merge_addn 0.10% : 0.000040s : 7: match.partial_eliminate 0.25% : 0.000106s : 25: match.replace_applicator 0.19% : 0.000080s : 34: match.switch_simplify 0.07% : 0.000030s : 6: match.tuple_list_get_item_depend_reorder 1.02% : 0.000427s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000009s : 1: match.updatestate_useless_node_eliminater 0.03% : 0.000011s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020363131318 0.76% : 0.000156s : 1198: predicate.accumulaten_eliminater 0.28% : 0.000057s : 254: predicate.ad_related_special_op_eliminate 0.57% : 0.000116s : 835: predicate.addn_check_dump 0.80% : 0.000163s : 1198: predicate.addn_zero_filter 0.76% : 0.000155s : 1198: predicate.adjust_all_reduce_mul_add 1.81% : 0.000369s : 2034: predicate.arithmetic_simplify 1.16% : 0.000235s : 1586: predicate.cast_eliminate 3.08% : 0.000628s : 3484: predicate.check_bprop_eliminate 0.57% : 0.000117s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.18% : 0.000036s : 242: predicate.convert_tensor_all_eliminate 1.16% : 0.000236s : 1399: predicate.convert_tensor_eliminate 0.59% : 0.000120s : 838: predicate.depend_value_elim 0.83% : 0.000170s : 1202: predicate.dict_get_item_const_eliminator 0.86% : 0.000176s : 1202: predicate.dict_get_item_eliminator 0.83% : 0.000170s : 1202: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 126: predicate.elim_not_effective 0.11% : 0.000023s : 126: predicate.elim_shapecalc_of_broadcastargs 0.85% : 0.000174s : 1334: predicate.environ_add_const_eliminate 0.86% : 0.000176s : 1337: predicate.environ_get_add_eliminate 0.86% : 0.000174s : 1334: predicate.environ_get_depend_swap 1.46% : 0.000298s : 2172: predicate.environ_get_eliminate 0.86% : 0.000174s : 1337: predicate.environ_get_set_eliminate 1.13% : 0.000231s : 1717: predicate.exchange_switch_depend_value 1.41% : 0.000286s : 1717: predicate.float_depend_g_call 0.57% : 0.000117s : 835: predicate.float_environ_get_switch 0.67% : 0.000136s : 970: predicate.float_tuple_getitem_switch 0.05% : 0.000009s : 126: predicate.fold_const_symbol 0.29% : 0.000058s : 395: predicate.get_grad_eliminate 2.31% : 0.000470s : 1893: predicate.getattr_setattr_resolve 0.06% : 0.000011s : 126: predicate.graph_param_transform 0.57% : 0.000115s : 835: predicate.incorporate_call 0.56% : 0.000113s : 835: predicate.incorporate_call_switch 3.96% : 0.000806s : 4602: predicate.inline 2.27% : 0.000462s : 2203: predicate.inline_without_move 0.15% : 0.000031s : 395: predicate.j_node_and_user_rematch 0.34% : 0.000068s : 388: predicate.less_batch_normalization 1.10% : 0.000225s : 1660: predicate.list_to_tuple_eliminator_ 1.94% : 0.000395s : 2874: predicate.load_eliminater 0.21% : 0.000042s : 135: predicate.loop_unroll_after_grad 2.31% : 0.000471s : 2640:: 1: replace.merge_addn 1.14% : 0.000115s : 7: replace.partial_eliminate 3.88% : 0.000390s : 25: replace.replace_applicator 3.83% : 0.000384s : 34: replace.switch_simplify 0.51% : 0.000052s : 6: replace.tuple_list_get_item_depend_reorder 29.67% : 0.002982s : 191: replace.tuple_list_get_item_eliminator 0.16% : 0.000016s : 1: replace.updatestate_useless_node_eliminater 0.21% : 0.000021s : 1: replace.virtual_dataset_eliminate ------[match.] 0.042344 775 0.04% : 0.000016s : 5: match.ad_related_special_op_eliminate 0.02% : 0.000010s : 1: match.arithmetic_simplify 0.01% : 0.000003s : 7: match.depend_value_elim 0.04% : 0.000016s : 3: match.environ_get_set_eliminate 70.15% : 0.029703s : 183: match.getattr_setattr_resolve 28.14% : 0.011917s : 310: match.inline 0.06% : 0.000026s : 1: match.merge_addn 0.09% : 0.000037s : 7: match.partial_eliminate 0.23% : 0.000097s : 25: match.replace_applicator 0.14% : 0.000058s : 34: match.switch_simplify 0.07% : 0.000031s : 6: match.tuple_list_get_item_depend_reorder 0.97% : 0.000413s : 191: match.tuple_list_get_item_eliminator 0.02% : 0.000008s : 1: match.updatestate_useless_node_eliminater 0.02% : 0.000010s : 1: match.virtual_dataset_eliminate ------[predicate.] 0.020708131318 0.79% : 0.000164s : 1198: predicate.accumulaten_eliminater 0.28% : 0.000058s : 254: predicate.ad_related_special_op_eliminate 0.56% : 0.000116s : 835: predicate.addn_check_dump 0.78% : 0.000162s : 1198: predicate.addn_zero_filter 0.76% : 0.000157s : 1198: predicate.adjust_all_reduce_mul_add 1.83% : 0.000379s : 2034: predicate.arithmetic_simplify 1.12% : 0.000232s : 1586: predicate.cast_eliminate 3.10% : 0.000642s : 3484: predicate.check_bprop_eliminate 0.56% : 0.000116s : 835: predicate.compare_switch_simplify 0.05% : 0.000010s : 135: predicate.const_output_eliminate 0.17% : 0.000035s : 242: predicate.convert_tensor_all_eliminate 1.12% : 0.000232s : 1399: predicate.convert_tensor_eliminate 0.57% : 0.000119s : 838: predicate.depend_value_elim 0.83% : 0.000172s : 1202: predicate.dict_get_item_const_eliminator 0.85% : 0.000177s : 1202: predicate.dict_get_item_eliminator 0.82% : 0.000170s : 1202: predicate.dict_set_item_eliminator 0.05% : 0.000009s : 126: predicate.elim_not_effective 0.10% : 0.000022s : 126: predicate.elim_shapecalc_of_broadcastargs 0.83% : 0.000171s : 1334: predicate.environ_add_const_eliminate 1.14% : 0.000235s : 1337: predicate.environ_get_add_eliminate 0.83% : 0.000172s : 1334: predicate.environ_get_depend_swap 1.49% : 0.000309s : 2172: predicate.environ_get_eliminate 0.83% : 0.000173s : 1337: predicate.environ_get_set_eliminate 1.15% : 0.000237s : 1717: predicate.exchange_switch_depend_value 1.40% : 0.000291s : 1717: predicate.float_depend_g_call 0.56% : 0.000117s : 835: predicate.float_environ_get_switch 0.65% : 0.000135s : 970: predicate.float_tuple_getitem_switch 0.04% : 0.000009s : 126: predicate.fold_const_symbol 0.28% : 0.000058s : 395: predicate.get_grad_eliminate 2.42% : 0.000501s : 1893: predicate.getattr_setattr_resolve 0.05% : 0.000011s : 126: predicate.graph_param_transform 0.55% : 0.000115s : 835: predicate.incorporate_call 0.55% : 0.000113s : 835: predicate.incorporate_call_switch 4.00% : 0.000828s : 4602: predicate.inline 2.42% : 0.000501s : 2203: predicate.inline_without_move 0.14% : 0.000030s : 395: predicate.j_node_and_user_rematch 0.35% : 0.000072s : 388: predicate.less_batch_normalization 1.12% : 0.000232s : 1660: predicate.list_to_tuple_eliminator_ 1.89% : 0.000391s : 2874: predicate.load_eliminater 0.20% : 0.000041s : 135: predicate.loop_unroll_after_grad 2.34% : 0.000484s : 2640: predicate.loop_unroll_before_grad 0.98% : 0.000199s : 1478: predicate.make_slice_get_slice_eliminator 0.64% : 0.000131s : 837: predicate.merge_addn 2.98% : 0.000607s : 3380: predicate.micro_step_allgather_replace 2.99% : 0.000608s : 3380: predicate.mini_step_allgather_replace 0.76% : 0.000155s : 1199: predicate.minmaximum_grad 0.18% : 0.000038s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.11% : 0.000022s : 135: predicate.parallel_virtual_node 2.06% : 0.000420s : 1717: predicate.partial_defer_inline 1.31% : 0.000266s : 1541: predicate.partial_eliminate 1.03% : 0.000210s : 1198: predicate.print_const_string_wrapper 0.57% : 0.000117s : 824: predicate.reduce_all_const_elim 1.02% : 0.000207s : 1199: predicate.reduce_eliminate 0.15% : 0.000030s : 395: predicate.remove_not_recompute_node 1.99% : 0.000406s : 4829: predicate.replace_applicator 0.82% : 0.000167s : 2203: predicate.replace_old_param 0.05% : 0.000011s : 135: predicate.reset_defer_inline 0.79% : 0.000162s : 1199: predicate.reshape_eliminate 3.03% : 0.000617s : 3380: predicate.row_tensor_add_zeros_like 0.11% : 0.000022s : 135: predicate.row_tensor_eliminate 3.17% : 0.000646s : 3484: predicate.same_eliminate 0.24% : 0.000049s : 633: predicate.set_cell_output_no_recompute 0.30% : 0.000060s : 395: predicate.shard_identity_eliminate 2.07% : 0.000422s : 2338: predicate.special_op_eliminate 0.66% : 0.000134s : 837: predicate.specialize_transform 3.31% : 0.000674s : 3380: predicate.split_environ_get_set_with_tuple_value 1.61% : 0.000327s : 2203: predicate.stack_unstack_eliminate 1.86% : 0.000380s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.31% : 0.000266s : 1717: predicate.switch_defer_inline 4.47% : 0.000910s : 5201: predicate.switch_layer_defer_inline 4.29% : 0.000874s : 5262: predicate.switch_simplify 0.78% : 0.000159s : 1199: predicate.tile_eliminate 0.77% : 0.000156s : 1199: predicate.transpose_eliminate 1.09% : 0.000221s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.12% : 0.000229s : 1469: predicate.tuple_list_get_item_const_eliminator 0.99% : 0.000202s : 1469: predicate.tuple_list_get_item_depend_reorder 1.94% : 0.000394s : 2495: predicate.tuple_list_get_item_eliminator 1.01% : 0.000205s : 1469: predicate.tuple_list_get_set_item_eliminator 1.69% : 0.000344s : 2304: predicate.tuple_list_set_item_eliminator 1.10% : 0.000223s : 1660: predicate.tuple_to_list_eliminator_ 1.88% : 0.000383s : 2874: predicate.updatestate_pure_node_eliminater 2.58% : 0.000526s : 3710: predicate.updatestate_useless_node_eliminater 0.11% : 0.000021s : 135: predicate.value_based_eliminate 0.29% : 0.000059s : 397: predicate.virtual_dataset_eliminate 0.28% : 0.000056s : 395: predicate.virtual_output_eliminate 0.10% : 0.000021s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.060091 747 68.70% : 0.041284s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.47% : 0.001483s : 22: func_graph_cloner_run.FuncGraphClonerNode 28.83% : 0.017323s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.451634 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.28% : 0.042578s : 1: a1a2 0.00% : 0.000158s : 1: add_cache_embedding 0.00% : 0.000156s : 1: add_comm_op_reuse_tag 0.00% : 0.000728s : 1: add_recomputation 0.00% : 0.000398s : 1: assign_add_opt 0.01% : 0.001949s : 1: auto_monad 0.00% : 0.000327s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_ predicate.loop_unroll_before_grad 0.95% : 0.000197s : 1478: predicate.make_slice_get_slice_eliminator 0.58% : 0.000119s : 837: predicate.merge_addn 3.01% : 0.000623s : 3380: predicate.micro_step_allgather_replace 3.02% : 0.000626s : 3380: predicate.mini_step_allgather_replace 0.78% : 0.000161s : 1199: predicate.minmaximum_grad 0.17% : 0.000036s : 242: predicate.mutable_eliminate 0.09% : 0.000019s : 126: predicate.opt_reshape 0.12% : 0.000025s : 135: predicate.parallel_virtual_node 2.08% : 0.000430s : 1717: predicate.partial_defer_inline 1.09% : 0.000227s : 1541: predicate.partial_eliminate 0.78% : 0.000162s : 1198: predicate.print_const_string_wrapper 0.57% : 0.000117s : 824: predicate.reduce_all_const_elim 0.99% : 0.000205s : 1199: predicate.reduce_eliminate 0.14% : 0.000030s : 395: predicate.remove_not_recompute_node 1.95% : 0.000403s : 4829: predicate.replace_applicator 0.79% : 0.000163s : 2203: predicate.replace_old_param 0.05% : 0.000010s : 135: predicate.reset_defer_inline 0.81% : 0.000168s : 1199: predicate.reshape_eliminate 3.16% : 0.000654s : 3380: predicate.row_tensor_add_zeros_like 0.10% : 0.000022s : 135: predicate.row_tensor_eliminate 3.20% : 0.000662s : 3484: predicate.same_eliminate 0.24% : 0.000050s : 633: predicate.set_cell_output_no_recompute 0.31% : 0.000063s : 395: predicate.shard_identity_eliminate 2.44% : 0.000505s : 2338: predicate.special_op_eliminate 0.64% : 0.000132s : 837: predicate.specialize_transform 3.29% : 0.000681s : 3380: predicate.split_environ_get_set_with_tuple_value 1.60% : 0.000332s : 2203: predicate.stack_unstack_eliminate 1.83% : 0.000379s : 2874: predicate.stopgrad_eliminater 0.09% : 0.000019s : 135: predicate.switch_call_monad_eliminater 1.26% : 0.000260s : 1717: predicate.switch_defer_inline 4.34% : 0.000899s : 5201: predicate.switch_layer_defer_inline 4.33% : 0.000897s : 5262: predicate.switch_simplify 0.77% : 0.000159s : 1199: predicate.tile_eliminate 0.76% : 0.000157s : 1199: predicate.transpose_eliminate 1.25% : 0.000259s : 1463: predicate.tuple_list_convert_item_index_to_positive 1.04% : 0.000216s : 1469: predicate.tuple_list_get_item_const_eliminator 0.94% : 0.000196s : 1469: predicate.tuple_list_get_item_depend_reorder 1.89% : 0.000392s : 2495: predicate.tuple_list_get_item_eliminator 0.98% : 0.000202s : 1469: predicate.tuple_list_get_set_item_eliminator 1.64% : 0.000340s : 2304: predicate.tuple_list_set_item_eliminator 1.12% : 0.000232s : 1660: predicate.tuple_to_list_eliminator_ 1.90% : 0.000393s : 2874: predicate.updatestate_pure_node_eliminater 2.56% : 0.000529s : 3710: predicate.updatestate_useless_node_eliminater 0.10% : 0.000020s : 135: predicate.value_based_eliminate 0.29% : 0.000059s : 397: predicate.virtual_dataset_eliminate 0.28% : 0.000058s : 395: predicate.virtual_output_eliminate 0.10% : 0.000021s : 135: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.061078 747 68.85% : 0.042053s : 338: func_graph_cloner_run.FuncGraphClonerGraph 2.43% : 0.001483s : 22: func_graph_cloner_run.FuncGraphClonerNode 28.72% : 0.017542s : 387: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 15.574208 346 0.00% : 0.000005s : 1: ForceFp32Comm 0.27% : 0.041638s : 1: a1a2 0.00% : 0.000162s : 1: add_cache_embedding 0.00% : 0.000156s : 1: add_comm_op_reuse_tag 0.00% : 0.000746s : 1: add_recomputation 0.00% : 0.000395s : 1: assign_add_opt 0.01% : 0.002017s : 1: auto_monad 0.00% : 0.000327s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.01% : 0.001516s : 1: bootstrap 0.00% : 0.000078s : 1: cconv 0.00% : 0.000154s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000120s : 1: convert_after_rewriter 0.00% : 0.000321s : 1: cse_after_recomputation 0.00% : 0.000148s : 1: dataset_repeat_opt 0.00% : 0.000390s : 1: distribtued_split 0.01% : 0.001475s : 1: eliminate_special_op_node 0.00% : 0.000106s : 1: environ_conv 0.00% : 0.000022s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000034s : 1: graph_reusing 0.00% : 0.000015s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000008s : 1: handle_group_info 0.29% : 0.044114s : 1: inline 0.01% : 0.001269s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000566s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000974s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.21% : 0.033107s : 61: opt.transform.a1a2 0.00% : 0.000178s : 1: opt.transform.loop_unroll_optimizer 0.58% : 0.090094s : 148: opt.transform.opt_a 0.01% : 0.000773s : 1: opt.transform.opt_after_cconv 0.02% : 0.003190s : 27: opt.transform.opt_b 0.24% : 0.037348s : 16: opt.transform.opt_resolve 0.01% : 0.000964s : 1: opt.transform.opt_trans_graph 0.01% : 0.000905s : 6: opt.transform.special_op_eliminate 0.00% : 0.000705s : 4: opt.transform.symbol_engine_opt 3.79% : 0.585030s : 1: opt_a 0.01% : 0.001610s : 1: opt_after_cconv 0.03% : 0.003967s : 1: opt_b 3.88% : 0.599678s : 1: optimize 0.00% : 0.000148s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000086s : 1: order_py_execute_after_rewriter 0.00% : 0.000126s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000171s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000017s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000110s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000210s : 1: parallel-infer-symbol 0.00% : 0.000009s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000009s : 1: pipeline_parallel_scheduler 0.00% : 0.000111s : 1: pipeline_split 0.00% : 0.000116s : 1: pre_auto_parallel 0.00% : 0.000133s : 1: py_interpret_to_execute 0.00% : 0.000156s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000109s : 1: remove_cast_before_assign_add 0.00% : 0.000621s : 1: remove_dup_value 0.89% : 0.137491s : 3: renormalize.infer 0.35% : 0.054696s : 3: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.001001s : 1: rewriter_after_opt_a 0.01% : 0.001790s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000152s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000136s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000815s : 1: symbol_engine_optimizer 84.42comm_swap 0.01% : 0.001579s : 1: bootstrap 0.00% : 0.000076s : 1: cconv 0.00% : 0.000152s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.00% : 0.000124s : 1: convert_after_rewriter 0.00% : 0.000352s : 1: cse_after_recomputation 0.00% : 0.000108s : 1: dataset_repeat_opt 0.00% : 0.000391s : 1: distribtued_split 0.01% : 0.001450s : 1: eliminate_special_op_node 0.00% : 0.000105s : 1: environ_conv 0.00% : 0.000023s : 1: execute 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000034s : 1: graph_reusing 0.00% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000007s : 1: handle_group_info 0.28% : 0.043175s : 1: inline 0.01% : 0.001341s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000539s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.01% : 0.000891s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.21% : 0.032585s : 61: opt.transform.a1a2 0.00% : 0.000179s : 1: opt.transform.loop_unroll_optimizer 0.59% : 0.091544s : 148: opt.transform.opt_a 0.01% : 0.000791s : 1: opt.transform.opt_after_cconv 0.02% : 0.003219s : 27: opt.transform.opt_b 0.24% : 0.037925s : 16: opt.transform.opt_resolve 0.01% : 0.000908s : 1: opt.transform.opt_trans_graph 0.01% : 0.000841s : 6: opt.transform.special_op_eliminate 0.00% : 0.000706s : 4: opt.transform.symbol_engine_opt 3.85% : 0.599128s : 1: opt_a 0.01% : 0.001581s : 1: opt_after_cconv 0.03% : 0.004006s : 1: opt_b 3.94% : 0.613671s : 1: optimize 0.00% : 0.000151s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000086s : 1: order_py_execute_after_rewriter 0.00% : 0.000123s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000169s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000021s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000082s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000180s : 1: parallel-infer-symbol 0.00% : 0.000010s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000008s : 1: pipeline_parallel_scheduler 0.00% : 0.000122s : 1: pipeline_split 0.00% : 0.000110s : 1: pre_auto_parallel 0.00% : 0.000148s : 1: py_interpret_to_execute 0.00% : 0.000156s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000116s : 1: remove_cast_before_assign_add 0.00% : 0.000645s : 1: remove_dup_value 0.91% : 0.141126s : 3: renormalize.infer 0.37% : 0.057563s : 3: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.01% : 0.000985s : 1: rewriter_after_opt_a 0.01% : 0.001802s : 2: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000152s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.00% : 0.000142s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000819s : 1: symbol_engine_optimizer 84.16% : 13.043526s : 1: task_emit 0.01% : 0.000997s : 1: tuple_transform 4.87% : 0.752309s : 1: type_inference 0.01% : 0.001319s : 1: validate % : 13.107187s : 1: task_emit 0.01% : 0.000942s : 1: tuple_transform 4.99% : 0.777037s : 1: type_inference 0.01% : 0.001334s : 1: validate distribute network loadcheckpoint. distribute network loadcheckpoint. distribute network loadcheckpoint. [WARNING] ME(169260:281473108241424,MainProcess):2025-02-07-15:54:29.236.264 [mindspore/train/serialization.py:1930] For 'load_param_into_net', remove parameter prefix name: stat., continue to load. [WARNING] ME(169260:281473108241424,MainProcess):2025-02-07-15:54:29.236.977 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 7 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(169260:281473108241424,MainProcess):2025-02-07-15:54:29.237.112 [mindspore/train/serialization.py:1828] ['fc2_weight', 'fc3_weight', 'accum.fc1_weight', 'accum.fc2_weight', 'accum.fc3_weight', 'stat.fc2_weight', 'stat.fc3_weight'] are not loaded. [WARNING] ME(169251:281472835476496,MainProcess):2025-02-07-15:54:29.240.379 [mindspore/train/serialization.py:1930] For 'load_param_into_net', remove parameter prefix name: accum., continue to load. [WARNING] ME(169242:281473212484624,MainProcess):2025-02-07-15:54:29.240.491 [mindspore/train/serialization.py:1930] For 'load_param_into_net', remove parameter prefix name: stat., continue to load. [WARNING] ME(169251:281472835476496,MainProcess):2025-02-07-15:54:29.241.039 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 7 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(169251:281472835476496,MainProcess):2025-02-07-15:54:29.241.181 [mindspore/train/serialization.py:1828] ['fc2_weight', 'fc3_weight', 'accum.fc2_weight', 'accum.fc3_weight', 'stat.fc1_weight', 'stat.fc2_weight', 'stat.fc3_weight'] are not loaded. [WARNING] ME(169242:281473212484624,MainProcess):2025-02-07-15:54:29.241.200 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 6 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(169242:281473212484624,MainProcess):2025-02-07-15:54:29.241.337 [mindspore/train/serialization.py:1828] ['fc2_weight', 'accum.fc1_weight', 'accum.fc2_weight', 'accum.fc3_weight', 'stat.fc1_weight', 'stat.fc2_weight'] are not loaded. [WARNING] PARALLEL(169260,ffff90a16c10,python3.7):2025-02-07-15:54:29.261.738 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169251,ffff805f5c10,python3.7):2025-02-07-15:54:29.263.798 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169242,ffff96d80c10,python3.7):2025-02-07-15:54:29.265.845 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. distribute network loadcheckpoint. distribute network loadcheckpoint. [WARNING] ME(169273:281473192180752,MainProcess):2025-02-07-15:54:29.274.255 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 8 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(169273:281473192180752,MainProcess):2025-02-07-15:54:29.274.632 [mindspore/train/serialization.py:1828] ['fc1_weight', 'fc3_weight', 'accum.fc1_weight', 'accum.fc2_weight', 'accum.fc3_weight', 'stat.fc1_weight', 'stat.fc2_weight', 'stat.fc3_weight'] are not loaded. distribute network loadcheckpoint. [WARNING] ME(169323:281472873683984,MainProcess):2025-02-07-15:54:29.277.912 [mindspore/train/serialization.py:1930] For 'load_param_into_net', remove parameter prefix name: accum., continue to load. distribute network loadcheckpoint. [WARNING] ME(169323:281472873683984,MainProcess):2025-02-07-15:54:29.278.672 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 7 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(169323:281472873683984,MainProcess):2025-02-07-15:54:29.278.817 [mindspore/train/serialization.py:1828] ['fc1_weight', 'fc2_weight', 'accum.fc1_weight', 'accum.fc2_weight', 'stat.fc1_weight', 'stat.fc2_weight', 'stat.fc3_weight'] are not loaded. [WARNING] ME(169285:281473220353040,MainProcess):2025-02-07-15:54:29.282.148 [mindspore/train/serialization.py:1930] For 'load_param_into_net', remove parameter prefix name: accum., continue to load. [WARNING] ME(169285:281473220353040,MainProcess):2025-02-07-15:54:29.282.888 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 7 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(169285:281473220353040,MainProcess):2025-02-07-15:54:29.283.034 [mindspore/train/serialization.py:1828] ['fc1_weight', 'fc3_weight', 'accum.fc1_weight', 'accum.fc3_weight', 'stat.fc1_weight', 'stat.fc2_weight', 'stat.fc3_weight'] are not loaded. [WARNING] ME(169309:281472978885648,MainProcess):2025-02-07-15:54:29.283.427 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 8 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(169309:281472978885648,MainProcess):2025-02-07-15:54:29.283.891 [mindspore/train/serialization.py:1828] ['fc1_weight', 'fc2_weight', 'accum.fc1_weight', 'accum.fc2_weight', 'accum.fc3_weight', 'stat.fc1_weight', 'stat.fc2_weight', 'stat.fc3_weight'] are not loaded. [WARNING] PARALLEL(169273,ffff95a23c10,python3.7):2025-02-07-15:54:29.301.907 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. distribute network loadcheckpoint. [WARNING] PARALLEL(169323,ffff82a65c10,python3.7):2025-02-07-15:54:29.307.533 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] ME(169297:281473365978128,MainProcess):2025-02-07-15:54:29.308.603 [mindspore/train/serialization.py:1930] For 'load_param_into_net', remove parameter prefix name: stat., continue to load. [WARNING] ME(169297:281473365978128,MainProcess):2025-02-07-15:54:29.309.361 [mindspore/train/serialization.py:1827] For 'load_param_into_net', 7 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. Another possibility is that the redundant loading is not enabled, but the loaded checkpoint is saved with redundancy removed. [WARNING] ME(169297:281473365978128,MainProcess):2025-02-07-15:54:29.309.490 [mindspore/train/serialization.py:1828] ['fc1_weight', 'fc3_weight', 'accum.fc1_weight', 'accum.fc2_weight', 'accum.fc3_weight', 'stat.fc1_weight', 'stat.fc3_weight'] are not loaded. [WARNING] PARALLEL(169285,ffff97501c10,python3.7):2025-02-07-15:54:29.312.950 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169309,ffff88eb9c10,python3.7):2025-02-07-15:54:29.315.748 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. ...[WARNING] PARALLEL(169297,ffff9ffe2c10,python3.7):2025-02-07-15:54:29.339.204 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 0.0933031, [21] [bootstrap]: 0.00039994 [type_inference]: 0.0049366 [auto_monad]: 0.00015504 [graph_reusing]: 2.62994e-06 [inline]: 1.41002e-06 [parallel-infer-symbol]: 2.33995e-06 [pre_auto_parallel]: 2.90701e-05 [insert-virtual-dataset]: 2.92994e-06 [parallel-infer-symbol-second]: 3.89991e-07 [dataset_repeat_opt]: 1.75997e-06 [pipeline_split]: 1.46998e-06 [optimize]: 0.00862521, [52] [py_interpret_to_execute]: 1.851e-05 [rewriter_before_opt_a]: 3.777e-05 [opt_a]: 0.00656578, [2] [Cycle 1]: 0.00179306, [43] [expand_dump_flag]: 4.19002e-06 [switch_simplify]: 3.149e-05 [loop_unroll]: 2.477e-05 [a_1]: 0.00038215 [recompute_prepare]: 8.99995e-06 [updatestate_depend_eliminate]: 8.47003e-06 [updatestate_assign_eliminate]: 7.76001e-06 [updatestate_loads_eliminate]: 7.88004e-06 [parameter_eliminate]: 3.49991e-06 [a_2]: 0.00012581 [accelerated_algorithm]: 8.86002e-06 [shard]: 2.10002e-06 [meta_shard_fg_expand]: 4.05009e-06 [shard_inline]: 1.007e-05 [auto_parallel]: 1.50501e-05 [parallel]: 9.47004e-06 [flash_sp]: 1.273e-05 [merge_comm]: 1.024e-05 [allreduce_fusion]: 7.33999e-06 [matmul_add_comm_reduction]: 1.405e-05 [allreduce_slice_to_reducescatter]: 4.29922e-07 [virtual_shard_identity]: 1.08001e-05 [virtual_dataset]: 9.01008e-06 [get_grad_eliminate_]: 9.29006e-06 [virtual_output]: 1.444e-05 [merge_forward]: 8.46002e-06 [cell_reuse_recompute_pass]: 2.09e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.02899e-05 [before_grad]: 2.202e-05 [inplace_validation]: 5.57001e-06 [meta_fg_expand]: 5.87001e-06 [inplace_validation_after_expand]: 7.85e-06 [flash_sp_send_recv_attached]: 3.03995e-06 [receive_attached]: 5.11995e-06 [after_resolve]: 1.544e-05 [a_after_grad]: 1.886e-05 [special_op_eliminate]: 1.218e-05 [renormalize]: 0.00054439 [add_forward_monad_depend]: 3.80003e-06 [auto_monad_grad]: 1.95997e-06 [auto_monad_eliminator]: 3.329e-05 [cse]: 3.43201e-05 [a_3]: 5.932e-05 [Cycle 2]: 0.00080944, [43] [expand_dump_flag]: 1.36008e-06 [switch_simplify]: 1.14901e-05 [loop_unroll]: 7.67002e-06 [a_1]: 0.00020926 [recompute_prepare]: 7.57002e-06 [updatestate_depend_eliminate]: 6.09003e-06 [updatestate_assign_eliminate]: 5.10993e-06 [updatestate_loads_eliminate]: 5.57001e-06 [parameter_eliminate]: 1.34995e-06 [a_2]: 0.0001069 [accelerated_algorithm]: 8.26991e-06 [shard]: 1.20001e-06 [meta_shard_fg_expand]: 2.59001e-06 [shard_inline]: 8.17003e-06 [auto_parallel]: 1.12499e-05 [parallel]: 3.7899e-06 [flash_sp]: 3.25998e-06 [merge_comm]: 5.87001e-06 [allreduce_fusion]: 4.67e-06 [matmul_add_comm_reduction]: 1.02901e-05 [allreduce_slice_to_reducescatter]: 2.49944e-07 [virtual_shard_identity]: 9.09995e-06 [virtual_dataset]: 7.42998e-06 [get_grad_eliminate_]: 7.02997e-06 [virtual_output]: 7.33999e-06 [merge_forward]: 4.57e-06 [cell_reuse_recompute_pass]: 2.00002e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.59499e-05 [before_grad]: 1.263e-05 [inplace_validation]: 4.25999e-06 [meta_fg_expand]: 4.71005e-06 [inplace_validation_after_expand]: 5.70994e-06 [flash_sp_send_recv_attached]: 9.29926e-07 [receive_attached]: 7.10017e-07 [after_resolve]: 9.9499e-06 [a_after_grad]: 1.211e-05 [special_op_eliminate]: 7.29994e-06 [renormalize]: 7.0082e-08 [add_forward_monad_depend]: 1.03994e-06 [auto_monad_grad]: 1.34995e-06 [auto_monad_eliminator]: 1.886e-05 [cse]: 2.37101e-05 [a_3]: 4.98299e-05 [py_interpret_to_execute_after_opt_a]: 1.21901e-05 [slice_cell_reuse_recomputed_activation]: 2.41003e-06 [rewriter_after_opt_a]: 0.00013641 [convert_after_rewriter]: 8.26002e-06 [order_py_execute_after_rewriter]: 5.64998e-06 [opt_b]: 0.00026398, [1] [Cycle 1]: 0.00025834, [7] [b_1]: 0.00016748 [b_2]: 1.15799e-05 [updatestate_depend_eliminate]: 5.39003e-06 [updatestate_assign_eliminate]: 6.47001e-06 [updatestate_loads_eliminate]: 6.49004e-06 [renormalize]: 2.30037e-07 [cse]: 2.441e-05 [optimize_parallel_all_gather_comm]: 1.069e-05 [overlap_param_gather]: 3.33006e-06 [cconv]: 2.49399e-05 [loop_unroll]: 0.00052774 [opt_after_cconv]: 0.000148, [1] [Cycle 1]: 0.00014162, [7] [c_1]: 5.915e-05 [parameter_eliminate]: 2.52994e-06 [updatestate_depend_eliminate]: 8.41008e-06 [updatestate_assign_eliminate]: 6.27991e-06 [updatestate_loads_eliminate]: 5.77001e-06 [cse]: 2.45899e-05 [renormalize]: 3.60073e-07 [remove_dup_value]: 1.378e-05 [tuple_transform]: 7.409e-05, [1] [Cycle 1]: 6.95901e-05, [2] [d_1]: 5.801e-05 [renormalize]: 1.79978e-07 [partial_unused_args_eliminate]: 2.06998e-06 [add_cache_embedding]: 1.383e-05 [add_recomputation]: 7.41701e-05 [cse_after_recomputation]: 3.12501e-05, [1] [Cycle 1]: 2.605e-05, [1] [cse]: 2.074e-05 [environ_conv]: 8.32998e-06 [swap_dp_allreduce_reducescatter]: 1.054e-05 [bias_add_comm_swap]: 2.07999e-06 [label_micro_interleaved_index]: 1.77999e-06 [label_fine_grained_interleaved_index]: 2.36998e-06 [merge_cast_opt]: 9.89996e-07 [slice_recompute_activation]: 1.75997e-06 [micro_interleaved_order_control]: 1.75997e-06 [assign_add_opt]: 3.195e-05 [ForceFp32Comm]: 9.2003e-07 [remove_cast_before_assign_add]: 9.36002e-06 [full_micro_interleaved_order_control]: 2.14006e-06 [reorder_send_recv_between_fp_bp]: 2.04996e-06 [comm_op_add_attrs]: 4.16801e-05 [add_comm_op_reuse_tag]: 2.85998e-06 [interleave_split_concat_branches]: 7.59959e-07 [interleave_parallel_branches]: 8.69972e-07 [overlap_opt_shard_in_pipeline]: 2.32994e-06 [overlap_opt_shard_grad_in_pipeline]: 2.11003e-06 [control_data_broadcast_order]: 1.04995e-06 [grouped_pairwise_exchange_alltoall]: 9.32999e-06 [offloading_packed_experts]: 2.10002e-06 [overlap_recompute_and_grad_model_parallel]: 2.34006e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.30041e-07 [overlap_recompute_allgather_and_fa_grad]: 8.267e-05 [overlap_grad_ring_attention]: 2.36009e-06 [overlap_grad_flash_sp]: 1.72399e-05 [begin_end_overlap_inline]: 7.59959e-07 [split_matmul_comm_elemetwise]: 1.91003e-06 [split_layernorm_comm]: 1.62004e-06 [handle_group_info]: 6.82997e-06 [symbol_engine_optimizer]: 0.0001129, [1] [Cycle 1]: 0.00010614, [6] [build]: 4.71005e-06 [elim_shapecalc]: 1.568e-05 [elim_not_effective]: 2.312e-05 [opt_reshape]: 1.061e-05 [fold_const_symbol]: 1.839e-05 [renormalize]: 2.40048e-07 [pipeline_parallel_scheduler]: 1.39e-06 [auto_monad_reorder]: 3.315e-05 [get_jit_bprop_graph]: 4.4005e-07 [rewriter_after_jit_bprop_graph]: 4.10015e-07 [eliminate_special_op_node]: 0.00053159 [distribtued_split]: 4.22e-05 [validate]: 3.783e-05 [task_emit]: 0.0782121 [execute]: 1.12001e-05 Sums bootstrap : 0.000400s : 0.45% type_inference : 0.004937s : 5.60% auto_monad : 0.000155s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000029s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000019s : 0.02% optimize.rewriter_before_opt_a : 0.000038s : 0.04% optimize.opt_a.expand_dump_flag : 0.000006s : 0.01% optimize.opt_a.switch_simplify : 0.000043s : 0.05% optimize.opt_a.loop_unroll : 0.000032s : 0.04% optimize.opt_a.a_1 : 0.000591s : 0.67% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000013s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000233s : 0.26% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000018s : 0.02% optimize.opt_a.auto_parallel : 0.000026s : 0.03% optimize.opt_a.parallel : 0.000013s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000016s : 0.02% optimize.opt_a.allreduce_fusion : 0.000012s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000024s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000022s : 0.02% optimize.opt_a.merge_forward : 0.000013s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000036s : 0.04% optimize.opt_a.before_grad : 0.000035s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000014s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.01% optimize.opt_a.after_resolve : 0.000025s : 0.03% optimize.opt_a.a_after_grad : 0.000031s : 0.04% optimize.opt_a.special_op_eliminate : 0.000019s : 0.02% optimize.opt_a.renormalize : 0.000544s : 0.62% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000052s : 0.06% optimize.opt_a.cse : 0.000058s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.12% optimize.py_interpret_to_execute_after_opt_a : 0.000012s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000136s : 0.15% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000167s : 0.19% optimize.opt_b.b_2 : 0.000012s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000024s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000011s : 0.01% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000528s : 0.60% optimize.opt_after_cconv.c_1 : 0.000059s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000025s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000058s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000074s : 0.08% optimize.cse_after_recomputation.cse : 0.000021s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000011s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000032s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000009s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000042s : 0.05% optimize.add_comm_op_reuse_tag : 0.000003s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000083s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000017s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000016s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000023s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000018s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000033s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000532s : 0.60% distribtued_split : 0.000042s : 0.05% validate : 0.000038s : 0.04% task_emit : 0.078212s : 88.67% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000154 63 4.26% : 0.000007s : 2: substitution.depend_value_elim 2.57% : 0.000004s : 5: substitution.elim_not_effective 2.31% : 0.000004s : 5: substitution.fold_const_symbol 5.90% : 0.000009s : 6: substitution.graph_param_transform 51.55% : 0.000080s : 1: substitution.inline 3.69% : 0.000006s : 10: substitution.j_node_and_user_rematch 2.81% : 0.000004s : 6: substitution.load_eliminater 2.18% : 0.000003s : 2: substitution.reduce_all_const_elim 7.05% : 0.000011s : 10: substitution.remove_not_recompute_node 2.09% : 0.000003s : 2: substitution.replace_old_param 7.53% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.06% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.004904 2 92.81% : 0.004551s : 1: type_inference.infer 7.19% : 0.000353s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000078 1 100.00% : 0.000078s : 1: match.inline ------[predicate.] 0.000234 1420 0.84% : 0.000002s : 13: predicate.accumulaten_eliminater 1.15% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.74% : 0.000002s : 12: predicate.addn_check_dump 0.83% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.13% : 0.000005s : 25: predicate.arithmetic_simplify 0.87% : 0.000002s : 13: predicate.cast_eliminate 0.80% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.42% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.51% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.79% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.85% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.67% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000005s : 31: predicate.environ_get_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.31% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.10% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.32% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.75% : 0.000013s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.11% : 0.000003s : 12: predicate.less_batch_normalization 1.74% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.46% : 0.000006s : 38: predicate.load_eliminater 1.30% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.33% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.76% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.74% : 0.000002s : 6: predicate.mutable_eliminate 0.50% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.24% : 0.000003s : 14: predicate.partial_defer_inline 1.20% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.79% : 0.000002s : 12: predicate.reduce_all_const_elim 1.04% : 0.000002s : 13: predicate.reduce_eliminate 0.66% : 0.000002s : 12: predicate.remove_not_recompute_node 1.05% : 0.000002s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.86% : 0.000002s : 13: predicate.reshape_eliminate 0.95% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.47% : 0.000003s : 18: predicate.special_op_eliminate 0.99% : 0.000002s : 12: predicate.specialize_transform 1.12% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 1.09% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.28% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.44% : 0.000010s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.73% : 0.000002s : 13: predicate.transpose_eliminate 1.82% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.62% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.42% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.69% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.52% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.62% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.31% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.36% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.46% : 0.000001s : 6: predicate.value_based_eliminate 0.85% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.62% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000220 4 7.45% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.55% : 0.000204s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.103972 192 0.01% : 0.000006s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000006s : 1: add_comm_op_reuse_tag 0.08% : 0.000079s : 1: add_recomputation 0.03% : 0.000036s : 1: assign_add_opt 0.16% : 0.000169s : 1: auto_monad 0.04% : 0.000042s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.42% : 0.000438s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.05% : 0.000047s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000036s : 1: cse_after_recomputation 0.01% : 0.000007s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.53% : 0.000546s : 1: eliminate_special_op_node 0.01% : 0.000013s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000010s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000007s : 1: label_micro_interleaved_index 0.52% : 0.000537s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000007s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.16% : 0.001210s : 80: opt.transform.opt_a 0.06% : 0.000058s : 1: opt.transform.opt_after_cconv 0.15% : 0.000159s : 27: opt.transform.opt_b 0.05% : 0.000057s : 1: opt.transform.opt_trans_graph 0.03% : 0.000034s : 3: opt.transform.special_op_eliminate 0.06% : 0.000062s : 4: opt.transform.symbol_engine_opt 6.32% : 0.006570s : 1: opt_a 0.15% : 0.000154s : 1: opt_after_cconv 0.26% : 0.000267s : 1: opt_b 8.30% : 0.008634s : 1: optimize 0.01% : 0.000015s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000022s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000007s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000008s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000007s : 1: overlap_param_gather 0.08% : 0.000088s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000006s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000035s : 1: pre_auto_parallel 0.02% : 0.000023s : 1: py_interpret_to_execute 0.02% : 0.000017s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000013s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.27% : 0.000285s : 1: renormalize.infer 0.24% : 0.000253s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000142s : 1: rewriter_after_opt_a 0.04% : 0.000042s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000006s : 1: split_layernorm_comm 0.01% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000014s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000117s : 1: symbol_engine_optimizer 75.25% : 0.078240s : 1: task_emit 0.08% : 0.000080s : 1: tuple_transform 4.77% : 0.004956s : 1: type_inference 0.08% : 0.000079s : 1: validate TotalTime = 0.0913863, [21] [bootstrap]: 0.00037911 [type_inference]: 0.00462792 [auto_monad]: 0.00014195 [graph_reusing]: 1.97999e-06 [inline]: 1.26008e-06 [parallel-infer-symbol]: 1.99e-06 [pre_auto_parallel]: 2.99399e-05 [insert-virtual-dataset]: 1.95997e-06 [parallel-infer-symbol-second]: 4.49945e-07 [dataset_repeat_opt]: 9.39937e-07 [pipeline_split]: 1.07009e-06 [optimize]: 0.00836104, [52] [py_interpret_to_execute]: 1.944e-05 [rewriter_before_opt_a]: 3.407e-05 [opt_a]: 0.00643729, [2] [Cycle 1]: 0.00180791, [43] [expand_dump_flag]: 2.36998e-06 [switch_simplify]: 2.764e-05 [loop_unroll]: 2.11999e-05 [a_1]: 0.00036218 [recompute_prepare]: 8.74e-06 [updatestate_depend_eliminate]: 8.55001e-06 [updatestate_assign_eliminate]: 6.94999e-06 [updatestate_loads_eliminate]: 5.70994e-06 [parameter_eliminate]: 2.92005e-06 [a_2]: 0.00012956 [accelerated_algorithm]: 1.08699e-05 [shard]: 2.16998e-06 [meta_shard_fg_expand]: 3.52005e-06 [shard_inline]: 1.052e-05 [auto_parallel]: 1.46499e-05 [parallel]: 7.8599e-06 [flash_sp]: 9.92999e-06 [merge_comm]: 8.30006e-06 [allreduce_fusion]: 6.4699e-06 [matmul_add_comm_reduction]: 9.66003e-06 [allreduce_slice_to_reducescatter]: 4.59957e-07 [virtual_shard_identity]: 1.131e-05 [virtual_dataset]: 8.64989e-06 [get_grad_eliminate_]: 8.75001e-06 [virtual_output]: 1.431e-05 [merge_forward]: 6.68003e-06 [cell_reuse_recompute_pass]: 1.54006e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.108e-05 [before_grad]: 1.96899e-05 [inplace_validation]: 5.21995e-06 [meta_fg_expand]: 5.39992e-06 [inplace_validation_after_expand]: 6.61996e-06 [flash_sp_send_recv_attached]: 2.17999e-06 [receive_attached]: 3.0899e-06 [after_resolve]: 1.332e-05 [a_after_grad]: 1.879e-05 [special_op_eliminate]: 1.09499e-05 [renormalize]: 0.00062939 [add_forward_monad_depend]: 2.73006e-06 [auto_monad_grad]: 2.00991e-06 [auto_monad_eliminator]: 2.602e-05 [cse]: 2.783e-05 [a_3]: 5.849e-05 [Cycle 2]: 0.00081512, [43] [expand_dump_flag]: 8.70088e-07 [switch_simplify]: 9.60007e-06 [loop_unroll]: 7.58003e-06 [a_1]: 0.00020621 [recompute_prepare]: 7.68993e-06 [updatestate_depend_eliminate]: 6.01995e-06 [updatestate_assign_eliminate]: 5.24998e-06 [updatestate_loads_eliminate]: 5.19992e-06 [parameter_eliminate]: 1.17999e-06 [a_2]: 0.00010846 [accelerated_algorithm]: 8.32998e-06 [shard]: 1.19e-06 [meta_shard_fg_expand]: 2.49001e-06 [shard_inline]: 7.91997e-06 [auto_parallel]: 1.022e-05 [parallel]: 2.93995e-06 [flash_sp]: 2.64996e-06 [merge_comm]: 5.70994e-06 [allreduce_fusion]: 4.79002e-06 [matmul_add_comm_reduction]: 7.22997e-06 [allreduce_slice_to_reducescatter]: 2.39932e-07 [virtual_shard_identity]: 9.66003e-06 [virtual_dataset]: 7.77002e-06 [get_grad_eliminate_]: 7.30995e-06 [virtual_output]: 7.67002e-06 [merge_forward]: 4.35009e-06 [cell_reuse_recompute_pass]: 1.65997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.727e-05 [before_grad]: 1.291e-05 [inplace_validation]: 4.09991e-06 [meta_fg_expand]: 4.89003e-06 [inplace_validation_after_expand]: 5.57001e-06 [flash_sp_send_recv_attached]: 9.00007e-07 [receive_attached]: 6.70087e-07 [after_resolve]: 1.00901e-05 [a_after_grad]: 1.163e-05 [special_op_eliminate]: 7.46001e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 9.89996e-07 [auto_monad_grad]: 1.00001e-06 [auto_monad_eliminator]: 1.857e-05 [cse]: 2.262e-05 [a_3]: 5.26101e-05 [py_interpret_to_execute_after_opt_a]: 1.12699e-05 [slice_cell_reuse_recomputed_activation]: 1.73005e-06 [rewriter_after_opt_a]: 0.00012808 [convert_after_rewriter]: 1.051e-05 [order_py_execute_after_rewriter]: 5.62996e-06 [opt_b]: 0.00026077, [1] [Cycle 1]: 0.00025548, [7] [b_1]: 0.00017186 [b_2]: 1.212e-05 [updatestate_depend_eliminate]: 5.33997e-06 [updatestate_assign_eliminate]: 5.78002e-06 [updatestate_loads_eliminate]: 5.33997e-06 [renormalize]: 2.40048e-07 [cse]: 2.033e-05 [optimize_parallel_all_gather_comm]: 7.89005e-06 [overlap_param_gather]: 3.24007e-06 [cconv]: 1.699e-05 [loop_unroll]: 0.00049683 [opt_after_cconv]: 0.00013854, [1] [Cycle 1]: 0.00013316, [7] [c_1]: 5.515e-05 [parameter_eliminate]: 1.89e-06 [updatestate_depend_eliminate]: 8.89995e-06 [updatestate_assign_eliminate]: 5.25999e-06 [updatestate_loads_eliminate]: 5.34998e-06 [cse]: 2.272e-05 [renormalize]: 3.69968e-07 [remove_dup_value]: 1.085e-05 [tuple_transform]: 7.512e-05, [1] [Cycle 1]: 7.091e-05, [2] [d_1]: 5.85699e-05 [renormalize]: 1.70083e-07 [partial_unused_args_eliminate]: 1.56998e-06 [add_cache_embedding]: 1.30499e-05 [add_recomputation]: 6.001e-05 [cse_after_recomputation]: 3.07e-05, [1] [Cycle 1]: 2.532e-05, [1] [cse]: 1.999e-05 [environ_conv]: 7.59005e-06 [swap_dp_allreduce_reducescatter]: 8.88004e-06 [bias_add_comm_swap]: 1.55007e-06 [label_micro_interleaved_index]: 1.30991e-06 [label_fine_grained_interleaved_index]: 1.89e-06 [merge_cast_opt]: 7.29924e-07 [slice_recompute_activation]: 1.16997e-06 [micro_interleaved_order_control]: 1.63005e-06 [assign_add_opt]: 2.825e-05 [ForceFp32Comm]: 6.59958e-07 [remove_cast_before_assign_add]: 7.73999e-06 [full_micro_interleaved_order_control]: 1.34995e-06 [reorder_send_recv_between_fp_bp]: 1.17999e-06 [comm_op_add_attrs]: 3.5e-05 [add_comm_op_reuse_tag]: 1.59e-06 [interleave_split_concat_branches]: 5.80098e-07 [interleave_parallel_branches]: 5.39934e-07 [overlap_opt_shard_in_pipeline]: 1.76998e-06 [overlap_opt_shard_grad_in_pipeline]: 1.61002e-06 [control_data_broadcast_order]: 7.10017e-07 [grouped_pairwise_exchange_alltoall]: 7.63999e-06 [offloading_packed_experts]: 1.89e-06 [overlap_recompute_and_grad_model_parallel]: 1.53005e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.89993e-07 [overlap_recompute_allgather_and_fa_grad]: 7.26801e-05 [overlap_grad_ring_attention]: 1.63005e-06 [overlap_grad_flash_sp]: 1.44e-05 [begin_end_overlap_inline]: 5.79981e-07 [split_matmul_comm_elemetwise]: 1.55997e-06 [split_layernorm_comm]: 1.11002e-06 [handle_group_info]: 4.85999e-06 [symbol_engine_optimizer]: 0.00010971, [1] [Cycle 1]: 0.00010413, [6] [build]: 4.48991e-06 [elim_shapecalc]: 1.73299e-05 [elim_not_effective]: 2.13301e-05 [opt_reshape]: 1.031e-05 [fold_const_symbol]: 1.668e-05 [renormalize]: 3.10014e-07 [pipeline_parallel_scheduler]: 1.05007e-06 [auto_monad_reorder]: 2.654e-05 [get_jit_bprop_graph]: 3.60073e-07 [rewriter_after_jit_bprop_graph]: 5.19911e-07 [eliminate_special_op_node]: 0.0005178 [distribtued_split]: 5.44901e-05 [validate]: 3.34e-05 [task_emit]: 0.0769381 [execute]: 9.17003e-06 Sums bootstrap : 0.000379s : 0.44% type_inference : 0.004628s : 5.35% auto_monad : 0.000142s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000030s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000019s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000037s : 0.04% optimize.opt_a.loop_unroll : 0.000029s : 0.03% optimize.opt_a.a_1 : 0.000568s : 0.66% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000238s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000019s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000018s : 0.02% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.01% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000021s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000022s : 0.03% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000038s : 0.04% optimize.opt_a.before_grad : 0.000033s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000023s : 0.03% optimize.opt_a.a_after_grad : 0.000030s : 0.04% optimize.opt_a.special_op_eliminate : 0.000018s : 0.02% optimize.opt_a.renormalize : 0.000629s : 0.73% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000045s : 0.05% optimize.opt_a.cse : 0.000050s : 0.06% optimize.opt_a.a_3 : 0.000111s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000011s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000128s : 0.15% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000172s : 0.20% optimize.opt_b.b_2 : 0.000012s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000017s : 0.02% optimize.loop_unroll : 0.000497s : 0.57% optimize.opt_after_cconv.c_1 : 0.000055s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.01% optimize.tuple_transform.d_1 : 0.000059s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000060s : 0.07% optimize.cse_after_recomputation.cse : 0.000020s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000009s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000028s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000008s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000035s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000008s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000073s : 0.08% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000017s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000021s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000027s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000518s : 0.60% distribtued_split : 0.000054s : 0.06% validate : 0.000033s : 0.04% task_emit : 0.076938s : 88.99% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000130 63 4.32% : 0.000006s : 2: substitution.depend_value_elim 2.11% : 0.000003s : 5: substitution.elim_not_effective 1.80% : 0.000002s : 5: substitution.fold_const_symbol 4.98% : 0.000006s : 6: substitution.graph_param_transform 49.55% : 0.000065s : 1: substitution.inline 4.62% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.09% : 0.000004s : 6: substitution.load_eliminater 2.17% : 0.000003s : 2: substitution.reduce_all_const_elim 8.20% : 0.000011s : 10: substitution.remove_not_recompute_node 2.27% : 0.000003s : 2: substitution.replace_old_param 8.11% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.77% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.004600 2 93.71% : 0.004311s : 1: type_inference.infer 6.29% : 0.000289s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000063 1 100.00% : 0.000063s : 1: match.inline ------[predicate.] 0.000234 1420 0.77% : 0.000002s : 13: predicate.accumulaten_eliminater 1.01% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.79% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.10% : 0.000005s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.30% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.86% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.95% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.99% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.29% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_depend_swap 1.98% : 0.000005s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.30% : 0.000001s : 6: predicate.graph_param_transform 0.74% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.50% : 0.000013s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.11% : 0.000003s : 12: predicate.less_batch_normalization 1.78% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.52% : 0.000006s : 38: predicate.load_eliminater 1.31% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.27% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.88% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.80% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.67% : 0.000002s : 6: predicate.mutable_eliminate 0.52% : 0.000001s : 6: predicate.opt_reshape 0.57% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.83% : 0.000002s : 13: predicate.print_const_string_wrapper 0.80% : 0.000002s : 12: predicate.reduce_all_const_elim 1.03% : 0.000002s : 13: predicate.reduce_eliminate 0.58% : 0.000001s : 12: predicate.remove_not_recompute_node 1.09% : 0.000003s : 25: predicate.replace_applicator 0.42% : 0.000001s : 12: predicate.replace_old_param 0.29% : 0.000001s : 6: predicate.reset_defer_inline 0.80% : 0.000002s : 13: predicate.reshape_eliminate 0.76% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.06% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.62% : 0.000004s : 18: predicate.special_op_eliminate 0.98% : 0.000002s : 12: predicate.specialize_transform 1.08% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.54% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.23% : 0.000010s : 43: predicate.switch_simplify 0.92% : 0.000002s : 13: predicate.tile_eliminate 0.85% : 0.000002s : 13: predicate.transpose_eliminate 1.74% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.51% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.74% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.55% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.45% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.37% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.45% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.91% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000180 4 6.41% : 0.000012s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.59% : 0.000169s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.101849 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000006s : 1: add_comm_op_reuse_tag 0.06% : 0.000065s : 1: add_recomputation 0.03% : 0.000032s : 1: assign_add_opt 0.15% : 0.000154s : 1: auto_monad 0.03% : 0.000033s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.41% : 0.000413s : 1: bootstrap 0.02% : 0.000021s : 1: cconv 0.04% : 0.000042s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000034s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.06% : 0.000063s : 1: distribtued_split 0.52% : 0.000531s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000011s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000009s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.50% : 0.000507s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.01% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.16% : 0.001182s : 80: opt.transform.opt_a 0.05% : 0.000054s : 1: opt.transform.opt_after_cconv 0.16% : 0.000161s : 27: opt.transform.opt_b 0.06% : 0.000057s : 1: opt.transform.opt_trans_graph 0.04% : 0.000037s : 3: opt.transform.special_op_eliminate 0.06% : 0.000060s : 4: opt.transform.symbol_engine_opt 6.32% : 0.006441s : 1: opt_a 0.14% : 0.000142s : 1: opt_after_cconv 0.26% : 0.000264s : 1: opt_b 8.22% : 0.008371s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000007s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000007s : 1: overlap_param_gather 0.08% : 0.000078s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000008s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000036s : 1: pre_auto_parallel 0.02% : 0.000024s : 1: py_interpret_to_execute 0.02% : 0.000016s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.38% : 0.000383s : 1: renormalize.infer 0.24% : 0.000241s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.13% : 0.000134s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000006s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000113s : 1: symbol_engine_optimizer 75.57% : 0.076964s : 1: task_emit 0.08% : 0.000079s : 1: tuple_transform 4.56% : 0.004645s : 1: type_inference 0.07% : 0.000069s : 1: validate TotalTime = 0.0918199, [21] [bootstrap]: 0.00041909 [type_inference]: 0.00489752 [auto_monad]: 0.00017992 [graph_reusing]: 2.39001e-06 [inline]: 1.24006e-06 [parallel-infer-symbol]: 2.43005e-06 [pre_auto_parallel]: 3.266e-05 [insert-virtual-dataset]: 3.03006e-06 [parallel-infer-symbol-second]: 4.60073e-07 [dataset_repeat_opt]: 1.03994e-06 [pipeline_split]: 1.83005e-06 [optimize]: 0.00856988, [52] [py_interpret_to_execute]: 2.79699e-05 [rewriter_before_opt_a]: 3.821e-05 [opt_a]: 0.00651684, [2] [Cycle 1]: 0.00183427, [43] [expand_dump_flag]: 3.86999e-06 [switch_simplify]: 2.943e-05 [loop_unroll]: 2.175e-05 [a_1]: 0.00038189 [recompute_prepare]: 9.12999e-06 [updatestate_depend_eliminate]: 8.62009e-06 [updatestate_assign_eliminate]: 7.26001e-06 [updatestate_loads_eliminate]: 8.95001e-06 [parameter_eliminate]: 3.51004e-06 [a_2]: 0.00013025 [accelerated_algorithm]: 1.051e-05 [shard]: 2.55997e-06 [meta_shard_fg_expand]: 4.20993e-06 [shard_inline]: 9.10007e-06 [auto_parallel]: 1.437e-05 [parallel]: 8.36002e-06 [flash_sp]: 1.243e-05 [merge_comm]: 1.03599e-05 [allreduce_fusion]: 7.09004e-06 [matmul_add_comm_reduction]: 1.16799e-05 [allreduce_slice_to_reducescatter]: 9.39937e-07 [virtual_shard_identity]: 1.076e-05 [virtual_dataset]: 9.71009e-06 [get_grad_eliminate_]: 9.42999e-06 [virtual_output]: 1.41701e-05 [merge_forward]: 8.31997e-06 [cell_reuse_recompute_pass]: 2.48e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.07401e-05 [before_grad]: 2.186e-05 [inplace_validation]: 6.40005e-06 [meta_fg_expand]: 5.74999e-06 [inplace_validation_after_expand]: 6.40005e-06 [flash_sp_send_recv_attached]: 2.86999e-06 [receive_attached]: 4.58001e-06 [after_resolve]: 1.513e-05 [a_after_grad]: 2.058e-05 [special_op_eliminate]: 1.011e-05 [renormalize]: 0.00059609 [add_forward_monad_depend]: 3.78001e-06 [auto_monad_grad]: 1.94006e-06 [auto_monad_eliminator]: 3.366e-05 [cse]: 3.62101e-05 [a_3]: 5.959e-05 [Cycle 2]: 0.00081172, [43] [expand_dump_flag]: 1.04995e-06 [switch_simplify]: 9.40997e-06 [loop_unroll]: 7.93999e-06 [a_1]: 0.00020956 [recompute_prepare]: 7.6599e-06 [updatestate_depend_eliminate]: 6.01995e-06 [updatestate_assign_eliminate]: 5.12996e-06 [updatestate_loads_eliminate]: 5.33997e-06 [parameter_eliminate]: 1.64006e-06 [a_2]: 0.00010772 [accelerated_algorithm]: 8.58994e-06 [shard]: 1.21002e-06 [meta_shard_fg_expand]: 2.56998e-06 [shard_inline]: 8.07003e-06 [auto_parallel]: 1.107e-05 [parallel]: 3.95009e-06 [flash_sp]: 4.04997e-06 [merge_comm]: 6.16e-06 [allreduce_fusion]: 4.69002e-06 [matmul_add_comm_reduction]: 7.91997e-06 [allreduce_slice_to_reducescatter]: 2.79979e-07 [virtual_shard_identity]: 9.39996e-06 [virtual_dataset]: 7.53009e-06 [get_grad_eliminate_]: 7.43999e-06 [virtual_output]: 7.71997e-06 [merge_forward]: 4.53007e-06 [cell_reuse_recompute_pass]: 2.01003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.583e-05 [before_grad]: 1.234e-05 [inplace_validation]: 4.64998e-06 [meta_fg_expand]: 4.76011e-06 [inplace_validation_after_expand]: 5.47001e-06 [flash_sp_send_recv_attached]: 8.90112e-07 [receive_attached]: 7.50064e-07 [after_resolve]: 1.056e-05 [a_after_grad]: 1.214e-05 [special_op_eliminate]: 7.22997e-06 [renormalize]: 1.00001e-07 [add_forward_monad_depend]: 1.20001e-06 [auto_monad_grad]: 1.17009e-06 [auto_monad_eliminator]: 2.086e-05 [cse]: 2.5e-05 [a_3]: 5.048e-05 [py_interpret_to_execute_after_opt_a]: 1.11101e-05 [slice_cell_reuse_recomputed_activation]: 2.40002e-06 [rewriter_after_opt_a]: 0.00014625 [convert_after_rewriter]: 1.103e-05 [order_py_execute_after_rewriter]: 6.57002e-06 [opt_b]: 0.00026319, [1] [Cycle 1]: 0.00025791, [7] [b_1]: 0.00017278 [b_2]: 1.30599e-05 [updatestate_depend_eliminate]: 5.61005e-06 [updatestate_assign_eliminate]: 4.88001e-06 [updatestate_loads_eliminate]: 6.60005e-06 [renormalize]: 3.00002e-07 [cse]: 2.281e-05 [optimize_parallel_all_gather_comm]: 1.086e-05 [overlap_param_gather]: 2.32994e-06 [cconv]: 2.51801e-05 [loop_unroll]: 0.00051162 [opt_after_cconv]: 0.00014634, [1] [Cycle 1]: 0.00013962, [7] [c_1]: 5.67799e-05 [parameter_eliminate]: 2.75008e-06 [updatestate_depend_eliminate]: 9.75002e-06 [updatestate_assign_eliminate]: 4.99003e-06 [updatestate_loads_eliminate]: 6.43998e-06 [cse]: 2.55101e-05 [renormalize]: 4.20026e-07 [remove_dup_value]: 1.401e-05 [tuple_transform]: 7.6e-05, [1] [Cycle 1]: 7.13799e-05, [2] [d_1]: 6.114e-05 [renormalize]: 2.19909e-07 [partial_unused_args_eliminate]: 2.46998e-06 [add_cache_embedding]: 1.409e-05 [add_recomputation]: 7.174e-05 [cse_after_recomputation]: 3.08e-05, [1] [Cycle 1]: 2.52801e-05, [1] [cse]: 2.00999e-05 [environ_conv]: 8.48004e-06 [swap_dp_allreduce_reducescatter]: 8.17992e-06 [bias_add_comm_swap]: 2.54996e-06 [label_micro_interleaved_index]: 2.34996e-06 [label_fine_grained_interleaved_index]: 1.94996e-06 [merge_cast_opt]: 1.07998e-06 [slice_recompute_activation]: 1.76998e-06 [micro_interleaved_order_control]: 2.14006e-06 [assign_add_opt]: 3.161e-05 [ForceFp32Comm]: 8.00006e-07 [remove_cast_before_assign_add]: 9.41998e-06 [full_micro_interleaved_order_control]: 2.35008e-06 [reorder_send_recv_between_fp_bp]: 2.06998e-06 [comm_op_add_attrs]: 4.256e-05 [add_comm_op_reuse_tag]: 2.76999e-06 [interleave_split_concat_branches]: 9.79984e-07 [interleave_parallel_branches]: 8.30041e-07 [overlap_opt_shard_in_pipeline]: 1.90001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.62004e-06 [control_data_broadcast_order]: 1.2801e-06 [grouped_pairwise_exchange_alltoall]: 1.061e-05 [offloading_packed_experts]: 2.26998e-06 [overlap_recompute_and_grad_model_parallel]: 2.02004e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.90111e-07 [overlap_recompute_allgather_and_fa_grad]: 7.501e-05 [overlap_grad_ring_attention]: 2.13005e-06 [overlap_grad_flash_sp]: 1.707e-05 [begin_end_overlap_inline]: 8.40053e-07 [split_matmul_comm_elemetwise]: 1.95007e-06 [split_layernorm_comm]: 2.16998e-06 [handle_group_info]: 6.37001e-06 [symbol_engine_optimizer]: 0.0001071, [1] [Cycle 1]: 0.00010165, [6] [build]: 5.51995e-06 [elim_shapecalc]: 1.588e-05 [elim_not_effective]: 2.311e-05 [opt_reshape]: 9.92999e-06 [fold_const_symbol]: 1.716e-05 [renormalize]: 4.7998e-07 [pipeline_parallel_scheduler]: 1.42003e-06 [auto_monad_reorder]: 3.24299e-05 [get_jit_bprop_graph]: 4.30038e-07 [rewriter_after_jit_bprop_graph]: 4.7998e-07 [eliminate_special_op_node]: 0.00052936 [distribtued_split]: 4.02699e-05 [validate]: 3.88999e-05 [task_emit]: 0.0767869 [execute]: 1.033e-05 Sums bootstrap : 0.000419s : 0.48% type_inference : 0.004898s : 5.64% auto_monad : 0.000180s : 0.21% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000033s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000028s : 0.03% optimize.rewriter_before_opt_a : 0.000038s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.04% optimize.opt_a.loop_unroll : 0.000030s : 0.03% optimize.opt_a.a_1 : 0.000591s : 0.68% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000238s : 0.27% optimize.opt_a.accelerated_algorithm : 0.000019s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000017s : 0.02% optimize.opt_a.allreduce_fusion : 0.000012s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.02% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000017s : 0.02% optimize.opt_a.virtual_output : 0.000022s : 0.03% optimize.opt_a.merge_forward : 0.000013s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000037s : 0.04% optimize.opt_a.before_grad : 0.000034s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.01% optimize.opt_a.after_resolve : 0.000026s : 0.03% optimize.opt_a.a_after_grad : 0.000033s : 0.04% optimize.opt_a.special_op_eliminate : 0.000017s : 0.02% optimize.opt_a.renormalize : 0.000596s : 0.69% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000055s : 0.06% optimize.opt_a.cse : 0.000061s : 0.07% optimize.opt_a.a_3 : 0.000110s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000011s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000146s : 0.17% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000173s : 0.20% optimize.opt_b.b_2 : 0.000013s : 0.02% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000007s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000023s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000011s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000512s : 0.59% optimize.opt_after_cconv.c_1 : 0.000057s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000010s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000026s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000061s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000072s : 0.08% optimize.cse_after_recomputation.cse : 0.000020s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000032s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000009s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000043s : 0.05% optimize.add_comm_op_reuse_tag : 0.000003s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000075s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000017s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000006s : 0.01% optimize.symbol_engine_optimizer.build : 0.000006s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000016s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000023s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000529s : 0.61% distribtued_split : 0.000040s : 0.05% validate : 0.000039s : 0.04% task_emit : 0.076787s : 88.42% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000147 63 4.63% : 0.000007s : 2: substitution.depend_value_elim 2.42% : 0.000004s : 5: substitution.elim_not_effective 2.42% : 0.000004s : 5: substitution.fold_const_symbol 5.59% : 0.000008s : 6: substitution.graph_param_transform 50.03% : 0.000074s : 1: substitution.inline 4.19% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.01% : 0.000004s : 6: substitution.load_eliminater 2.72% : 0.000004s : 2: substitution.reduce_all_const_elim 6.95% : 0.000010s : 10: substitution.remove_not_recompute_node 2.27% : 0.000003s : 2: substitution.replace_old_param 7.57% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.20% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.004864 2 93.19% : 0.004533s : 1: type_inference.infer 6.81% : 0.000331s : 1: type_inference.specialize ------[replace.] 0.000014 1 100.00% : 0.000014s : 1: replace.inline ------[match.] 0.000072 1 100.00% : 0.000072s : 1: match.inline ------[predicate.] 0.000234 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.13% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.82% : 0.000002s : 13: predicate.addn_zero_filter 0.79% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.29% : 0.000005s : 25: predicate.arithmetic_simplify 0.91% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.27% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.79% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.95% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.92% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.57% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_depend_swap 1.92% : 0.000004s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.35% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.20% : 0.000000s : 6: predicate.fold_const_symbol 0.84% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.77% : 0.000014s : 63: predicate.inline 0.99% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000002s : 12: predicate.less_batch_normalization 1.72% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000006s : 38: predicate.load_eliminater 1.36% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.76% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.76% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.93% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.08% : 0.000003s : 13: predicate.reduce_eliminate 0.64% : 0.000001s : 12: predicate.remove_not_recompute_node 1.06% : 0.000002s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.79% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.03% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.01% : 0.000002s : 12: predicate.shard_identity_eliminate 1.43% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.10% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 1.03% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.32% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.71% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.13% : 0.000010s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.92% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.74% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.52% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.80% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.74% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.39% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.33% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000218 4 7.64% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.36% : 0.000202s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.102488 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000006s : 1: add_comm_op_reuse_tag 0.07% : 0.000077s : 1: add_recomputation 0.03% : 0.000036s : 1: assign_add_opt 0.19% : 0.000193s : 1: auto_monad 0.04% : 0.000040s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000007s : 1: bias_add_comm_swap 0.45% : 0.000456s : 1: bootstrap 0.05% : 0.000047s : 1: cconv 0.05% : 0.000047s : 1: comm_op_add_attrs 0.01% : 0.000005s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000034s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.53% : 0.000544s : 1: eliminate_special_op_node 0.01% : 0.000013s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000007s : 1: full_micro_interleaved_order_control 0.01% : 0.000007s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000011s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.51% : 0.000522s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.18% : 0.001210s : 80: opt.transform.opt_a 0.05% : 0.000055s : 1: opt.transform.opt_after_cconv 0.16% : 0.000165s : 27: opt.transform.opt_b 0.06% : 0.000060s : 1: opt.transform.opt_trans_graph 0.03% : 0.000033s : 3: opt.transform.special_op_eliminate 0.06% : 0.000061s : 4: opt.transform.symbol_engine_opt 6.36% : 0.006522s : 1: opt_a 0.15% : 0.000151s : 1: opt_after_cconv 0.26% : 0.000266s : 1: opt_b 8.37% : 0.008578s : 1: optimize 0.01% : 0.000015s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000006s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000006s : 1: overlap_param_gather 0.08% : 0.000081s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000008s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000039s : 1: pre_auto_parallel 0.03% : 0.000032s : 1: py_interpret_to_execute 0.01% : 0.000015s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000012s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.34% : 0.000348s : 1: renormalize.infer 0.24% : 0.000241s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000152s : 1: rewriter_after_opt_a 0.04% : 0.000042s : 1: rewriter_before_opt_a 0.01% : 0.000007s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000007s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000111s : 1: symbol_engine_optimizer 74.95% : 0.076815s : 1: task_emit 0.08% : 0.000079s : 1: tuple_transform 4.80% : 0.004918s : 1: type_inference 0.08% : 0.000077s : 1: validate .... TotalTime = 0.0888976, [21] [bootstrap]: 0.00042022 [type_inference]: 0.00491419 [auto_monad]: 0.00015867 [graph_reusing]: 2.33995e-06 [inline]: 1.34995e-06 [parallel-infer-symbol]: 2.61993e-06 [pre_auto_parallel]: 2.86601e-05 [insert-virtual-dataset]: 3.04997e-06 [parallel-infer-symbol-second]: 4.4005e-07 [dataset_repeat_opt]: 1.49e-06 [pipeline_split]: 1.91003e-06 [optimize]: 0.00851945, [52] [py_interpret_to_execute]: 1.89e-05 [rewriter_before_opt_a]: 3.80001e-05 [opt_a]: 0.00647958, [2] [Cycle 1]: 0.00181717, [43] [expand_dump_flag]: 3.62995e-06 [switch_simplify]: 3.175e-05 [loop_unroll]: 2.414e-05 [a_1]: 0.00037636 [recompute_prepare]: 9.09006e-06 [updatestate_depend_eliminate]: 8.56002e-06 [updatestate_assign_eliminate]: 6.73998e-06 [updatestate_loads_eliminate]: 7.58003e-06 [parameter_eliminate]: 3.33996e-06 [a_2]: 0.00013463 [accelerated_algorithm]: 9.17003e-06 [shard]: 2.25997e-06 [meta_shard_fg_expand]: 4.08001e-06 [shard_inline]: 1.033e-05 [auto_parallel]: 1.362e-05 [parallel]: 9.81009e-06 [flash_sp]: 1.29e-05 [merge_comm]: 9.29995e-06 [allreduce_fusion]: 6.50005e-06 [matmul_add_comm_reduction]: 1.328e-05 [allreduce_slice_to_reducescatter]: 5.20027e-07 [virtual_shard_identity]: 1.285e-05 [virtual_dataset]: 9.79996e-06 [get_grad_eliminate_]: 9.12999e-06 [virtual_output]: 1.323e-05 [merge_forward]: 7.50995e-06 [cell_reuse_recompute_pass]: 1.93005e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.95e-05 [before_grad]: 1.994e-05 [inplace_validation]: 5.37001e-06 [meta_fg_expand]: 5.63008e-06 [inplace_validation_after_expand]: 6.98993e-06 [flash_sp_send_recv_attached]: 3.01993e-06 [receive_attached]: 4.81994e-06 [after_resolve]: 1.63701e-05 [a_after_grad]: 2.07101e-05 [special_op_eliminate]: 1.074e-05 [renormalize]: 0.00057178 [add_forward_monad_depend]: 3.66999e-06 [auto_monad_grad]: 2.0701e-06 [auto_monad_eliminator]: 3.417e-05 [cse]: 3.665e-05 [a_3]: 5.99499e-05 [Cycle 2]: 0.00079851, [43] [expand_dump_flag]: 1.01002e-06 [switch_simplify]: 9.04e-06 [loop_unroll]: 7.42008e-06 [a_1]: 0.00020872 [recompute_prepare]: 7.37992e-06 [updatestate_depend_eliminate]: 5.87991e-06 [updatestate_assign_eliminate]: 4.95999e-06 [updatestate_loads_eliminate]: 5.33008e-06 [parameter_eliminate]: 1.15996e-06 [a_2]: 0.00010639 [accelerated_algorithm]: 8.61008e-06 [shard]: 1.21002e-06 [meta_shard_fg_expand]: 2.61993e-06 [shard_inline]: 7.82998e-06 [auto_parallel]: 1.07699e-05 [parallel]: 3.51004e-06 [flash_sp]: 3.26999e-06 [merge_comm]: 5.79003e-06 [allreduce_fusion]: 4.90993e-06 [matmul_add_comm_reduction]: 7.91997e-06 [allreduce_slice_to_reducescatter]: 2.79979e-07 [virtual_shard_identity]: 9.01998e-06 [virtual_dataset]: 7.67992e-06 [get_grad_eliminate_]: 7.18993e-06 [virtual_output]: 7.76991e-06 [merge_forward]: 4.42995e-06 [cell_reuse_recompute_pass]: 1.87999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.562e-05 [before_grad]: 1.29e-05 [inplace_validation]: 4.22006e-06 [meta_fg_expand]: 5.07e-06 [inplace_validation_after_expand]: 5.58002e-06 [flash_sp_send_recv_attached]: 9.30042e-07 [receive_attached]: 6.89994e-07 [after_resolve]: 1.004e-05 [a_after_grad]: 1.185e-05 [special_op_eliminate]: 7.53999e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 1.12003e-06 [auto_monad_grad]: 1.21002e-06 [auto_monad_eliminator]: 1.839e-05 [cse]: 2.64e-05 [a_3]: 5.056e-05 [py_interpret_to_execute_after_opt_a]: 1.166e-05 [slice_cell_reuse_recomputed_activation]: 2.40002e-06 [rewriter_after_opt_a]: 0.00013537 [convert_after_rewriter]: 1.143e-05 [order_py_execute_after_rewriter]: 6.41996e-06 [opt_b]: 0.00028333, [1] [Cycle 1]: 0.00025386, [7] [b_1]: 0.00016725 [b_2]: 1.2e-05 [updatestate_depend_eliminate]: 5.51995e-06 [updatestate_assign_eliminate]: 5.52996e-06 [updatestate_loads_eliminate]: 5.74999e-06 [renormalize]: 3.99887e-07 [cse]: 2.243e-05 [optimize_parallel_all_gather_comm]: 1.34599e-05 [overlap_param_gather]: 2.74007e-06 [cconv]: 2.327e-05 [loop_unroll]: 0.00050935 [opt_after_cconv]: 0.00014741, [1] [Cycle 1]: 0.00014078, [7] [c_1]: 5.816e-05 [parameter_eliminate]: 2.45997e-06 [updatestate_depend_eliminate]: 8.22998e-06 [updatestate_assign_eliminate]: 6.12997e-06 [updatestate_loads_eliminate]: 5.64009e-06 [cse]: 2.544e-05 [renormalize]: 5.30039e-07 [remove_dup_value]: 1.252e-05 [tuple_transform]: 7.385e-05, [1] [Cycle 1]: 6.937e-05, [2] [d_1]: 5.77701e-05 [renormalize]: 1.79978e-07 [partial_unused_args_eliminate]: 2.05997e-06 [add_cache_embedding]: 1.488e-05 [add_recomputation]: 7.23701e-05 [cse_after_recomputation]: 3.179e-05, [1] [Cycle 1]: 2.60801e-05, [1] [cse]: 2.099e-05 [environ_conv]: 7.27002e-06 [swap_dp_allreduce_reducescatter]: 8.37003e-06 [bias_add_comm_swap]: 2.06009e-06 [label_micro_interleaved_index]: 1.93994e-06 [label_fine_grained_interleaved_index]: 2.17999e-06 [merge_cast_opt]: 1.09989e-06 [slice_recompute_activation]: 1.66998e-06 [micro_interleaved_order_control]: 2.10991e-06 [assign_add_opt]: 3.296e-05 [ForceFp32Comm]: 9.40054e-07 [remove_cast_before_assign_add]: 7.70995e-06 [full_micro_interleaved_order_control]: 2.06009e-06 [reorder_send_recv_between_fp_bp]: 2.13005e-06 [comm_op_add_attrs]: 4.8e-05 [add_comm_op_reuse_tag]: 2.06009e-06 [interleave_split_concat_branches]: 7.79983e-07 [interleave_parallel_branches]: 8.40053e-07 [overlap_opt_shard_in_pipeline]: 1.92004e-06 [overlap_opt_shard_grad_in_pipeline]: 2.00991e-06 [control_data_broadcast_order]: 1.46008e-06 [grouped_pairwise_exchange_alltoall]: 1.002e-05 [offloading_packed_experts]: 2.25997e-06 [overlap_recompute_and_grad_model_parallel]: 2.24996e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.10018e-07 [overlap_recompute_allgather_and_fa_grad]: 7.709e-05 [overlap_grad_ring_attention]: 2.14996e-06 [overlap_grad_flash_sp]: 1.59601e-05 [begin_end_overlap_inline]: 8.30041e-07 [split_matmul_comm_elemetwise]: 2.09e-06 [split_layernorm_comm]: 1.79e-06 [handle_group_info]: 7.13009e-06 [symbol_engine_optimizer]: 0.00010653, [1] [Cycle 1]: 0.00010128, [6] [build]: 5.07e-06 [elim_shapecalc]: 1.49701e-05 [elim_not_effective]: 2.172e-05 [opt_reshape]: 1.013e-05 [fold_const_symbol]: 1.86e-05 [renormalize]: 2.69967e-07 [pipeline_parallel_scheduler]: 1.59e-06 [auto_monad_reorder]: 3.50199e-05 [get_jit_bprop_graph]: 6.89994e-07 [rewriter_after_jit_bprop_graph]: 5.00004e-07 [eliminate_special_op_node]: 0.0005407 [distribtued_split]: 4.15599e-05 [validate]: 3.738e-05 [task_emit]: 0.0739082 [execute]: 1.01899e-05 Sums bootstrap : 0.000420s : 0.50% type_inference : 0.004914s : 5.86% auto_monad : 0.000159s : 0.19% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000029s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000019s : 0.02% optimize.rewriter_before_opt_a : 0.000038s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000041s : 0.05% optimize.opt_a.loop_unroll : 0.000032s : 0.04% optimize.opt_a.a_1 : 0.000585s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000241s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000018s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000013s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000021s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000022s : 0.03% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000021s : 0.03% optimize.opt_a.merge_forward : 0.000012s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000035s : 0.04% optimize.opt_a.before_grad : 0.000033s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.01% optimize.opt_a.after_resolve : 0.000026s : 0.03% optimize.opt_a.a_after_grad : 0.000033s : 0.04% optimize.opt_a.special_op_eliminate : 0.000018s : 0.02% optimize.opt_a.renormalize : 0.000572s : 0.68% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000053s : 0.06% optimize.opt_a.cse : 0.000063s : 0.08% optimize.opt_a.a_3 : 0.000111s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000012s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000135s : 0.16% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000167s : 0.20% optimize.opt_b.b_2 : 0.000012s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000022s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000013s : 0.02% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000509s : 0.61% optimize.opt_after_cconv.c_1 : 0.000058s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000025s : 0.03% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000013s : 0.01% optimize.tuple_transform.d_1 : 0.000058s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000072s : 0.09% optimize.cse_after_recomputation.cse : 0.000021s : 0.03% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000033s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000008s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000048s : 0.06% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000077s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000015s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000022s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000019s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000035s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000541s : 0.64% distribtued_split : 0.000042s : 0.05% validate : 0.000037s : 0.04% task_emit : 0.073908s : 88.08% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000150 63 5.18% : 0.000008s : 2: substitution.depend_value_elim 2.41% : 0.000004s : 5: substitution.elim_not_effective 2.39% : 0.000004s : 5: substitution.fold_const_symbol 5.44% : 0.000008s : 6: substitution.graph_param_transform 50.79% : 0.000076s : 1: substitution.inline 3.72% : 0.000006s : 10: substitution.j_node_and_user_rematch 2.96% : 0.000004s : 6: substitution.load_eliminater 2.32% : 0.000003s : 2: substitution.reduce_all_const_elim 6.63% : 0.000010s : 10: substitution.remove_not_recompute_node 2.24% : 0.000003s : 2: substitution.replace_old_param 7.80% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.11% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.004881 2 93.18% : 0.004548s : 1: type_inference.infer 6.82% : 0.000333s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000075 1 100.00% : 0.000075s : 1: match.inline ------[predicate.] 0.000238 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.14% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.27% : 0.000005s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.42% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.45% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.90% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.95% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.25% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_depend_swap 1.91% : 0.000005s : 31: predicate.environ_get_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.38% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.65% : 0.000002s : 12: predicate.incorporate_call_switch 6.03% : 0.000014s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.01% : 0.000002s : 12: predicate.less_batch_normalization 1.72% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.43% : 0.000006s : 38: predicate.load_eliminater 1.35% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.23% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.76% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.74% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.70% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.54% : 0.000001s : 6: predicate.parallel_virtual_node 1.17% : 0.000003s : 14: predicate.partial_defer_inline 1.35% : 0.000003s : 19: predicate.partial_eliminate 0.76% : 0.000002s : 13: predicate.print_const_string_wrapper 0.81% : 0.000002s : 12: predicate.reduce_all_const_elim 1.08% : 0.000003s : 13: predicate.reduce_eliminate 0.63% : 0.000002s : 12: predicate.remove_not_recompute_node 1.11% : 0.000003s : 25: predicate.replace_applicator 0.49% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.80% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.55% : 0.000001s : 6: predicate.row_tensor_eliminate 1.03% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.95% : 0.000002s : 12: predicate.shard_identity_eliminate 1.38% : 0.000003s : 18: predicate.special_op_eliminate 0.98% : 0.000002s : 12: predicate.specialize_transform 1.05% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.20% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.97% : 0.000002s : 14: predicate.switch_defer_inline 1.68% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.12% : 0.000010s : 43: predicate.switch_simplify 0.85% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.74% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.82% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.49% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.80% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.53% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.45% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.38% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.34% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.48% : 0.000001s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.88% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000216 4 7.36% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.64% : 0.000201s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.099483 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000077s : 1: add_recomputation 0.04% : 0.000037s : 1: assign_add_opt 0.17% : 0.000172s : 1: auto_monad 0.04% : 0.000043s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.46% : 0.000459s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.05% : 0.000053s : 1: comm_op_add_attrs 0.01% : 0.000005s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.04% : 0.000035s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.56% : 0.000555s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000007s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000010s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000006s : 1: label_micro_interleaved_index 0.52% : 0.000519s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000007s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001207s : 80: opt.transform.opt_a 0.06% : 0.000057s : 1: opt.transform.opt_after_cconv 0.16% : 0.000158s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.04% : 0.000038s : 3: opt.transform.special_op_eliminate 0.06% : 0.000060s : 4: opt.transform.symbol_engine_opt 6.52% : 0.006483s : 1: opt_a 0.15% : 0.000152s : 1: opt_after_cconv 0.29% : 0.000288s : 1: opt_b 8.57% : 0.008528s : 1: optimize 0.02% : 0.000017s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000007s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000007s : 1: overlap_param_gather 0.08% : 0.000082s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000008s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.03% : 0.000034s : 1: pre_auto_parallel 0.02% : 0.000024s : 1: py_interpret_to_execute 0.02% : 0.000016s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.31% : 0.000308s : 1: renormalize.infer 0.26% : 0.000257s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000006s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000141s : 1: rewriter_after_opt_a 0.04% : 0.000042s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000111s : 1: symbol_engine_optimizer 74.32% : 0.073934s : 1: task_emit 0.08% : 0.000078s : 1: tuple_transform 4.96% : 0.004935s : 1: type_inference 0.08% : 0.000075s : 1: validate TotalTime = 0.091871, [21] [bootstrap]: 0.0004065 [type_inference]: 0.00490565 [auto_monad]: 0.00016346 [graph_reusing]: 2.56998e-06 [inline]: 1.56998e-06 [parallel-infer-symbol]: 2.39001e-06 [pre_auto_parallel]: 2.86399e-05 [insert-virtual-dataset]: 3.08e-06 [parallel-infer-symbol-second]: 3.59956e-07 [dataset_repeat_opt]: 1.97999e-06 [pipeline_split]: 2.00002e-06 [optimize]: 0.00854934, [52] [py_interpret_to_execute]: 1.865e-05 [rewriter_before_opt_a]: 3.79999e-05 [opt_a]: 0.00654546, [2] [Cycle 1]: 0.00188267, [43] [expand_dump_flag]: 3.97e-06 [switch_simplify]: 3.20401e-05 [loop_unroll]: 2.406e-05 [a_1]: 0.00037814 [recompute_prepare]: 8.92999e-06 [updatestate_depend_eliminate]: 8.99006e-06 [updatestate_assign_eliminate]: 7.35989e-06 [updatestate_loads_eliminate]: 8.79006e-06 [parameter_eliminate]: 3.99002e-06 [a_2]: 0.00013149 [accelerated_algorithm]: 8.76002e-06 [shard]: 2.49001e-06 [meta_shard_fg_expand]: 4.13996e-06 [shard_inline]: 1.07801e-05 [auto_parallel]: 1.41399e-05 [parallel]: 9.79996e-06 [flash_sp]: 1.293e-05 [merge_comm]: 1.037e-05 [allreduce_fusion]: 5.30004e-06 [matmul_add_comm_reduction]: 1.37801e-05 [allreduce_slice_to_reducescatter]: 5.49946e-07 [virtual_shard_identity]: 1.13399e-05 [virtual_dataset]: 8.27003e-06 [get_grad_eliminate_]: 9.18005e-06 [virtual_output]: 1.43599e-05 [merge_forward]: 9.08994e-06 [cell_reuse_recompute_pass]: 1.86998e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.003e-05 [before_grad]: 2.15099e-05 [inplace_validation]: 6.21006e-06 [meta_fg_expand]: 5.53997e-06 [inplace_validation_after_expand]: 7.76001e-06 [flash_sp_send_recv_attached]: 3.51993e-06 [receive_attached]: 5.25999e-06 [after_resolve]: 1.553e-05 [a_after_grad]: 1.91701e-05 [special_op_eliminate]: 1.055e-05 [renormalize]: 0.00063191 [add_forward_monad_depend]: 3.94997e-06 [auto_monad_grad]: 2.35997e-06 [auto_monad_eliminator]: 3.595e-05 [cse]: 3.498e-05 [a_3]: 5.841e-05 [Cycle 2]: 0.00080562, [43] [expand_dump_flag]: 1.22993e-06 [switch_simplify]: 9.37004e-06 [loop_unroll]: 7.66001e-06 [a_1]: 0.00020436 [recompute_prepare]: 1.21001e-05 [updatestate_depend_eliminate]: 6.26e-06 [updatestate_assign_eliminate]: 5.09992e-06 [updatestate_loads_eliminate]: 5.20004e-06 [parameter_eliminate]: 1.44995e-06 [a_2]: 0.00010716 [accelerated_algorithm]: 9.08005e-06 [shard]: 1.24006e-06 [meta_shard_fg_expand]: 2.48e-06 [shard_inline]: 8.47992e-06 [auto_parallel]: 1.06599e-05 [parallel]: 3.5899e-06 [flash_sp]: 3.20002e-06 [merge_comm]: 5.73997e-06 [allreduce_fusion]: 4.62995e-06 [matmul_add_comm_reduction]: 1.08499e-05 [allreduce_slice_to_reducescatter]: 2.69967e-07 [virtual_shard_identity]: 9.44e-06 [virtual_dataset]: 7.76001e-06 [get_grad_eliminate_]: 7.75e-06 [virtual_output]: 7.78004e-06 [merge_forward]: 4.68001e-06 [cell_reuse_recompute_pass]: 1.92004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.567e-05 [before_grad]: 1.21599e-05 [inplace_validation]: 4.17e-06 [meta_fg_expand]: 4.82006e-06 [inplace_validation_after_expand]: 5.58991e-06 [flash_sp_send_recv_attached]: 1.05996e-06 [receive_attached]: 7.10017e-07 [after_resolve]: 9.90997e-06 [a_after_grad]: 1.17201e-05 [special_op_eliminate]: 7.16001e-06 [renormalize]: 1.09896e-07 [add_forward_monad_depend]: 9.60077e-07 [auto_monad_grad]: 1.24995e-06 [auto_monad_eliminator]: 1.86299e-05 [cse]: 2.124e-05 [a_3]: 5.04199e-05 [py_interpret_to_execute_after_opt_a]: 1.082e-05 [slice_cell_reuse_recomputed_activation]: 2.48e-06 [rewriter_after_opt_a]: 0.0001316 [convert_after_rewriter]: 8.41997e-06 [order_py_execute_after_rewriter]: 8.08004e-06 [opt_b]: 0.00026483, [1] [Cycle 1]: 0.00025947, [7] [b_1]: 0.00017364 [b_2]: 9.51008e-06 [updatestate_depend_eliminate]: 5.71006e-06 [updatestate_assign_eliminate]: 6.37001e-06 [updatestate_loads_eliminate]: 6.28992e-06 [renormalize]: 4.10015e-07 [cse]: 2.424e-05 [optimize_parallel_all_gather_comm]: 9.46003e-06 [overlap_param_gather]: 2.68e-06 [cconv]: 2.482e-05 [loop_unroll]: 0.00051045 [opt_after_cconv]: 0.00014481, [1] [Cycle 1]: 0.0001385, [7] [c_1]: 5.447e-05 [parameter_eliminate]: 2.13995e-06 [updatestate_depend_eliminate]: 9.22999e-06 [updatestate_assign_eliminate]: 5.15999e-06 [updatestate_loads_eliminate]: 6.15011e-06 [cse]: 2.42201e-05 [renormalize]: 3.10014e-07 [remove_dup_value]: 1.628e-05 [tuple_transform]: 7.41701e-05, [1] [Cycle 1]: 6.978e-05, [2] [d_1]: 5.83599e-05 [renormalize]: 1.59955e-07 [partial_unused_args_eliminate]: 2.51993e-06 [add_cache_embedding]: 1.46801e-05 [add_recomputation]: 7.39e-05 [cse_after_recomputation]: 3.129e-05, [1] [Cycle 1]: 2.52699e-05, [1] [cse]: 1.987e-05 [environ_conv]: 8.16002e-06 [swap_dp_allreduce_reducescatter]: 8.79995e-06 [bias_add_comm_swap]: 2.25008e-06 [label_micro_interleaved_index]: 2.12004e-06 [label_fine_grained_interleaved_index]: 2.17999e-06 [merge_cast_opt]: 1.19e-06 [slice_recompute_activation]: 1.85007e-06 [micro_interleaved_order_control]: 1.80001e-06 [assign_add_opt]: 3.07e-05 [ForceFp32Comm]: 9.00007e-07 [remove_cast_before_assign_add]: 9.11998e-06 [full_micro_interleaved_order_control]: 2.49001e-06 [reorder_send_recv_between_fp_bp]: 2.76999e-06 [comm_op_add_attrs]: 4.66999e-05 [add_comm_op_reuse_tag]: 3.22005e-06 [interleave_split_concat_branches]: 7.89994e-07 [interleave_parallel_branches]: 9.99891e-07 [overlap_opt_shard_in_pipeline]: 2.27999e-06 [overlap_opt_shard_grad_in_pipeline]: 2.36998e-06 [control_data_broadcast_order]: 1.10001e-06 [grouped_pairwise_exchange_alltoall]: 1.11699e-05 [offloading_packed_experts]: 2.46998e-06 [overlap_recompute_and_grad_model_parallel]: 2.22994e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.00006e-07 [overlap_recompute_allgather_and_fa_grad]: 7.227e-05 [overlap_grad_ring_attention]: 2.27999e-06 [overlap_grad_flash_sp]: 1.629e-05 [begin_end_overlap_inline]: 7.69971e-07 [split_matmul_comm_elemetwise]: 2.12004e-06 [split_layernorm_comm]: 2.04996e-06 [handle_group_info]: 6.67002e-06 [symbol_engine_optimizer]: 0.00010285, [1] [Cycle 1]: 9.727e-05, [6] [build]: 4.61005e-06 [elim_shapecalc]: 1.361e-05 [elim_not_effective]: 2.10101e-05 [opt_reshape]: 9.92999e-06 [fold_const_symbol]: 1.65601e-05 [renormalize]: 4.30038e-07 [pipeline_parallel_scheduler]: 1.52003e-06 [auto_monad_reorder]: 3.45199e-05 [get_jit_bprop_graph]: 4.59957e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00051243 [distribtued_split]: 4.126e-05 [validate]: 3.645e-05 [task_emit]: 0.0768903 [execute]: 1.163e-05 Sums bootstrap : 0.000407s : 0.47% type_inference : 0.004906s : 5.64% auto_monad : 0.000163s : 0.19% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000029s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000019s : 0.02% optimize.rewriter_before_opt_a : 0.000038s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000041s : 0.05% optimize.opt_a.loop_unroll : 0.000032s : 0.04% optimize.opt_a.a_1 : 0.000583s : 0.67% optimize.opt_a.recompute_prepare : 0.000021s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000239s : 0.27% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000019s : 0.02% optimize.opt_a.auto_parallel : 0.000025s : 0.03% optimize.opt_a.parallel : 0.000013s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000016s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000025s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000021s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000017s : 0.02% optimize.opt_a.virtual_output : 0.000022s : 0.03% optimize.opt_a.merge_forward : 0.000014s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000036s : 0.04% optimize.opt_a.before_grad : 0.000034s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000006s : 0.01% optimize.opt_a.after_resolve : 0.000025s : 0.03% optimize.opt_a.a_after_grad : 0.000031s : 0.04% optimize.opt_a.special_op_eliminate : 0.000018s : 0.02% optimize.opt_a.renormalize : 0.000632s : 0.73% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000055s : 0.06% optimize.opt_a.cse : 0.000056s : 0.06% optimize.opt_a.a_3 : 0.000109s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000011s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000132s : 0.15% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000008s : 0.01% optimize.opt_b.b_1 : 0.000174s : 0.20% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000024s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000510s : 0.59% optimize.opt_after_cconv.c_1 : 0.000054s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000024s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000016s : 0.02% optimize.tuple_transform.d_1 : 0.000058s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000074s : 0.09% optimize.cse_after_recomputation.cse : 0.000020s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000009s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000031s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000009s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000047s : 0.05% optimize.add_comm_op_reuse_tag : 0.000003s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000072s : 0.08% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000021s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000035s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000512s : 0.59% distribtued_split : 0.000041s : 0.05% validate : 0.000036s : 0.04% task_emit : 0.076890s : 88.48% execute : 0.000012s : 0.01% Time group info: ------[substitution.] 0.000148 63 4.43% : 0.000007s : 2: substitution.depend_value_elim 2.40% : 0.000004s : 5: substitution.elim_not_effective 1.93% : 0.000003s : 5: substitution.fold_const_symbol 5.14% : 0.000008s : 6: substitution.graph_param_transform 50.96% : 0.000075s : 1: substitution.inline 4.29% : 0.000006s : 10: substitution.j_node_and_user_rematch 2.89% : 0.000004s : 6: substitution.load_eliminater 2.69% : 0.000004s : 2: substitution.reduce_all_const_elim 6.91% : 0.000010s : 10: substitution.remove_not_recompute_node 2.28% : 0.000003s : 2: substitution.replace_old_param 8.01% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.08% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.004874 2 92.80% : 0.004523s : 1: type_inference.infer 7.20% : 0.000351s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000074 1 100.00% : 0.000074s : 1: match.inline ------[predicate.] 0.000234 1420 0.93% : 0.000002s : 13: predicate.accumulaten_eliminater 1.17% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.12% : 0.000005s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.77% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.41% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.46% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.88% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.94% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.70% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.11% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000003s : 19: predicate.environ_get_depend_swap 1.97% : 0.000005s : 31: predicate.environ_get_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.31% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.90% : 0.000014s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.10% : 0.000003s : 12: predicate.less_batch_normalization 1.69% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.40% : 0.000006s : 38: predicate.load_eliminater 1.25% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.25% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.75% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.73% : 0.000002s : 6: predicate.mutable_eliminate 0.50% : 0.000001s : 6: predicate.opt_reshape 0.61% : 0.000001s : 6: predicate.parallel_virtual_node 1.23% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.78% : 0.000002s : 13: predicate.print_const_string_wrapper 0.83% : 0.000002s : 12: predicate.reduce_all_const_elim 1.18% : 0.000003s : 13: predicate.reduce_eliminate 0.61% : 0.000001s : 12: predicate.remove_not_recompute_node 1.10% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.08% : 0.000003s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.02% : 0.000002s : 12: predicate.shard_identity_eliminate 1.48% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 1.02% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.26% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.93% : 0.000002s : 14: predicate.switch_defer_inline 1.69% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.19% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.77% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.53% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.67% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.45% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.30% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.40% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.68% : 0.000002s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.94% : 0.000002s : 12: predicate.virtual_output_eliminate 0.58% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000213 4 8.19% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.81% : 0.000196s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.102542 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.01% : 0.000007s : 1: add_comm_op_reuse_tag 0.08% : 0.000079s : 1: add_recomputation 0.03% : 0.000035s : 1: assign_add_opt 0.17% : 0.000177s : 1: auto_monad 0.04% : 0.000042s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.44% : 0.000446s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.05% : 0.000052s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000035s : 1: cse_after_recomputation 0.01% : 0.000007s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.51% : 0.000527s : 1: eliminate_special_op_node 0.01% : 0.000013s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000007s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000010s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.51% : 0.000520s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.01% : 0.000007s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.18% : 0.001207s : 80: opt.transform.opt_a 0.05% : 0.000053s : 1: opt.transform.opt_after_cconv 0.16% : 0.000163s : 27: opt.transform.opt_b 0.06% : 0.000057s : 1: opt.transform.opt_trans_graph 0.03% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000056s : 4: opt.transform.symbol_engine_opt 6.39% : 0.006549s : 1: opt_a 0.15% : 0.000150s : 1: opt_after_cconv 0.26% : 0.000268s : 1: opt_b 8.35% : 0.008558s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000012s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000007s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000007s : 1: overlap_param_gather 0.08% : 0.000078s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.03% : 0.000035s : 1: pre_auto_parallel 0.02% : 0.000023s : 1: py_interpret_to_execute 0.01% : 0.000015s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000013s : 1: remove_cast_before_assign_add 0.02% : 0.000021s : 1: remove_dup_value 0.29% : 0.000301s : 1: renormalize.infer 0.32% : 0.000324s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.13% : 0.000137s : 1: rewriter_after_opt_a 0.04% : 0.000042s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000106s : 1: symbol_engine_optimizer 75.01% : 0.076922s : 1: task_emit 0.08% : 0.000077s : 1: tuple_transform 4.80% : 0.004926s : 1: type_inference 0.07% : 0.000072s : 1: validate TotalTime = 0.0984497, [21] [bootstrap]: 0.00050381 [type_inference]: 0.00555946 [auto_monad]: 0.00022477 [graph_reusing]: 2.88012e-06 [inline]: 1.33994e-06 [parallel-infer-symbol]: 2.58989e-06 [pre_auto_parallel]: 3.903e-05 [insert-virtual-dataset]: 3.36999e-06 [parallel-infer-symbol-second]: 4.80097e-07 [dataset_repeat_opt]: 1.56008e-06 [pipeline_split]: 2.03005e-06 [optimize]: 0.0092903, [52] [py_interpret_to_execute]: 2.958e-05 [rewriter_before_opt_a]: 4.036e-05 [opt_a]: 0.00708864, [2] [Cycle 1]: 0.00181285, [43] [expand_dump_flag]: 3.88001e-06 [switch_simplify]: 3.248e-05 [loop_unroll]: 1.32199e-05 [a_1]: 0.00041495 [recompute_prepare]: 9.4201e-06 [updatestate_depend_eliminate]: 8.5301e-06 [updatestate_assign_eliminate]: 7.63999e-06 [updatestate_loads_eliminate]: 7.59994e-06 [parameter_eliminate]: 3.59002e-06 [a_2]: 0.00014046 [accelerated_algorithm]: 9.12999e-06 [shard]: 2.49001e-06 [meta_shard_fg_expand]: 4.30003e-06 [shard_inline]: 9.37004e-06 [auto_parallel]: 1.285e-05 [parallel]: 9.8201e-06 [flash_sp]: 1.382e-05 [merge_comm]: 1.272e-05 [allreduce_fusion]: 8.36991e-06 [matmul_add_comm_reduction]: 1.33701e-05 [allreduce_slice_to_reducescatter]: 6.50063e-07 [virtual_shard_identity]: 1.22801e-05 [virtual_dataset]: 8.98005e-06 [get_grad_eliminate_]: 9.69006e-06 [virtual_output]: 8.09995e-06 [merge_forward]: 7.72008e-06 [cell_reuse_recompute_pass]: 1.76998e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.08e-05 [before_grad]: 1.617e-05 [inplace_validation]: 6.71006e-06 [meta_fg_expand]: 5.66e-06 [inplace_validation_after_expand]: 6.48003e-06 [flash_sp_send_recv_attached]: 3.21004e-06 [receive_attached]: 4.84998e-06 [after_resolve]: 1.487e-05 [a_after_grad]: 1.509e-05 [special_op_eliminate]: 1.065e-05 [renormalize]: 0.00054577 [add_forward_monad_depend]: 3.74997e-06 [auto_monad_grad]: 1.95007e-06 [auto_monad_eliminator]: 3.401e-05 [cse]: 3.637e-05 [a_3]: 5.88001e-05 [Cycle 2]: 0.00081294, [43] [expand_dump_flag]: 1.14995e-06 [switch_simplify]: 9.35001e-06 [loop_unroll]: 7.73999e-06 [a_1]: 0.00020854 [recompute_prepare]: 7.89994e-06 [updatestate_depend_eliminate]: 5.96e-06 [updatestate_assign_eliminate]: 5.10004e-06 [updatestate_loads_eliminate]: 5.33997e-06 [parameter_eliminate]: 1.51002e-06 [a_2]: 0.00010697 [accelerated_algorithm]: 8.66002e-06 [shard]: 1.12993e-06 [meta_shard_fg_expand]: 2.58e-06 [shard_inline]: 8.52998e-06 [auto_parallel]: 1.091e-05 [parallel]: 3.43006e-06 [flash_sp]: 3.73006e-06 [merge_comm]: 5.68992e-06 [allreduce_fusion]: 4.5799e-06 [matmul_add_comm_reduction]: 7.83999e-06 [allreduce_slice_to_reducescatter]: 3.00002e-07 [virtual_shard_identity]: 9.47004e-06 [virtual_dataset]: 7.55e-06 [get_grad_eliminate_]: 7.18003e-06 [virtual_output]: 7.45e-06 [merge_forward]: 4.70993e-06 [cell_reuse_recompute_pass]: 1.75997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.587e-05 [before_grad]: 1.245e-05 [inplace_validation]: 4.09002e-06 [meta_fg_expand]: 4.85999e-06 [inplace_validation_after_expand]: 5.48991e-06 [flash_sp_send_recv_attached]: 8.2003e-07 [receive_attached]: 7.79983e-07 [after_resolve]: 1.01901e-05 [a_after_grad]: 1.174e-05 [special_op_eliminate]: 7.06001e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 1.09e-06 [auto_monad_grad]: 1.11002e-06 [auto_monad_eliminator]: 2.264e-05 [cse]: 2.978e-05 [a_3]: 5.196e-05 [py_interpret_to_execute_after_opt_a]: 1.33599e-05 [slice_cell_reuse_recomputed_activation]: 2.43005e-06 [rewriter_after_opt_a]: 0.00014719 [convert_after_rewriter]: 1.06799e-05 [order_py_execute_after_rewriter]: 6.43008e-06 [opt_b]: 0.00026006, [1] [Cycle 1]: 0.00025497, [7] [b_1]: 0.00016964 [b_2]: 1.002e-05 [updatestate_depend_eliminate]: 5.39003e-06 [updatestate_assign_eliminate]: 4.83007e-06 [updatestate_loads_eliminate]: 7.19004e-06 [renormalize]: 2.00002e-07 [cse]: 2.424e-05 [optimize_parallel_all_gather_comm]: 1.061e-05 [overlap_param_gather]: 3.25008e-06 [cconv]: 2.517e-05 [loop_unroll]: 0.00052154 [opt_after_cconv]: 0.00015631, [1] [Cycle 1]: 0.00015002, [7] [c_1]: 5.85599e-05 [parameter_eliminate]: 2.44996e-06 [updatestate_depend_eliminate]: 1.03499e-05 [updatestate_assign_eliminate]: 6.10994e-06 [updatestate_loads_eliminate]: 5.68002e-06 [cse]: 2.833e-05 [renormalize]: 4.69969e-07 [remove_dup_value]: 1.344e-05 [tuple_transform]: 7.70999e-05, [1] [Cycle 1]: 7.24799e-05, [2] [d_1]: 6.09601e-05 [renormalize]: 1.79978e-07 [partial_unused_args_eliminate]: 2.73006e-06 [add_cache_embedding]: 1.408e-05 [add_recomputation]: 0.00010151 [cse_after_recomputation]: 3.335e-05, [1] [Cycle 1]: 2.829e-05, [1] [cse]: 2.234e-05 [environ_conv]: 8.65001e-06 [swap_dp_allreduce_reducescatter]: 9.50997e-06 [bias_add_comm_swap]: 2.30991e-06 [label_micro_interleaved_index]: 4.26699e-05 [label_fine_grained_interleaved_index]: 2.22004e-06 [merge_cast_opt]: 1.69e-06 [slice_recompute_activation]: 1.91992e-06 [micro_interleaved_order_control]: 2.06998e-06 [assign_add_opt]: 3.527e-05 [ForceFp32Comm]: 8.5996e-07 [remove_cast_before_assign_add]: 1.12499e-05 [full_micro_interleaved_order_control]: 2.51003e-06 [reorder_send_recv_between_fp_bp]: 2.51993e-06 [comm_op_add_attrs]: 5.59699e-05 [add_comm_op_reuse_tag]: 2.31003e-06 [interleave_split_concat_branches]: 1.17999e-06 [interleave_parallel_branches]: 9.69972e-07 [overlap_opt_shard_in_pipeline]: 2.68e-06 [overlap_opt_shard_grad_in_pipeline]: 2.40002e-06 [control_data_broadcast_order]: 1.33005e-06 [grouped_pairwise_exchange_alltoall]: 1.265e-05 [offloading_packed_experts]: 2.11992e-06 [overlap_recompute_and_grad_model_parallel]: 1.84996e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.89995e-07 [overlap_recompute_allgather_and_fa_grad]: 8.315e-05 [overlap_grad_ring_attention]: 2.0999e-06 [overlap_grad_flash_sp]: 1.8e-05 [begin_end_overlap_inline]: 8.69972e-07 [split_matmul_comm_elemetwise]: 2.37999e-06 [split_layernorm_comm]: 1.96008e-06 [handle_group_info]: 7.35e-06 [symbol_engine_optimizer]: 0.00011411, [1] [Cycle 1]: 0.00010799, [6] [build]: 5.53997e-06 [elim_shapecalc]: 1.749e-05 [elim_not_effective]: 2.31899e-05 [opt_reshape]: 1.17701e-05 [fold_const_symbol]: 1.807e-05 [renormalize]: 4.20026e-07 [pipeline_parallel_scheduler]: 1.67999e-06 [auto_monad_reorder]: 3.78999e-05 [get_jit_bprop_graph]: 4.49945e-07 [rewriter_after_jit_bprop_graph]: 4.7998e-07 [eliminate_special_op_node]: 0.00055071 [distribtued_split]: 4.301e-05 [validate]: 3.99599e-05 [task_emit]: 0.0818527 [execute]: 1.142e-05 Sums bootstrap : 0.000504s : 0.54% type_inference : 0.005559s : 5.99% auto_monad : 0.000225s : 0.24% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000039s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000030s : 0.03% optimize.rewriter_before_opt_a : 0.000040s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000042s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.02% optimize.opt_a.a_1 : 0.000623s : 0.67% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000013s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.01% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000247s : 0.27% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000018s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000013s : 0.01% optimize.opt_a.flash_sp : 0.000018s : 0.02% optimize.opt_a.merge_comm : 0.000018s : 0.02% optimize.opt_a.allreduce_fusion : 0.000013s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000021s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000022s : 0.02% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000017s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000037s : 0.04% optimize.opt_a.before_grad : 0.000029s : 0.03% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.01% optimize.opt_a.after_resolve : 0.000025s : 0.03% optimize.opt_a.a_after_grad : 0.000027s : 0.03% optimize.opt_a.special_op_eliminate : 0.000018s : 0.02% optimize.opt_a.renormalize : 0.000546s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000057s : 0.06% optimize.opt_a.cse : 0.000066s : 0.07% optimize.opt_a.a_3 : 0.000111s : 0.12% optimize.py_interpret_to_execute_after_opt_a : 0.000013s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000147s : 0.16% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000170s : 0.18% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000007s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000024s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000011s : 0.01% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000522s : 0.56% optimize.opt_after_cconv.c_1 : 0.000059s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000010s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000028s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.01% optimize.tuple_transform.d_1 : 0.000061s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000102s : 0.11% optimize.cse_after_recomputation.cse : 0.000022s : 0.02% optimize.environ_conv : 0.000009s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000010s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000043s : 0.05% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000035s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000011s : 0.01% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000056s : 0.06% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000003s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000013s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000083s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000018s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000007s : 0.01% optimize.symbol_engine_optimizer.build : 0.000006s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000017s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000023s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000012s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000018s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000038s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000551s : 0.59% distribtued_split : 0.000043s : 0.05% validate : 0.000040s : 0.04% task_emit : 0.081853s : 88.16% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000179 63 4.18% : 0.000007s : 2: substitution.depend_value_elim 1.88% : 0.000003s : 5: substitution.elim_not_effective 1.95% : 0.000003s : 5: substitution.fold_const_symbol 5.09% : 0.000009s : 6: substitution.graph_param_transform 56.03% : 0.000100s : 1: substitution.inline 3.89% : 0.000007s : 10: substitution.j_node_and_user_rematch 2.56% : 0.000005s : 6: substitution.load_eliminater 2.41% : 0.000004s : 2: substitution.reduce_all_const_elim 6.14% : 0.000011s : 10: substitution.remove_not_recompute_node 2.00% : 0.000004s : 2: substitution.replace_old_param 6.84% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.03% : 0.000013s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.005525 2 93.18% : 0.005148s : 1: type_inference.infer 6.82% : 0.000377s : 1: type_inference.specialize ------[replace.] 0.000014 1 100.00% : 0.000014s : 1: replace.inline ------[match.] 0.000099 1 100.00% : 0.000099s : 1: match.inline ------[predicate.] 0.000237 1420 0.86% : 0.000002s : 13: predicate.accumulaten_eliminater 1.16% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.86% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.29% : 0.000005s : 25: predicate.arithmetic_simplify 0.95% : 0.000002s : 13: predicate.cast_eliminate 0.75% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.40% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.34% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.87% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.94% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.93% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.33% : 0.000001s : 6: predicate.elim_not_effective 0.51% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.23% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000003s : 19: predicate.environ_get_depend_swap 1.95% : 0.000005s : 31: predicate.environ_get_eliminate 1.08% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.80% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.69% : 0.000002s : 12: predicate.float_environ_get_switch 1.14% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.20% : 0.000000s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.32% : 0.000001s : 6: predicate.graph_param_transform 0.82% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.75% : 0.000014s : 63: predicate.inline 1.08% : 0.000003s : 12: predicate.inline_without_move 0.37% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.15% : 0.000003s : 12: predicate.less_batch_normalization 1.74% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.40% : 0.000006s : 38: predicate.load_eliminater 1.25% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.74% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.85% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.84% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.54% : 0.000001s : 6: predicate.parallel_virtual_node 1.26% : 0.000003s : 14: predicate.partial_defer_inline 1.20% : 0.000003s : 19: predicate.partial_eliminate 0.88% : 0.000002s : 13: predicate.print_const_string_wrapper 0.77% : 0.000002s : 12: predicate.reduce_all_const_elim 1.18% : 0.000003s : 13: predicate.reduce_eliminate 0.67% : 0.000002s : 12: predicate.remove_not_recompute_node 1.12% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.22% : 0.000001s : 6: predicate.reset_defer_inline 0.89% : 0.000002s : 13: predicate.reshape_eliminate 0.87% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.56% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.44% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.53% : 0.000004s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 1.05% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.98% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.23% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.94% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.22% : 0.000010s : 43: predicate.switch_simplify 0.84% : 0.000002s : 13: predicate.tile_eliminate 0.85% : 0.000002s : 13: predicate.transpose_eliminate 1.66% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.69% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.50% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.67% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.41% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.30% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.45% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000243 4 7.16% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.84% : 0.000225s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.109802 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000006s : 1: add_comm_op_reuse_tag 0.10% : 0.000107s : 1: add_recomputation 0.04% : 0.000039s : 1: assign_add_opt 0.22% : 0.000240s : 1: auto_monad 0.04% : 0.000046s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.50% : 0.000545s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.06% : 0.000061s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000037s : 1: cse_after_recomputation 0.01% : 0.000007s : 1: dataset_repeat_opt 0.05% : 0.000051s : 1: distribtued_split 0.52% : 0.000566s : 1: eliminate_special_op_node 0.01% : 0.000014s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000007s : 1: full_micro_interleaved_order_control 0.01% : 0.000008s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000016s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000012s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.01% : 0.000007s : 1: label_fine_grained_interleaved_index 0.05% : 0.000050s : 1: label_micro_interleaved_index 0.48% : 0.000533s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.01% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.12% : 0.001228s : 80: opt.transform.opt_a 0.05% : 0.000057s : 1: opt.transform.opt_after_cconv 0.15% : 0.000160s : 27: opt.transform.opt_b 0.05% : 0.000059s : 1: opt.transform.opt_trans_graph 0.03% : 0.000036s : 3: opt.transform.special_op_eliminate 0.06% : 0.000064s : 4: opt.transform.symbol_engine_opt 6.46% : 0.007093s : 1: opt_a 0.15% : 0.000162s : 1: opt_after_cconv 0.24% : 0.000263s : 1: opt_b 8.47% : 0.009299s : 1: optimize 0.01% : 0.000015s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000023s : 1: overlap_grad_flash_sp 0.01% : 0.000006s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000007s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000008s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000008s : 1: overlap_param_gather 0.08% : 0.000090s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000007s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000046s : 1: pre_auto_parallel 0.03% : 0.000034s : 1: py_interpret_to_execute 0.02% : 0.000018s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000014s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.27% : 0.000297s : 1: renormalize.infer 0.22% : 0.000243s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000153s : 1: rewriter_after_opt_a 0.04% : 0.000045s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000006s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000013s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000118s : 1: symbol_engine_optimizer 74.57% : 0.081882s : 1: task_emit 0.07% : 0.000081s : 1: tuple_transform 5.08% : 0.005580s : 1: type_inference 0.07% : 0.000078s : 1: validate TotalTime = 0.0963942, [21] [bootstrap]: 0.00043362 [type_inference]: 0.00538455 [auto_monad]: 0.00013423 [graph_reusing]: 1.43994e-06 [inline]: 1.22003e-06 [parallel-infer-symbol]: 1.32993e-06 [pre_auto_parallel]: 2.597e-05 [insert-virtual-dataset]: 1.90001e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 7.79983e-07 [pipeline_split]: 1.02003e-06 [optimize]: 0.00915627, [52] [py_interpret_to_execute]: 1.953e-05 [rewriter_before_opt_a]: 3.44201e-05 [opt_a]: 0.00716941, [2] [Cycle 1]: 0.00181567, [43] [expand_dump_flag]: 2.48e-06 [switch_simplify]: 2.75901e-05 [loop_unroll]: 2.582e-05 [a_1]: 0.00037286 [recompute_prepare]: 9.08994e-06 [updatestate_depend_eliminate]: 7.59005e-06 [updatestate_assign_eliminate]: 6.72997e-06 [updatestate_loads_eliminate]: 5.62007e-06 [parameter_eliminate]: 2.00002e-06 [a_2]: 0.00013295 [accelerated_algorithm]: 9.21998e-06 [shard]: 1.51002e-06 [meta_shard_fg_expand]: 3.25998e-06 [shard_inline]: 8.98005e-06 [auto_parallel]: 1.413e-05 [parallel]: 7.41996e-06 [flash_sp]: 1.04e-05 [merge_comm]: 9.94001e-06 [allreduce_fusion]: 6.72997e-06 [matmul_add_comm_reduction]: 1.196e-05 [allreduce_slice_to_reducescatter]: 3.30037e-07 [virtual_shard_identity]: 1.309e-05 [virtual_dataset]: 9.58005e-06 [get_grad_eliminate_]: 1.065e-05 [virtual_output]: 1.617e-05 [merge_forward]: 7.00005e-06 [cell_reuse_recompute_pass]: 1.61992e-06 [cell_reuse_handle_not_recompute_node_pass]: 2.03899e-05 [before_grad]: 2.246e-05 [inplace_validation]: 5.20004e-06 [meta_fg_expand]: 5.39992e-06 [inplace_validation_after_expand]: 6.84999e-06 [flash_sp_send_recv_attached]: 1.86998e-06 [receive_attached]: 3.44997e-06 [after_resolve]: 1.483e-05 [a_after_grad]: 2.316e-05 [special_op_eliminate]: 1.258e-05 [renormalize]: 0.00060897 [add_forward_monad_depend]: 2.64996e-06 [auto_monad_grad]: 1.30001e-06 [auto_monad_eliminator]: 2.36799e-05 [cse]: 2.723e-05 [a_3]: 6.069e-05 [Cycle 2]: 0.00081233, [43] [expand_dump_flag]: 1.01002e-06 [switch_simplify]: 9.51008e-06 [loop_unroll]: 7.90006e-06 [a_1]: 0.00020954 [recompute_prepare]: 8.07003e-06 [updatestate_depend_eliminate]: 6.10994e-06 [updatestate_assign_eliminate]: 5.11005e-06 [updatestate_loads_eliminate]: 4.77e-06 [parameter_eliminate]: 1.17999e-06 [a_2]: 0.00010998 [accelerated_algorithm]: 8.98005e-06 [shard]: 1.07009e-06 [meta_shard_fg_expand]: 2.69001e-06 [shard_inline]: 8.35001e-06 [auto_parallel]: 1.032e-05 [parallel]: 3.25998e-06 [flash_sp]: 2.46998e-06 [merge_comm]: 5.51995e-06 [allreduce_fusion]: 4.71005e-06 [matmul_add_comm_reduction]: 7.31007e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 9.75991e-06 [virtual_dataset]: 7.93999e-06 [get_grad_eliminate_]: 7.31996e-06 [virtual_output]: 8.26991e-06 [merge_forward]: 4.20003e-06 [cell_reuse_recompute_pass]: 1.64006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.578e-05 [before_grad]: 1.241e-05 [inplace_validation]: 4.05998e-06 [meta_fg_expand]: 4.83007e-06 [inplace_validation_after_expand]: 5.44009e-06 [flash_sp_send_recv_attached]: 8.29925e-07 [receive_attached]: 6.89994e-07 [after_resolve]: 1.013e-05 [a_after_grad]: 1.195e-05 [special_op_eliminate]: 7.10995e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 9.69972e-07 [auto_monad_grad]: 9.10019e-07 [auto_monad_eliminator]: 1.75501e-05 [cse]: 2.65201e-05 [a_3]: 5.143e-05 [py_interpret_to_execute_after_opt_a]: 8.97003e-06 [slice_cell_reuse_recomputed_activation]: 1.81003e-06 [rewriter_after_opt_a]: 0.00012269 [convert_after_rewriter]: 1.01899e-05 [order_py_execute_after_rewriter]: 5.53997e-06 [opt_b]: 0.00026873, [1] [Cycle 1]: 0.00026387, [7] [b_1]: 0.00017907 [b_2]: 1.11e-05 [updatestate_depend_eliminate]: 5.29992e-06 [updatestate_assign_eliminate]: 5.94009e-06 [updatestate_loads_eliminate]: 5.27001e-06 [renormalize]: 2.59955e-07 [cse]: 2.232e-05 [optimize_parallel_all_gather_comm]: 8.51997e-06 [overlap_param_gather]: 3.84997e-06 [cconv]: 1.80299e-05 [loop_unroll]: 0.00051764 [opt_after_cconv]: 0.00014791, [1] [Cycle 1]: 0.00014192, [7] [c_1]: 5.78701e-05 [parameter_eliminate]: 1.77999e-06 [updatestate_depend_eliminate]: 9.48005e-06 [updatestate_assign_eliminate]: 5.01005e-06 [updatestate_loads_eliminate]: 5.38002e-06 [cse]: 2.665e-05 [renormalize]: 3.59956e-07 [remove_dup_value]: 1.033e-05 [tuple_transform]: 7.56499e-05, [1] [Cycle 1]: 7.08799e-05, [2] [d_1]: 6.03399e-05 [renormalize]: 1.59955e-07 [partial_unused_args_eliminate]: 1.67009e-06 [add_cache_embedding]: 1.401e-05 [add_recomputation]: 6.14501e-05 [cse_after_recomputation]: 3.123e-05, [1] [Cycle 1]: 2.643e-05, [1] [cse]: 2.07101e-05 [environ_conv]: 6.90005e-06 [swap_dp_allreduce_reducescatter]: 7.93999e-06 [bias_add_comm_swap]: 1.52003e-06 [label_micro_interleaved_index]: 1.41002e-06 [label_fine_grained_interleaved_index]: 1.07998e-06 [merge_cast_opt]: 7.3004e-07 [slice_recompute_activation]: 1.07998e-06 [micro_interleaved_order_control]: 1.26997e-06 [assign_add_opt]: 2.75599e-05 [ForceFp32Comm]: 6.49947e-07 [remove_cast_before_assign_add]: 6.53998e-06 [full_micro_interleaved_order_control]: 1.05007e-06 [reorder_send_recv_between_fp_bp]: 1.09e-06 [comm_op_add_attrs]: 4.392e-05 [add_comm_op_reuse_tag]: 3.08e-06 [interleave_split_concat_branches]: 7.39936e-07 [interleave_parallel_branches]: 5.00004e-07 [overlap_opt_shard_in_pipeline]: 2.39001e-06 [overlap_opt_shard_grad_in_pipeline]: 1.44995e-06 [control_data_broadcast_order]: 6.20028e-07 [grouped_pairwise_exchange_alltoall]: 7.49994e-06 [offloading_packed_experts]: 1.61992e-06 [overlap_recompute_and_grad_model_parallel]: 1.49e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.7998e-07 [overlap_recompute_allgather_and_fa_grad]: 6.951e-05 [overlap_grad_ring_attention]: 1.33005e-06 [overlap_grad_flash_sp]: 1.559e-05 [begin_end_overlap_inline]: 4.7998e-07 [split_matmul_comm_elemetwise]: 1.26997e-06 [split_layernorm_comm]: 9.59961e-07 [handle_group_info]: 5.14998e-06 [symbol_engine_optimizer]: 0.00011415, [1] [Cycle 1]: 0.00010791, [6] [build]: 4.90004e-06 [elim_shapecalc]: 1.904e-05 [elim_not_effective]: 2.237e-05 [opt_reshape]: 1.14701e-05 [fold_const_symbol]: 1.82e-05 [renormalize]: 3.40049e-07 [pipeline_parallel_scheduler]: 1.06997e-06 [auto_monad_reorder]: 2.689e-05 [get_jit_bprop_graph]: 6.6997e-07 [rewriter_after_jit_bprop_graph]: 3.39933e-07 [eliminate_special_op_node]: 0.00055955 [distribtued_split]: 6.697e-05 [validate]: 3.53201e-05 [task_emit]: 0.0802853 [execute]: 8.87003e-06 Sums bootstrap : 0.000434s : 0.48% type_inference : 0.005385s : 5.94% auto_monad : 0.000134s : 0.15% graph_reusing : 0.000001s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000020s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000037s : 0.04% optimize.opt_a.loop_unroll : 0.000034s : 0.04% optimize.opt_a.a_1 : 0.000582s : 0.64% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000010s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000243s : 0.27% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.01% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000023s : 0.03% optimize.opt_a.virtual_dataset : 0.000018s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000018s : 0.02% optimize.opt_a.virtual_output : 0.000024s : 0.03% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000036s : 0.04% optimize.opt_a.before_grad : 0.000035s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000025s : 0.03% optimize.opt_a.a_after_grad : 0.000035s : 0.04% optimize.opt_a.special_op_eliminate : 0.000020s : 0.02% optimize.opt_a.renormalize : 0.000609s : 0.67% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000041s : 0.05% optimize.opt_a.cse : 0.000054s : 0.06% optimize.opt_a.a_3 : 0.000112s : 0.12% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000123s : 0.14% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000179s : 0.20% optimize.opt_b.b_2 : 0.000011s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000022s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000004s : 0.00% optimize.cconv : 0.000018s : 0.02% optimize.loop_unroll : 0.000518s : 0.57% optimize.opt_after_cconv.c_1 : 0.000058s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000027s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000060s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000061s : 0.07% optimize.cse_after_recomputation.cse : 0.000021s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000028s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000044s : 0.05% optimize.add_comm_op_reuse_tag : 0.000003s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000007s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000070s : 0.08% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000019s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000022s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000018s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000027s : 0.03% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000560s : 0.62% distribtued_split : 0.000067s : 0.07% validate : 0.000035s : 0.04% task_emit : 0.080285s : 88.49% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000127 63 3.75% : 0.000005s : 2: substitution.depend_value_elim 3.13% : 0.000004s : 5: substitution.elim_not_effective 1.99% : 0.000003s : 5: substitution.fold_const_symbol 6.29% : 0.000008s : 6: substitution.graph_param_transform 48.40% : 0.000062s : 1: substitution.inline 4.29% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.13% : 0.000004s : 6: substitution.load_eliminater 2.08% : 0.000003s : 2: substitution.reduce_all_const_elim 8.17% : 0.000010s : 10: substitution.remove_not_recompute_node 1.89% : 0.000002s : 2: substitution.replace_old_param 8.29% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.58% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.005359 2 94.36% : 0.005056s : 1: type_inference.infer 5.64% : 0.000302s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000060 1 100.00% : 0.000060s : 1: match.inline ------[predicate.] 0.000241 1420 0.93% : 0.000002s : 13: predicate.accumulaten_eliminater 1.19% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.88% : 0.000002s : 13: predicate.addn_zero_filter 0.71% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.37% : 0.000006s : 25: predicate.arithmetic_simplify 0.98% : 0.000002s : 13: predicate.cast_eliminate 0.83% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.50% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.20% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.67% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.21% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.05% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.05% : 0.000003s : 19: predicate.environ_get_depend_swap 1.89% : 0.000005s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.29% : 0.000003s : 14: predicate.float_depend_g_call 0.68% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.92% : 0.000002s : 12: predicate.get_grad_eliminate 0.28% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.29% : 0.000013s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.17% : 0.000003s : 12: predicate.less_batch_normalization 1.74% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.31% : 0.000006s : 38: predicate.load_eliminater 1.40% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.17% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.77% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.79% : 0.000002s : 12: predicate.merge_addn 0.85% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.83% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.73% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.56% : 0.000001s : 6: predicate.parallel_virtual_node 1.14% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.85% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.24% : 0.000003s : 13: predicate.reduce_eliminate 0.54% : 0.000001s : 12: predicate.remove_not_recompute_node 1.06% : 0.000003s : 25: predicate.replace_applicator 0.43% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.93% : 0.000002s : 13: predicate.reshape_eliminate 0.87% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.61% : 0.000001s : 6: predicate.row_tensor_eliminate 0.97% : 0.000002s : 12: predicate.same_eliminate 0.45% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.05% : 0.000003s : 12: predicate.shard_identity_eliminate 1.40% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.13% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 1.03% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.35% : 0.000006s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.95% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.12% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.91% : 0.000002s : 13: predicate.transpose_eliminate 1.76% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.77% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.53% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.61% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.52% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.52% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.32% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.49% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.56% : 0.000001s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.87% : 0.000002s : 12: predicate.virtual_output_eliminate 0.58% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000206 4 5.35% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 94.65% : 0.000195s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.107681 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000006s : 1: add_comm_op_reuse_tag 0.06% : 0.000066s : 1: add_recomputation 0.03% : 0.000032s : 1: assign_add_opt 0.14% : 0.000146s : 1: auto_monad 0.03% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000005s : 1: bias_add_comm_swap 0.43% : 0.000466s : 1: bootstrap 0.02% : 0.000022s : 1: cconv 0.05% : 0.000049s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.01% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000036s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.07% : 0.000076s : 1: distribtued_split 0.53% : 0.000573s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000009s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.01% : 0.000011s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000010s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000005s : 1: interleave_parallel_branches 0.01% : 0.000006s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000005s : 1: label_micro_interleaved_index 0.49% : 0.000528s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.01% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.13% : 0.001219s : 80: opt.transform.opt_a 0.05% : 0.000056s : 1: opt.transform.opt_after_cconv 0.16% : 0.000169s : 27: opt.transform.opt_b 0.05% : 0.000059s : 1: opt.transform.opt_trans_graph 0.04% : 0.000044s : 3: opt.transform.special_op_eliminate 0.06% : 0.000064s : 4: opt.transform.symbol_engine_opt 6.66% : 0.007174s : 1: opt_a 0.14% : 0.000152s : 1: opt_after_cconv 0.25% : 0.000272s : 1: opt_b 8.51% : 0.009165s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000005s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000007s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000008s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000008s : 1: overlap_param_gather 0.07% : 0.000075s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000009s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000024s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.01% : 0.000016s : 1: remove_dup_value 0.31% : 0.000333s : 1: renormalize.infer 0.25% : 0.000270s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.12% : 0.000128s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000006s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000117s : 1: symbol_engine_optimizer 74.59% : 0.080318s : 1: task_emit 0.07% : 0.000081s : 1: tuple_transform 5.02% : 0.005402s : 1: type_inference 0.07% : 0.000071s : 1: validate . TotalTime = 0.0979948, [21] [bootstrap]: 0.00053592 [type_inference]: 0.00528258 [auto_monad]: 0.00019668 [graph_reusing]: 1.81992e-06 [inline]: 1.06997e-06 [parallel-infer-symbol]: 1.80001e-06 [pre_auto_parallel]: 2.61001e-05 [insert-virtual-dataset]: 2.09e-06 [parallel-infer-symbol-second]: 3.60073e-07 [dataset_repeat_opt]: 1.15996e-06 [pipeline_split]: 1.36998e-06 [optimize]: 0.00886519, [52] [py_interpret_to_execute]: 1.696e-05 [rewriter_before_opt_a]: 4.92501e-05 [opt_a]: 0.00683938, [2] [Cycle 1]: 0.00178455, [43] [expand_dump_flag]: 2.60992e-06 [switch_simplify]: 2.80599e-05 [loop_unroll]: 2.38899e-05 [a_1]: 0.00035929 [recompute_prepare]: 8.47003e-06 [updatestate_depend_eliminate]: 8.31997e-06 [updatestate_assign_eliminate]: 2.611e-05 [updatestate_loads_eliminate]: 7.23999e-06 [parameter_eliminate]: 2.96999e-06 [a_2]: 0.00012787 [accelerated_algorithm]: 9.04e-06 [shard]: 2.30991e-06 [meta_shard_fg_expand]: 4.27e-06 [shard_inline]: 8.98994e-06 [auto_parallel]: 1.329e-05 [parallel]: 6.26e-06 [flash_sp]: 8.68004e-06 [merge_comm]: 9.10007e-06 [allreduce_fusion]: 6.61996e-06 [matmul_add_comm_reduction]: 1.15901e-05 [allreduce_slice_to_reducescatter]: 4.09898e-07 [virtual_shard_identity]: 1.164e-05 [virtual_dataset]: 9.48005e-06 [get_grad_eliminate_]: 9.49006e-06 [virtual_output]: 1.32e-05 [merge_forward]: 7.23009e-06 [cell_reuse_recompute_pass]: 2.03005e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.91301e-05 [before_grad]: 2.198e-05 [inplace_validation]: 7.09004e-06 [meta_fg_expand]: 5.02996e-06 [inplace_validation_after_expand]: 7.01007e-06 [flash_sp_send_recv_attached]: 2.21003e-06 [receive_attached]: 3.76999e-06 [after_resolve]: 1.37499e-05 [a_after_grad]: 2.18401e-05 [special_op_eliminate]: 1.00201e-05 [renormalize]: 0.00055507 [add_forward_monad_depend]: 3.79991e-06 [auto_monad_grad]: 2.14006e-06 [auto_monad_eliminator]: 2.839e-05 [cse]: 2.72801e-05 [a_3]: 8.919e-05 [Cycle 2]: 0.0007857, [43] [expand_dump_flag]: 1.10001e-06 [switch_simplify]: 9.70997e-06 [loop_unroll]: 7.49005e-06 [a_1]: 0.00020194 [recompute_prepare]: 7.50995e-06 [updatestate_depend_eliminate]: 6.33998e-06 [updatestate_assign_eliminate]: 5.04998e-06 [updatestate_loads_eliminate]: 5.62996e-06 [parameter_eliminate]: 1.33005e-06 [a_2]: 0.00010574 [accelerated_algorithm]: 8.27003e-06 [shard]: 1.21002e-06 [meta_shard_fg_expand]: 2.61993e-06 [shard_inline]: 7.80006e-06 [auto_parallel]: 1.015e-05 [parallel]: 3.29001e-06 [flash_sp]: 3.24997e-06 [merge_comm]: 6.14999e-06 [allreduce_fusion]: 4.74998e-06 [matmul_add_comm_reduction]: 1.26499e-05 [allreduce_slice_to_reducescatter]: 3.69968e-07 [virtual_shard_identity]: 9.09006e-06 [virtual_dataset]: 7.71007e-06 [get_grad_eliminate_]: 7.22997e-06 [virtual_output]: 7.42998e-06 [merge_forward]: 4.40993e-06 [cell_reuse_recompute_pass]: 1.89e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.532e-05 [before_grad]: 1.23901e-05 [inplace_validation]: 4.08001e-06 [meta_fg_expand]: 4.80004e-06 [inplace_validation_after_expand]: 5.42006e-06 [flash_sp_send_recv_attached]: 9.29926e-07 [receive_attached]: 7.10017e-07 [after_resolve]: 1.003e-05 [a_after_grad]: 1.14799e-05 [special_op_eliminate]: 7.17002e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 1.06008e-06 [auto_monad_grad]: 1.25007e-06 [auto_monad_eliminator]: 1.956e-05 [cse]: 2.137e-05 [a_3]: 4.967e-05 [py_interpret_to_execute_after_opt_a]: 1.195e-05 [slice_cell_reuse_recomputed_activation]: 2.22994e-06 [rewriter_after_opt_a]: 0.00012588 [convert_after_rewriter]: 1.073e-05 [order_py_execute_after_rewriter]: 5.28002e-06 [opt_b]: 0.00025951, [1] [Cycle 1]: 0.00025386, [7] [b_1]: 0.00016914 [b_2]: 1.11499e-05 [updatestate_depend_eliminate]: 5.54998e-06 [updatestate_assign_eliminate]: 6.16e-06 [updatestate_loads_eliminate]: 6.26e-06 [renormalize]: 2.19909e-07 [cse]: 2.352e-05 [optimize_parallel_all_gather_comm]: 9.79996e-06 [overlap_param_gather]: 2.71003e-06 [cconv]: 2.098e-05 [loop_unroll]: 0.00049795 [opt_after_cconv]: 0.00014481, [1] [Cycle 1]: 0.00013833, [7] [c_1]: 5.468e-05 [parameter_eliminate]: 2.37999e-06 [updatestate_depend_eliminate]: 8.28994e-06 [updatestate_assign_eliminate]: 5.88002e-06 [updatestate_loads_eliminate]: 6.50005e-06 [cse]: 2.438e-05 [renormalize]: 4.39934e-07 [remove_dup_value]: 1.25001e-05 [tuple_transform]: 7.27399e-05, [1] [Cycle 1]: 6.81101e-05, [2] [d_1]: 5.741e-05 [renormalize]: 2.10013e-07 [partial_unused_args_eliminate]: 2.01992e-06 [add_cache_embedding]: 1.418e-05 [add_recomputation]: 8.95399e-05 [cse_after_recomputation]: 3.01299e-05, [1] [Cycle 1]: 2.532e-05, [1] [cse]: 2.02099e-05 [environ_conv]: 7.19994e-06 [swap_dp_allreduce_reducescatter]: 8.99995e-06 [bias_add_comm_swap]: 1.89e-06 [label_micro_interleaved_index]: 2.20991e-06 [label_fine_grained_interleaved_index]: 1.87999e-06 [merge_cast_opt]: 1.12003e-06 [slice_recompute_activation]: 2.07999e-06 [micro_interleaved_order_control]: 1.88011e-06 [assign_add_opt]: 2.859e-05 [ForceFp32Comm]: 8.69972e-07 [remove_cast_before_assign_add]: 7.96991e-06 [full_micro_interleaved_order_control]: 1.50001e-06 [reorder_send_recv_between_fp_bp]: 1.84006e-06 [comm_op_add_attrs]: 3.715e-05 [add_comm_op_reuse_tag]: 3.05008e-06 [interleave_split_concat_branches]: 6.3004e-07 [interleave_parallel_branches]: 7.00005e-07 [overlap_opt_shard_in_pipeline]: 1.471e-05 [overlap_opt_shard_grad_in_pipeline]: 1.61002e-06 [control_data_broadcast_order]: 9.2003e-07 [grouped_pairwise_exchange_alltoall]: 8.64e-06 [offloading_packed_experts]: 1.40001e-06 [overlap_recompute_and_grad_model_parallel]: 1.55997e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.05996e-06 [overlap_recompute_allgather_and_fa_grad]: 8.635e-05 [overlap_grad_ring_attention]: 1.45996e-06 [overlap_grad_flash_sp]: 2.609e-05 [begin_end_overlap_inline]: 5.59958e-07 [split_matmul_comm_elemetwise]: 1.72004e-06 [split_layernorm_comm]: 1.44995e-06 [handle_group_info]: 4.94998e-06 [symbol_engine_optimizer]: 0.0001078, [1] [Cycle 1]: 0.00010209, [6] [build]: 4.37e-06 [elim_shapecalc]: 1.442e-05 [elim_not_effective]: 2.25699e-05 [opt_reshape]: 1.08499e-05 [fold_const_symbol]: 1.687e-05 [renormalize]: 3.30037e-07 [pipeline_parallel_scheduler]: 1.56998e-06 [auto_monad_reorder]: 2.892e-05 [get_jit_bprop_graph]: 4.50062e-07 [rewriter_after_jit_bprop_graph]: 4.10015e-07 [eliminate_special_op_node]: 0.00052133 [distribtued_split]: 3.64501e-05 [validate]: 3.57799e-05 [task_emit]: 0.082187 [execute]: 1.083e-05 Sums bootstrap : 0.000536s : 0.58% type_inference : 0.005283s : 5.70% auto_monad : 0.000197s : 0.21% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000049s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000038s : 0.04% optimize.opt_a.loop_unroll : 0.000031s : 0.03% optimize.opt_a.a_1 : 0.000561s : 0.61% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000031s : 0.03% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000234s : 0.25% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.01% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000024s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000021s : 0.02% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000017s : 0.02% optimize.opt_a.virtual_output : 0.000021s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000034s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000024s : 0.03% optimize.opt_a.a_after_grad : 0.000033s : 0.04% optimize.opt_a.special_op_eliminate : 0.000017s : 0.02% optimize.opt_a.renormalize : 0.000555s : 0.60% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000048s : 0.05% optimize.opt_a.cse : 0.000049s : 0.05% optimize.opt_a.a_3 : 0.000139s : 0.15% optimize.py_interpret_to_execute_after_opt_a : 0.000012s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000126s : 0.14% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000169s : 0.18% optimize.opt_b.b_2 : 0.000011s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000024s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000010s : 0.01% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000021s : 0.02% optimize.loop_unroll : 0.000498s : 0.54% optimize.opt_after_cconv.c_1 : 0.000055s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.cse : 0.000024s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.01% optimize.tuple_transform.d_1 : 0.000057s : 0.06% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000090s : 0.10% optimize.cse_after_recomputation.cse : 0.000020s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000009s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000008s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000037s : 0.04% optimize.add_comm_op_reuse_tag : 0.000003s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000015s : 0.02% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000086s : 0.09% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000026s : 0.03% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000004s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000023s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000011s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000017s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000029s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000521s : 0.56% distribtued_split : 0.000036s : 0.04% validate : 0.000036s : 0.04% task_emit : 0.082187s : 88.72% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000134 63 4.24% : 0.000006s : 2: substitution.depend_value_elim 2.44% : 0.000003s : 5: substitution.elim_not_effective 2.37% : 0.000003s : 5: substitution.fold_const_symbol 5.06% : 0.000007s : 6: substitution.graph_param_transform 49.57% : 0.000066s : 1: substitution.inline 4.60% : 0.000006s : 10: substitution.j_node_and_user_rematch 2.93% : 0.000004s : 6: substitution.load_eliminater 2.04% : 0.000003s : 2: substitution.reduce_all_const_elim 7.07% : 0.000009s : 10: substitution.remove_not_recompute_node 2.33% : 0.000003s : 2: substitution.replace_old_param 8.27% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 9.09% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.005254 2 94.06% : 0.004942s : 1: type_inference.infer 5.94% : 0.000312s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000229 1420 0.82% : 0.000002s : 13: predicate.accumulaten_eliminater 1.06% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.20% : 0.000005s : 25: predicate.arithmetic_simplify 0.77% : 0.000002s : 13: predicate.cast_eliminate 0.84% : 0.000002s : 12: predicate.check_bprop_eliminate 0.76% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.20% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.87% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.83% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_depend_swap 1.95% : 0.000004s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.25% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.77% : 0.000013s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.08% : 0.000002s : 12: predicate.less_batch_normalization 1.65% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.48% : 0.000006s : 38: predicate.load_eliminater 1.19% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.27% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.77% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.79% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.75% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.55% : 0.000001s : 6: predicate.parallel_virtual_node 1.17% : 0.000003s : 14: predicate.partial_defer_inline 1.21% : 0.000003s : 19: predicate.partial_eliminate 0.82% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.07% : 0.000002s : 13: predicate.reduce_eliminate 0.58% : 0.000001s : 12: predicate.remove_not_recompute_node 1.17% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.83% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 0.99% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.99% : 0.000002s : 12: predicate.shard_identity_eliminate 1.52% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 1.06% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.30% : 0.000005s : 38: predicate.stopgrad_eliminater 0.46% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000002s : 14: predicate.switch_defer_inline 1.71% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.46% : 0.000010s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.81% : 0.000002s : 13: predicate.transpose_eliminate 1.74% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.69% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.52% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.62% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.55% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.44% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.56% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.52% : 0.000001s : 6: predicate.value_based_eliminate 0.94% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.85% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000196 4 7.05% : 0.000014s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.95% : 0.000182s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.108879 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000006s : 1: add_comm_op_reuse_tag 0.09% : 0.000095s : 1: add_recomputation 0.03% : 0.000032s : 1: assign_add_opt 0.19% : 0.000211s : 1: auto_monad 0.03% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.52% : 0.000565s : 1: bootstrap 0.02% : 0.000025s : 1: cconv 0.04% : 0.000043s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.01% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000035s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.04% : 0.000044s : 1: distribtued_split 0.49% : 0.000535s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000006s : 1: label_micro_interleaved_index 0.47% : 0.000508s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.01% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.08% : 0.001174s : 80: opt.transform.opt_a 0.05% : 0.000053s : 1: opt.transform.opt_after_cconv 0.15% : 0.000159s : 27: opt.transform.opt_b 0.05% : 0.000056s : 1: opt.transform.opt_trans_graph 0.04% : 0.000041s : 3: opt.transform.special_op_eliminate 0.06% : 0.000060s : 4: opt.transform.symbol_engine_opt 6.29% : 0.006843s : 1: opt_a 0.14% : 0.000150s : 1: opt_after_cconv 0.24% : 0.000263s : 1: opt_b 8.15% : 0.008874s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.03% : 0.000031s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.02% : 0.000019s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000007s : 1: overlap_param_gather 0.08% : 0.000092s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000007s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000016s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.27% : 0.000299s : 1: renormalize.infer 0.23% : 0.000251s : 1: renormalize.specialize 0.00% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.12% : 0.000131s : 1: rewriter_after_opt_a 0.05% : 0.000054s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000006s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000111s : 1: symbol_engine_optimizer 75.51% : 0.082217s : 1: task_emit 0.07% : 0.000077s : 1: tuple_transform 4.87% : 0.005302s : 1: type_inference 0.07% : 0.000072s : 1: validate [WARNING] PARALLEL(169251,ffff805f5c10,python3.7):2025-02-07-15:54:29.462.023 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169242,ffff96d80c10,python3.7):2025-02-07-15:54:29.462.509 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169260,ffff90a16c10,python3.7):2025-02-07-15:54:29.462.510 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169273,ffff95a23c10,python3.7):2025-02-07-15:54:29.462.634 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169297,ffff9ffe2c10,python3.7):2025-02-07-15:54:29.462.634 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169309,ffff88eb9c10,python3.7):2025-02-07-15:54:29.462.718 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169323,ffff82a65c10,python3.7):2025-02-07-15:54:29.463.026 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169285,ffff97501c10,python3.7):2025-02-07-15:54:29.463.142 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 0.0801446, [21] [bootstrap]: 0.00032227 [type_inference]: 0.00258535 [auto_monad]: 0.0001347 [graph_reusing]: 1.27999e-06 [inline]: 1.06997e-06 [parallel-infer-symbol]: 1.12003e-06 [pre_auto_parallel]: 2.60799e-05 [insert-virtual-dataset]: 2.34006e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 9.30042e-07 [pipeline_split]: 1.16997e-06 [optimize]: 0.0073877, [52] [py_interpret_to_execute]: 1.592e-05 [rewriter_before_opt_a]: 3.426e-05 [opt_a]: 0.00555797, [2] [Cycle 1]: 0.00158597, [43] [expand_dump_flag]: 3.54007e-06 [switch_simplify]: 3.038e-05 [loop_unroll]: 1.397e-05 [a_1]: 0.00035527 [recompute_prepare]: 9.36002e-06 [updatestate_depend_eliminate]: 8.25e-06 [updatestate_assign_eliminate]: 5.43008e-06 [updatestate_loads_eliminate]: 5.96e-06 [parameter_eliminate]: 2.85008e-06 [a_2]: 0.00012143 [accelerated_algorithm]: 8.68004e-06 [shard]: 2.46998e-06 [meta_shard_fg_expand]: 4.24008e-06 [shard_inline]: 8.70996e-06 [auto_parallel]: 1.231e-05 [parallel]: 8.06001e-06 [flash_sp]: 1.04599e-05 [merge_comm]: 8.33999e-06 [allreduce_fusion]: 5.99993e-06 [matmul_add_comm_reduction]: 1.061e-05 [allreduce_slice_to_reducescatter]: 4.69969e-07 [virtual_shard_identity]: 9.74e-06 [virtual_dataset]: 8.39995e-06 [get_grad_eliminate_]: 7.91997e-06 [virtual_output]: 8.07003e-06 [merge_forward]: 5.72007e-06 [cell_reuse_recompute_pass]: 1.84006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.718e-05 [before_grad]: 1.384e-05 [inplace_validation]: 5.4501e-06 [meta_fg_expand]: 5.56e-06 [inplace_validation_after_expand]: 6.62007e-06 [flash_sp_send_recv_attached]: 5.13997e-06 [receive_attached]: 2.90002e-06 [after_resolve]: 1.105e-05 [a_after_grad]: 1.285e-05 [special_op_eliminate]: 8.20996e-06 [renormalize]: 0.00046362 [add_forward_monad_depend]: 3.73996e-06 [auto_monad_grad]: 2.10002e-06 [auto_monad_eliminator]: 3.255e-05 [cse]: 3.40299e-05 [a_3]: 6.04599e-05 [Cycle 2]: 0.00086553, [43] [expand_dump_flag]: 1.15007e-06 [switch_simplify]: 9.27004e-06 [loop_unroll]: 7.96001e-06 [a_1]: 0.00021216 [recompute_prepare]: 7.76001e-06 [updatestate_depend_eliminate]: 6.40005e-06 [updatestate_assign_eliminate]: 5.51995e-06 [updatestate_loads_eliminate]: 5.38002e-06 [parameter_eliminate]: 1.36008e-06 [a_2]: 0.00010825 [accelerated_algorithm]: 9.02999e-06 [shard]: 1.21992e-06 [meta_shard_fg_expand]: 2.70002e-06 [shard_inline]: 9.18005e-06 [auto_parallel]: 1.18e-05 [parallel]: 3.62995e-06 [flash_sp]: 2.34996e-06 [merge_comm]: 6.00994e-06 [allreduce_fusion]: 4.83997e-06 [matmul_add_comm_reduction]: 7.95e-06 [allreduce_slice_to_reducescatter]: 2.59955e-07 [virtual_shard_identity]: 9.67004e-06 [virtual_dataset]: 7.80995e-06 [get_grad_eliminate_]: 7.53999e-06 [virtual_output]: 7.98993e-06 [merge_forward]: 4.23007e-06 [cell_reuse_recompute_pass]: 1.96998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.616e-05 [before_grad]: 1.275e-05 [inplace_validation]: 4.24008e-06 [meta_fg_expand]: 5.27001e-06 [inplace_validation_after_expand]: 5.99003e-06 [flash_sp_send_recv_attached]: 1.02003e-06 [receive_attached]: 8.79983e-07 [after_resolve]: 1.069e-05 [a_after_grad]: 1.235e-05 TotalTime = 0.0801452, [21] [bootstrap]: 0.0003229 [type_inference]: 0.00259541 [auto_monad]: 0.00012382 [graph_reusing]: 2.44007e-06 [inline]: 1.51002e-06 [parallel-infer-symbol]: 2.14996e-06 [pre_auto_parallel]: 2.669e-05 [insert-virtual-dataset]: 3.04007e-06 [parallel-infer-symbol-second]: 3.40049e-07 [dataset_repeat_opt]: 1.41002e-06 [pipeline_split]: 1.67999e-06 [optimize]: 0.00738774, [52] [py_interpret_to_execute]: 1.81301e-05 [rewriter_before_opt_a]: 3.24701e-05 [opt_a]: 0.00553125, [2] [Cycle 1]: 0.00158187, [43] [expand_dump_flag]: 3.69002e-06 [switch_simplify]: 3.041e-05 [loop_unroll]: 1.305e-05 [a_1]: 0.00035026 [recompute_prepare]: 9.27993e-06 [updatestate_depend_eliminate]: 9.06002e-06 [updatestate_assign_eliminate]: 6.16e-06 [updatestate_loads_eliminate]: 7.52008e-06 [parameter_eliminate]: 3.91004e-06 [a_2]: 0.00012122 [accelerated_algorithm]: 8.69995e-06 [shard]: 1.91992e-06 [meta_shard_fg_expand]: 4.22006e-06 [shard_inline]: 9.18994e-06 [auto_parallel]: 1.27e-05 [parallel]: 7.49005e-06 [flash_sp]: 1.046e-05 [merge_comm]: 8.66002e-06 [allreduce_fusion]: 5.6799e-06 [matmul_add_comm_reduction]: 1.071e-05 [allreduce_slice_to_reducescatter]: 4.4005e-07 [virtual_shard_identity]: 9.75002e-06 [virtual_dataset]: 8.3599e-06 [get_grad_eliminate_]: 7.87003e-06 [virtual_output]: 7.9301e-06 [merge_forward]: 5.82007e-06 [cell_reuse_recompute_pass]: 1.72004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.747e-05 [before_grad]: 1.385e-05 [inplace_validation]: 5.07e-06 [meta_fg_expand]: 5.73997e-06 [inplace_validation_after_expand]: 6.34999e-06 [flash_sp_send_recv_attached]: 5.28991e-06 [receive_attached]: 2.78e-06 [after_resolve]: 1.166e-05 [a_after_grad]: 1.32601e-05 [special_op_eliminate]: 8.36002e-06 [renormalize]: 0.00046351 [add_forward_monad_depend]: 3.83006e-06 [auto_monad_grad]: 1.39e-06 [auto_monad_eliminator]: 3.213e-05 [cse]: 3.37301e-05 [a_3]: 5.68901e-05 [Cycle 2]: 0.00084134, [43] [expand_dump_flag]: 9.40054e-07 [switch_simplify]: 9.14e-06 [loop_unroll]: 7.77002e-06 [a_1]: 0.00020329 [recompute_prepare]: 7.99005e-06 [updatestate_depend_eliminate]: 6.09003e-06 [updatestate_assign_eliminate]: 4.70993e-06 [updatestate_loads_eliminate]: 5.40004e-06 [parameter_eliminate]: 1.43005e-06 [a_2]: 0.00010606 [accelerated_algorithm]: 8.65001e-06 [shard]: 1.30001e-06 [meta_shard_fg_expand]: 2.50002e-06 [shard_inline]: 7.90996e-06 [auto_parallel]: 6.08501e-05 [parallel]: 3.45998e-06 [flash_sp]: 3.93996e-06 [merge_comm]: 5.96e-06 [allreduce_fusion]: 4.79992e-06 [matmul_add_comm_reduction]: 8.0301e-06 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 9.26002e-06 [virtual_dataset]: 7.82998e-06 [get_grad_eliminate_]: 7.50995e-06 [virtual_output]: 7.29994e-06 [merge_forward]: 4.40003e-06 [cell_reuse_recompute_pass]: 1.94006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.6e-05 [before_grad]: 1.232e-05 [inplace_validation]: 4.21004e-06 [meta_fg_expand]: 4.72006e-06 [inplace_validation_after_expand]: 5.47001e-06 [flash_sp_send_recv_attached]: 8.60076e-07 [receive_attached]: 8.79983e-07 [after_resolve]: 1.03799e-05 [a_after_grad]: 1.15599e-05 [special_op_eliminate]: 7.45e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 1.01002e-06 [auto_monad_grad]: 1.21992e-06 [auto_monad_eliminator]: 1.885e-05 [cse]: 2.08099e-05 [a_3]: 4.85999e-05 [py_interpret_to_execute_after_opt_a]: 9.17003e-06 [slice_cell_reuse_recomputed_activation]: 1.24006e-06 [rewriter_after_opt_a]: 0.00011645 [convert_after_rewriter]: 1.04001e-05 [order_py_execute_after_rewriter]: 5.09992e-06 [opt_b]: 0.00025079, [1] [Cycle 1]: 0.00024523, [7] [b_1]: 0.00016682 [b_2]: 9.47004e-06 [updatestate_depend_eliminate]: 5.53997e-06 [updatestate_assign_eliminate]: 4.80993e-06 [updatestate_loads_eliminate]: 5.64009e-06 [renormalize]: 3.7998e-07 [cse]: 2.025e-05 [optimize_parallel_all_gather_comm]: 8.18004e-06 [overlap_param_gather]: 6.49947e-07 [cconv]: 1.72601e-05 [loop_unroll]: 0.00049931 [opt_after_cconv]: 0.00013708, [1] [Cycle 1]: 0.00013124, [7] [c_1]: 5.455e-05 [parameter_eliminate]: 2.23995e-06 [updatestate_depend_eliminate]: 7.88993e-06 [updatestate_assign_eliminate]: 5.61995e-06 [updatestate_loads_eliminate]: 5.87001e-06 [cse]: 2.259e-05 [renormalize]: 3.7998e-07 [remove_dup_value]: 1.41499e-05 [tuple_transform]: 7.49499e-05, [1] [Cycle 1]: 7.05301e-05, [2] [d_1]: 6.08901e-05 [renormalize]: 1.40048e-07 [partial_unused_args_eliminate]: 1.41992e-06 [add_cache_embedding]: 1.08799e-05 [add_recomputation]: 6.113e-05 [cse_after_recomputation]: 2.706e-05, [1] [Cycle 1]: 2.25201e-05, [1] [cse]: 1.732e-05 [environ_conv]: 5.96e-06 [swap_dp_allreduce_reducescatter]: 6.44999e-06 [bias_add_comm_swap]: 1.44995e-06 [label_micro_interleaved_index]: 2.15007e-06 [label_fine_grained_interleaved_index]: 2.10002e-06 [merge_cast_opt]: 1.12993e-06 [slice_recompute_activation]: 2.01003e-06 [micro_interleaved_order_control]: 1.81003e-06 [assign_add_opt]: 2.868e-05 [ForceFp32Comm]: 6.70087e-07 [remove_cast_before_assign_add]: 7.31007e-06 [full_micro_interleaved_order_control]: 2.12993e-06 [reorder_send_recv_between_fp_bp]: 2.04996e-06 [comm_op_add_attrs]: 2.762e-05 [add_comm_op_reuse_tag]: 1.90001e-06 [interleave_split_concat_branches]: 1.15996e-06 [interleave_parallel_branches]: 9.10019e-07 [overlap_opt_shard_in_pipeline]: 9.69972e-07 [overlap_opt_shard_grad_in_pipeline]: 2.09e-06 [control_data_broadcast_order]: 8.30041e-07 [grouped_pairwise_exchange_alltoall]: 9.41008e-06 [offloading_packed_experts]: 2.53995e-06 [overlap_recompute_and_grad_model_parallel]: 1.93005e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.69972e-07 [overlap_recompute_allgather_and_fa_grad]: 7.225e-05 [overlap_grad_ring_attention]: 1.84996e-06 [overlap_grad_flash_sp]: 1.431e-05 [begin_end_overlap_inline]: 8.40053e-07 [split_matmul_comm_elemetwise]: 1.92004e-06 [split_layernorm_comm]: 1.90001e-06 [handle_group_info]: 4.87e-06 [symbol_engine_optimizer]: 9.143e-05, [1] [Cycle 1]: 8.652e-05, [6] [build]: 4.58001e-06 [elim_shapecalc]: 1.46501e-05 [elim_not_effective]: 1.65399e-05 [opt_reshape]: 9.28005e-06 [fold_const_symbol]: 1.382e-05 [renormalize]: 1.79978e-07 [pipeline_parallel_scheduler]: 1.99e-06 [auto_monad_reorder]: 3.113e-05 [get_jit_bprop_graph]: 4.1991e-07 [rewriter_after_jit_bprop_graph]: 3.89991e-07 [eliminate_special_op_node]: 0.00051807 [distribtued_split]: 3.32e-05 [validate]: 3.059e-05 [task_emit]: 0.0688053 [execute]: 1.16e-05 Sums bootstrap : 0.000322s : 0.42% type_inference : 0.002585s : 3.40% auto_monad : 0.000135s : 0.18% graph_reusing [special_op_eliminate]: 7.60995e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 1.12003e-06 [auto_monad_grad]: 1.82993e-06 [auto_monad_eliminator]: 2.06101e-05 [cse]: 2.066e-05 [a_3]: 4.858e-05 [py_interpret_to_execute_after_opt_a]: 9.80007e-06 [slice_cell_reuse_recomputed_activation]: 2.23995e-06 [rewriter_after_opt_a]: 0.00013747 [convert_after_rewriter]: 8.38994e-06 [order_py_execute_after_rewriter]: 6.17001e-06 [opt_b]: 0.00024412, [1] [Cycle 1]: 0.00023828, [7] [b_1]: 0.00016458 [b_2]: 1.003e-05 [updatestate_depend_eliminate]: 5.67001e-06 [updatestate_assign_eliminate]: 4.50993e-06 [updatestate_loads_eliminate]: 5.23007e-06 [renormalize]: 3.29921e-07 [cse]: 1.74401e-05 [optimize_parallel_all_gather_comm]: 8.77003e-06 [overlap_param_gather]: 1.41002e-06 [cconv]: 2.484e-05 [loop_unroll]: 0.00050849 [opt_after_cconv]: 0.00013405, [1] [Cycle 1]: 0.0001275, [7] [c_1]: 5.28099e-05 [parameter_eliminate]: 2.34996e-06 [updatestate_depend_eliminate]: 8.30996e-06 [updatestate_assign_eliminate]: 4.72006e-06 [updatestate_loads_eliminate]: 5.88002e-06 [cse]: 2.126e-05 [renormalize]: 4.50062e-07 [remove_dup_value]: 8.69995e-06 [tuple_transform]: 6.74301e-05, [1] [Cycle 1]: 6.335e-05, [2] [d_1]: 5.45899e-05 [renormalize]: 1.49943e-07 [partial_unused_args_eliminate]: 2.24996e-06 [add_cache_embedding]: 1.333e-05 [add_recomputation]: 6.35999e-05 [cse_after_recomputation]: 2.646e-05, [1] [Cycle 1]: 2.159e-05, [1] [cse]: 1.691e-05 [environ_conv]: 6.63009e-06 [swap_dp_allreduce_reducescatter]: 7.25e-06 [bias_add_comm_swap]: 2.33995e-06 [label_micro_interleaved_index]: 1.94996e-06 [label_fine_grained_interleaved_index]: 2.06998e-06 [merge_cast_opt]: 1.04995e-06 [slice_recompute_activation]: 2.13995e-06 [micro_interleaved_order_control]: 1.72004e-06 [assign_add_opt]: 2.863e-05 [ForceFp32Comm]: 8.79983e-07 [remove_cast_before_assign_add]: 6.97991e-06 [full_micro_interleaved_order_control]: 2.22994e-06 [reorder_send_recv_between_fp_bp]: 2.16998e-06 [comm_op_add_attrs]: 2.71299e-05 [add_comm_op_reuse_tag]: 2.07999e-06 [interleave_split_concat_branches]: 1.27999e-06 [interleave_parallel_branches]: 7.10017e-07 [overlap_opt_shard_in_pipeline]: 9.50065e-07 [overlap_opt_shard_grad_in_pipeline]: 2.43995e-06 [control_data_broadcast_order]: 1.09e-06 [grouped_pairwise_exchange_alltoall]: 9.52999e-06 [offloading_packed_experts]: 2.72994e-06 [overlap_recompute_and_grad_model_parallel]: 1.46998e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.20029e-07 [overlap_recompute_allgather_and_fa_grad]: 7.523e-05 [overlap_grad_ring_attention]: 1.10001e-06 [overlap_grad_flash_sp]: 1.15801e-05 [begin_end_overlap_inline]: 6.59958e-07 [split_matmul_comm_elemetwise]: 1.90001e-06 [split_layernorm_comm]: 1.88011e-06 [handle_group_info]: 4.99992e-06 [symbol_engine_optimizer]: 9.151e-05, [1] [Cycle 1]: 8.667e-05, [6] [build]: 4.59002e-06 [elim_shapecalc]: 1.364e-05 [elim_not_effective]: 1.75e-05 [opt_reshape]: 8.61008e-06 [fold_const_symbol]: 1.478e-05 [renormalize]: 3.49944e-07 [pipeline_parallel_scheduler]: 1.71002e-06 [auto_monad_reorder]: 3.09701e-05 [get_jit_bprop_graph]: 4.50062e-07 [rewriter_after_jit_bprop_graph]: 4.10015e-07 [eliminate_special_op_node]: 0.0004971 [distribtued_split]: 4.094e-05 [validate]: 3.30099e-05 [task_emit]: 0.0688126 [execute]: 1.151e-05 Sums bootstrap : 0.000323s : 0.42% type_inference : 0.002595s : 3.41% auto_monad : 0.000124s : 0.16% graph : 0.000001s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000567s : 0.75% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000230s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000018s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000464s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000055s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000116s : 0.15% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000167s : 0.22% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : _reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000027s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000018s : 0.02% optimize.rewriter_before_opt_a : 0.000032s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000554s : 0.73% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000227s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000074s : 0.10% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000464s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000053s : 0.07% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000137s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_elimi TotalTime = 0.0803739, [21] [bootstrap]: 0.00028799 [type_inference]: 0.00237868 [auto_monad]: 0.00011 [graph_reusing]: 2.07999e-06 [inline]: 1.12003e-06 [parallel-infer-symbol]: 1.61002e-06 [pre_auto_parallel]: 2.321e-05 [insert-virtual-dataset]: 2.09e-06 [parallel-infer-symbol-second]: 4.20026e-07 [dataset_repeat_opt]: 9.39937e-07 [pipeline_split]: 1.04005e-06 [optimize]: 0.00716235, [52] [py_interpret_to_execute]: 1.70199e-05 [rewriter_before_opt_a]: 3.158e-05 [opt_a]: 0.00534268, [2] [Cycle 1]: 0.00150968, [43] [expand_dump_flag]: 2.42004e-06 [switch_simplify]: 2.919e-05 [loop_unroll]: 1.373e-05 [a_1]: 0.00033327 [recompute_prepare]: 8.61997e-06 [updatestate_depend_eliminate]: 8.46002e-06 [updatestate_assign_eliminate]: 5.69003e-06 [updatestate_loads_eliminate]: 6.31006e-06 [parameter_eliminate]: 2.69001e-06 [a_2]: 0.0001186 [accelerated_algorithm]: 8.76002e-06 [shard]: 1.4999e-06 [meta_shard_fg_expand]: 3.68e-06 [shard_inline]: 8.81997e-06 [auto_parallel]: 1.13901e-05 [parallel]: 6.28992e-06 [flash_sp]: 8.38004e-06 [merge_comm]: 7.13998e-06 [allreduce_fusion]: 5.53997e-06 [matmul_add_comm_reduction]: 9.10996e-06 [allreduce_slice_to_reducescatter]: 3.69968e-07 [virtual_shard_identity]: 9.59006e-06 [virtual_dataset]: 7.75e-06 [get_grad_eliminate_]: 7.68004e-06 [virtual_output]: 7.59005e-06 [merge_forward]: 5.21005e-06 [cell_reuse_recompute_pass]: 1.40001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.76999e-05 [before_grad]: 1.397e-05 [inplace_validation]: 5.10993e-06 [meta_fg_expand]: 5.53997e-06 [inplace_validation_after_expand]: 5.62007e-06 [flash_sp_send_recv_attached]: 3.71004e-06 [receive_attached]: 2.26009e-06 [after_resolve]: 1.189e-05 [a_after_grad]: 1.33701e-05 [special_op_eliminate]: 8.58004e-06 [renormalize]: 0.00043967 [add_forward_monad_depend]: 2.65997e-06 [auto_monad_grad]: 1.50001e-06 [auto_monad_eliminator]: 2.54801e-05 [cse]: 2.651e-05 [a_3]: 5.84399e-05 [Cycle 2]: 0.00079706, [43] [expand_dump_flag]: 1.19e-06 [switch_simplify]: 9.22009e-06 [loop_unroll]: 7.57002e-06 [a_1]: 0.00020731 [recompute_prepare]: 7.20005e-06 [updatestate_depend_eliminate]: 5.71006e-06 [updatestate_assign_eliminate]: 5.08002e-06 [updatestate_loads_eliminate]: 4.71005e-06 [parameter_eliminate]: 1.31002e-06 [a_2]: 0.00010646 [accelerated_algorithm]: 8.72009e-06 [shard]: 1.03994e-06 [meta_shard_fg_expand]: 2.79001e-06 [shard_inline]: 8.10996e-06 [auto_parallel]: 1.006e-05 [parallel]: 3.23006e-06 [flash_sp]: 2.5999e-06 [merge_comm]: 5.56e-06 [allreduce_fusion]: 4.83997e-06 [matmul_add_comm_reduction]: 7.27002e-06 [allreduce_slice_to_reducescatter]: 2.60072e-07 [virtual_shard_identity]: 9.94001e-06 [virtual_dataset]: 7.82008e-06 [get_grad_eliminate_]: 7.18993e-06 [virtual_output]: 7.53999e-06 [merge_forward]: 4.28001e-06 [cell_reuse_recompute_pass]: 1.65997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.63401e-05 [before_grad]: 1.27e-05 [inplace_validation]: 4.30003e-06 [meta_fg_expand]: 4.82006e-06 [inplace_validation_after_expand]: 5.32996e-06 [flash_sp_send_recv_attached]: 9.89996e-07 [receive_attached]: 8.10018e-07 [after_resolve]: 1.055e-05 [a_after_grad]: 1. 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000017s : 0.02% optimize.loop_unroll : 0.000499s : 0.66% optimize.opt_after_cconv.c_1 : 0.000055s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000061s : 0.08% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000006s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000072s : 0.10% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000015s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_ennate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000017s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000508s : 0.67% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000075s : 0.10% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.s228e-05 [special_op_eliminate]: 8.01007e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 9.2003e-07 [auto_monad_grad]: 1.00001e-06 [auto_monad_eliminator]: 1.717e-05 [cse]: 1.92301e-05 [a_3]: 5.10601e-05 [py_interpret_to_execute_after_opt_a]: 8.74e-06 [slice_cell_reuse_recomputed_activation]: 1.54995e-06 [rewriter_after_opt_a]: 0.00012518 [convert_after_rewriter]: 1.027e-05 [order_py_execute_after_rewriter]: 7.60006e-06 [opt_b]: 0.00025193, [1] [Cycle 1]: 0.00024625, [7] [b_1]: 0.0001687 [b_2]: 9.84001e-06 [updatestate_depend_eliminate]: 5.56e-06 [updatestate_assign_eliminate]: 4.23996e-06 [updatestate_loads_eliminate]: 5.14009e-06 [renormalize]: 3.20026e-07 [cse]: 1.889e-05 [optimize_parallel_all_gather_comm]: 7.53999e-06 [overlap_param_gather]: 9.00007e-07 [cconv]: 1.62501e-05 [loop_unroll]: 0.00049166 [opt_after_cconv]: 0.00013182, [1] [Cycle 1]: 0.00012643, [7] [c_1]: 5.33699e-05 [parameter_eliminate]: 1.72004e-06 [updatestate_depend_eliminate]: 7.13998e-06 [updatestate_assign_eliminate]: 5.13997e-06 [updatestate_loads_eliminate]: 5.34009e-06 [cse]: 2.036e-05 [renormalize]: 3.09898e-07 [remove_dup_value]: 1.134e-05 [tuple_transform]: 7.14901e-05, [1] [Cycle 1]: 6.726e-05, [2] [d_1]: 5.744e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.5999e-06 [add_cache_embedding]: 1.205e-05 [add_recomputation]: 5.355e-05 [cse_after_recomputation]: 2.738e-05, [1] [Cycle 1]: 2.276e-05, [1] [cse]: 1.732e-05 [environ_conv]: 6.37001e-06 [swap_dp_allreduce_reducescatter]: 6.88992e-06 [bias_add_comm_swap]: 1.71002e-06 [label_micro_interleaved_index]: 1.43005e-06 [label_fine_grained_interleaved_index]: 1.40001e-06 [merge_cast_opt]: 7.20029e-07 [slice_recompute_activation]: 1.32993e-06 [micro_interleaved_order_control]: 1.27999e-06 [assign_add_opt]: 2.65499e-05 [ForceFp32Comm]: 7.39936e-07 [remove_cast_before_assign_add]: 6.23998e-06 [full_micro_interleaved_order_control]: 1.20001e-06 [reorder_send_recv_between_fp_bp]: 1.11002e-06 [comm_op_add_attrs]: 2.233e-05 [add_comm_op_reuse_tag]: 1.69e-06 [interleave_split_concat_branches]: 5.10016e-07 [interleave_parallel_branches]: 5.79981e-07 [overlap_opt_shard_in_pipeline]: 8.5996e-07 [overlap_opt_shard_grad_in_pipeline]: 1.30001e-06 [control_data_broadcast_order]: 6.79982e-07 [grouped_pairwise_exchange_alltoall]: 7.48003e-06 [offloading_packed_experts]: 1.30001e-06 [overlap_recompute_and_grad_model_parallel]: 1.33005e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.79981e-07 [overlap_recompute_allgather_and_fa_grad]: 6.65999e-05 [overlap_grad_ring_attention]: 1.45996e-06 [overlap_grad_flash_sp]: 1.26801e-05 [begin_end_overlap_inline]: 4.89992e-07 [split_matmul_comm_elemetwise]: 1.21002e-06 [split_layernorm_comm]: 1.07998e-06 [handle_group_info]: 3.10002e-06 [symbol_engine_optimizer]: 9.054e-05, [1] [Cycle 1]: 8.58699e-05, [6] [build]: 4.42006e-06 [elim_shapecalc]: 1.342e-05 [elim_not_effective]: 1.65401e-05 [opt_reshape]: 9.02999e-06 [fold_const_symbol]: 1.41e-05 [renormalize]: 3.60073e-07 [pipeline_parallel_scheduler]: 9.70089e-07 [auto_monad_reorder]: 2.54699e-05 [get_jit_bprop_graph]: 3.10014e-07 [rewriter_after_jit_bprop_graph]: 5.00004e-07 [eliminate_special_op_node]: 0.00050454 [distribtued_split]: 3.558e-05 [validate]: 3.13701e-05 [task_emit]: 0.0695623 [execute]: 9.02999e-06 Sums bootstrap : 0.000288s : 0.38% type_inference : 0.002379s : 3.12% auto_monad : 0.000110s : 0.14% gine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000518s : 0.68% distribtued_split : 0.000033s : 0.04% validate : 0.000031s : 0.04% task_emit : 0.068805s : 90.57% execute : 0.000012s : 0.02% ymbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000497s : 0.65% distribtued_split : 0.000041s : 0.05% validate : 0.000033s : 0.04% task_emit : 0.068813s : 90.51% execute : 0.000012s : 0.02% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000023s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000032s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000541s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000225s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000026s : 0.03% optimize.opt_a.special_op_eliminate : 0.000017s : 0.02% optimize.opt_a.renormalize : 0.000440s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000043s : 0.06% optimize.opt_a.cse : 0.000046s : 0.06% optimize.opt_a.a_3 : 0.000110s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000125s : 0.16% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000008s : 0.01% optimize.opt_b.b_1 : 0.000169s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000492s : 0.64% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.01% optimize.tuple_transform.d_1 : 0.000057s : 0.08% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000054s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000027s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000022s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000007s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000067s : 0.09% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000025s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000505s : 0.66% distribtued_split : 0.000036s : 0.05% validate : 0.000031s : 0.04% task_emit : 0.069562s : 91.18% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000134 63 5.08% : 0.000007s : 2: substitution.depend_value_elim 1.86% : 0.000002s : 5: substitution.elim_not_effective 1.79% : 0.000002s : 5: substitution.fold_const_symbol 5.60% : 0.000007s : 6: substitution.graph_param_transform 51.54% : 0.000069s : 1: substitution.inline 4.22% : 0.000006s : 10: substitution.j_node_and_user_rematch 2.98% : 0.000004s : 6: substitution.load_eliminater 2.89% : 0.000004s : 2: substitution.reduce_all_const_elim 6.25% : 0.000008s : 10: substitution.remove_not_recompute_node 2.21% : 0.000003s : 2: substitution.replace_old_param 8.12% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.46% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002556 2 88.15% : 0.002253s : 1: type_inference.infer 11.85% : 0.000303s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000239 1420 0.86% : 0.000002s : 13: predicate.accumulaten_eliminater 1.10% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.69% : 0.000002s : 12: predicate.addn_check_dump 0.84% : 0.000002s : 13: predicate.addn_zero_filter 0.79% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.23% : 0.000005s : 25: predicate.arithmetic_simplify 0.77% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.70% : 0.000002s : 12: predicate.compare_switch_simplify 0.20% : 0.000000s : 6: predicate.const_output_eliminate 0.41% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.37% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.80% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.59% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.08% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.93% : 0.000005s : 31: predicate.environ_get_eliminate 1.20% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.25% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.04% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.46% : 0.000001s : 6: predicate.graph_param_transform 0.73% : 0.000002s : 12: predicate.incorporate_call 0.65% : 0.000002s : 12: predicate.incorporate_call_switch 5.79% : 0.000014s : 63: predicate.inline 1.05% : 0.000003s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.05% : 0.000003s : 12: predicate.less_batch_normalization 1.66% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.45% : 0.000006s : 38: predicate.load_eliminater 1.29% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicat Time group info: ------[substitution.] 0.000137 63 4.92% : 0.000007s : 2: substitution.depend_value_elim 2.32% : 0.000003s : 5: substitution.elim_not_effective 2.48% : 0.000003s : 5: substitution.fold_const_symbol 5.24% : 0.000007s : 6: substitution.graph_param_transform 49.82% : 0.000068s : 1: substitution.inline 3.88% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.26% : 0.000004s : 6: substitution.load_eliminater 2.89% : 0.000004s : 2: substitution.reduce_all_const_elim 6.27% : 0.000009s : 10: substitution.remove_not_recompute_node 2.64% : 0.000004s : 2: substitution.replace_old_param 7.89% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.39% : 0.000012s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002569 2 87.71% : 0.002253s : 1: type_inference.infer 12.29% : 0.000316s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000067 1 100.00% : 0.000067s : 1: match.inline ------[predicate.] 0.000231 1420 0.88% : 0.000002s : 13: predicate.accumulaten_eliminater 1.21% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.15% : 0.000005s : 25: predicate.arithmetic_simplify 0.90% : 0.000002s : 13: predicate.cast_eliminate 0.83% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.40% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.59% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.96% : 0.000005s : 31: predicate.environ_get_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.26% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.98% : 0.000002s : 12: predicate.get_grad_eliminate 0.26% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.70% : 0.000013s : 63: predicate.inline 1.02% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.08% : 0.000002s : 12: predicate.less_batch_normalization 1.68% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.45% : 0.000006s : 38: predicate.load_eliminater 1.31% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.18% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.75% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.77% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.69% : 0.000002s : 13: predicate.minmaximum_grad 0.73% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.81% : 0.000002s : 12: predicate.reduce_all_const_elim 1.15% : 0.000003s : 13: predicate.reduce_eliminate 0.63% : 0.000001s : 12: predicate.remove_not_recompute_node 1.10% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.22% : 0.000001s : 6: predicate.reset_defer_inline 0.91% : 0.000002s : 13: predicate.reshape_eliminate 0.94% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.94% : 0.000002s : 12: predicate.shard_identity_eliminate 1.33% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 1.15% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.38% : 0.000006s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.93% : 0.000002s : 14: predicate.switch_defer_inline 1.59% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.17% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.72% : 0.000002s : 13: predicate.transpose_eliminate 1.86% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.96% : 0.000005s : 25: predicate.tuple_list_get_item_const_eliminator 1.53% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.87% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.82% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.53% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.48% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.86% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.49% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000168 4 6.75% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.25% : 0.000156s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089426 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.04% : 0.000032s : 1: assign_add_opt 0.17% : 0.000148s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.39% : 0.000351s : 1: bootstrap 0.02% : 0.000021s : 1: cconv 0.04% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.00004e.mini_step_allgather_replace 0.80% : 0.000002s : 13: predicate.minmaximum_grad 0.67% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.53% : 0.000001s : 6: predicate.parallel_virtual_node 1.23% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.81% : 0.000002s : 13: predicate.print_const_string_wrapper 0.81% : 0.000002s : 12: predicate.reduce_all_const_elim 1.00% : 0.000002s : 13: predicate.reduce_eliminate 0.57% : 0.000001s : 12: predicate.remove_not_recompute_node 1.09% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.86% : 0.000002s : 13: predicate.reshape_eliminate 0.84% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.63% : 0.000001s : 6: predicate.row_tensor_eliminate 0.99% : 0.000002s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.00% : 0.000002s : 12: predicate.shard_identity_eliminate 1.48% : 0.000003s : 18: predicate.special_op_eliminate 1.01% : 0.000002s : 12: predicate.specialize_transform 1.06% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.06% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.26% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.24% : 0.000010s : 43: predicate.switch_simplify 0.76% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.84% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.52% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.64% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.61% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.63% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.46% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000188 4 9.46% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.54% : 0.000170s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089386 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000068s : 1: add_recomputation 0.04% : 0.000032s : 1: assign_add_opt 0.15% : 0.000136s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.39% : 0.000350s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.03% : 0.000031s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.60% : 0.000533s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000509s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.27% : 0.001140s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.17% : 0.000156s : 27: opt.transform.opt_b 0.07% : 0.000059s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.06% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.22% : 0.005562s : 1: opt_a 0.16% : 0.000141s : 1: opt_after_cconv 0.28% : 0.000254s : 1: opt_b 8.27% : 0.007396s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000008s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.09% : 0.000077s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.29% : 0.000255s : 1: renormalize.infer 0.23% : 0.000202s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000122s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000095s : 1: symbol_engine_optimizer 76.98% : 0.068836s : 1: task_emit 0.09% : 0.000078s : 1: tuple_transform 2.91% : 0.002603s : 1: type_inference 0.07% : 0.000062s : 1: validate 9s : 1: distribtued_split 0.57% : 0.000511s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.58% : 0.000518s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001117s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.06% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.19% : 0.005535s : 1: opt_a 0.15% : 0.000138s : 1: opt_after_cconv 0.28% : 0.000247s : 1: opt_b 8.27% : 0.007396s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.09% : 0.000081s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.03% : 0.000022s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.01% : 0.000013s : 1: remove_dup_value 0.28% : 0.000254s : 1: renormalize.infer 0.23% : 0.000203s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000143s : 1: rewriter_after_opt_a 0.04% : 0.000036s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000094s : 1: symbol_engine_optimizer 77.02% : 0.068843s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.92% : 0.002614s : 1: type_inference 0.08% : 0.000069s : 1: validate Time group info: ------[substitution.] 0.000117 63 4.89% : 0.000006s : 2: substitution.depend_value_elim 2.23% : 0.000003s : 5: substitution.elim_not_effective 1.90% : 0.000002s : 5: substitution.fold_const_symbol 6.06% : 0.000007s : 6: substitution.graph_param_transform 47.66% : 0.000056s : 1: substitution.inline 4.79% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.63% : 0.000004s : 6: substitution.load_eliminater 2.20% : 0.000003s : 2: substitution.reduce_all_const_elim 7.22% : 0.000008s : 10: substitution.remove_not_recompute_node 2.74% : 0.000003s : 2: substitution.replace_old_param 8.83% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 7.86% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002353 2 89.56% : 0.002108s : 1: type_inference.infer 10.44% : 0.000246s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000055 1 100.00% : 0.000055s : 1: match.inline ------[predicate.] 0.000236 1420 0.88% : 0.000002s : 13: predicate.accumulaten_eliminater 1.24% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.69% : 0.000002s : 12: predicate.addn_check_dump 0.82% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.16% : 0.000005s : 25: predicate.arithmetic_simplify 0.76% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.20% : 0.000000s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.23% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.86% : 0.000002s : 12: predicate.depend_value_elim 0.80% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.95% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.62% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.23% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_depend_swap 1.91% : 0.000005s : 31: predicate.environ_get_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.19% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.44% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.65% : 0.000002s : 12: predicate.incorporate_call_switch 5.53% : 0.000013s : 63: predicate.inline 0.98% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.12% : 0.000003s : 12: predicate.less_batch_normalization 1.78% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.47% : 0.000006s : 38: predicate.load_eliminater 1.31% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.78% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.81% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.68% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.18% : 0.000003s : 14: predicate.partial_defer_inline 1.21% : 0.000003s : 19: predicate.partial_eliminate 0.86% : 0.000002s : 13: predicate.print_const_string_wrapper 0.79% : 0.000002s : 12: predicate.reduce_all_const_elim 1.20% : 0.000003s : 13: predicate.reduce_eliminate 0.71% : 0.000002s : 12: predicate.remove_not_recompute_node 1.11% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.30% : 0.000001s : 6: predicate.reset_defer_inline 0.80% : 0.000002s : 13: predicate.reshape_eliminate 0.84% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 6: predicate.row_tensor_eliminate 1.06% : 0.000002s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.66% : 0.000004s : 18: predicate.special_op_eliminate 0.91% : 0.000002s : 12: predicate.specialize_transform 0.95% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.08% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.94% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.18% : 0.000010s : 43: predicate.switch_simplify 0.86% : 0.000002s : 13: predicate.tile_eliminate 0.78% : 0.000002s : 13: predicate.transpose_eliminate 1.87% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.81% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.53% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.49% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.47% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.56% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000147 4 7.35% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.65% : 0.000136s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089370 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.06% : 0.000058s : 1: add_recomputation 0.03% : 0.000031s : 1: assign_add_opt 0.14% : 0.000122s : 1: auto_monad 0.04% : 0.000032s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000313s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.03% : 0.000027s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.05% : 0.000047s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000043s : 1: distribtued_split 0.58% : 0.000517s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.01% : 0.000011s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000006s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.56% : 0.000501s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001106s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.18% : 0.000158s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 5.98% : 0.005346s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.29% : 0.000255s : 1: opt_b 8.02% : 0.007171s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000012s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000072s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000029s : 1: pre_auto_parallel 0.03% : 0.000022s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000009s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.26% : 0.000233s : 1: renormalize.infer 0.23% : 0.000201s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000131s : 1: rewriter_after_opt_a 0.04% : 0.000036s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000094s : 1: symbol_engine_optimizer 77.87% : 0.069588s : 1: task_emit 0.08% : 0.000075s : 1: tuple_transform 2.68% : 0.002396s : 1: type_inference 0.07% : 0.000065s : 1: validate TotalTime = 0.0814026, [21] [bootstrap]: 0.00028716 [type_inference]: 0.00255176 [auto_monad]: 0.00013845 [graph_reusing]: 2.26998e-06 [inline]: 1.34995e-06 [parallel-infer-symbol]: 2.20002e-06 [pre_auto_parallel]: 2.63e-05 [insert-virtual-dataset]: 2.40991e-06 [parallel-infer-symbol-second]: 4.10015e-07 [dataset_repeat_opt]: 1.17999e-06 [pipeline_split]: 1.55007e-06 [optimize]: 0.00743999, [52] [py_interpret_to_execute]: 1.661e-05 [rewriter_before_opt_a]: 3.44401e-05 [opt_a]: 0.00559233, [2] [Cycle 1]: 0.00167598, [43] [expand_dump_flag]: 3.13995e-06 [switch_simplify]: 3.018e-05 [loop_unroll]: 1.372e-05 [a_1]: 0.00034803 [recompute_prepare]: 9.35991e-06 [updatestate_depend_eliminate]: 8.54e-06 [updatestate_assign_eliminate]: 5.77001e-06 [updatestate_loads_eliminate]: 7.71997e-06 [parameter_eliminate]: 2.86999e-06 [a_2]: 0.00012123 [accelerated_algorithm]: 8.64e-06 [shard]: 1.96998e-06 [meta_shard_fg_expand]: 3.85998e-06 [shard_inline]: 8.82999e-06 [auto_parallel]: 1.22499e-05 [parallel]: 7.31996e-06 [flash_sp]: 1.027e-05 [merge_comm]: 8.28004e-06 [allreduce_fusion]: 5.39003e-06 [matmul_add_comm_reduction]: 1.209e-05 [allreduce_slice_to_reducescatter]: 4.50062e-07 [virtual_shard_identity]: 1.02e-05 [virtual_dataset]: 8.62998e-06 [get_grad_eliminate_]: 7.88004e-06 [virtual_output]: 7.93999e-06 [merge_forward]: 6.10005e-06 [cell_reuse_recompute_pass]: 1.81003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.692e-05 [before_grad]: 1.405e-05 [inplace_validation]: 5.6501e-06 [meta_fg_expand]: 5.73997e-06 [inplace_validation_after_expand]: 6.04009e-06 [flash_sp_send_recv_attached]: 4.57e-06 [receive_attached]: 2.65997e-06 [after_resolve]: 1.16399e-05 [a_after_grad]: 1.31699e-05 [special_op_eliminate]: 8.46991e-06 [renormalize]: 0.00055516 [add_forward_monad_depend]: 3.92995e-06 [auto_monad_grad]: 1.86998e-06 [auto_monad_eliminator]: 3.17401e-05 [cse]: 3.14401e-05 [a_3]: 5.91599e-05 [Cycle 2]: 0.00083281, [43] [expand_dump_flag]: 1.13004e-06 [switch_simplify]: 9.36002e-06 [loop_unroll]: 7.82998e-06 [a_1]: 0.00021012 [recompute_prepare]: 7.76001e-06 [updatestate_depend_eliminate]: 6.18992e-06 [updatestate_assign_eliminate]: 5.39003e-06 [updatestate_loads_eliminate]: 4.99003e-06 [parameter_eliminate]: 1.39e-06 [a_2]: 0.00010672 [accelerated_algorithm]: 8.32998e-06 [shard]: 1.15996e-06 [meta_shard_fg_expand]: 2.50002e-06 [shard_inline]: 8.57993e-06 [auto_parallel]: 1.075e-05 [parallel]: 3.63006e-06 [flash_sp]: 3.42994e-06 [merge_comm]: 5.83997e-06 [allreduce_fusion]: 4.94998e-06 [matmul_add_comm_reduction]: 8.31997e-06 [allreduce_slice_to_reducescatter]: 2.89991e-07 [virtual_shard_identity]: 9.30997e-06 [virtual_dataset]: 7.99005e-06 [get_grad_eliminate_]: 3.39099e-05 [virtual_output]: 8.37003e-06 [merge_forward]: 4.98001e-06 [cell_reuse_recompute_pass]: 1.84996e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.66901e-05 [before_grad]: 1.31599e-05 [inplace_validation]: 4.57e-06 [meta_fg_expand]: 4.75999e-06 [inplace_validation_after_expand]: 5.44009e-06 [flash_sp_send_recv_attached]: 1.03004e-06 [receive_attached]: 1.11002e-06 [after_resolve]: 1.033e-05 [a_after_grad]: 1.219e-05 [special_op_eliminate]: 7.26001e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 1.11002e-06 [auto_monad_grad]: 1.25996e-06 [auto_monad_eliminator]: 1.931e-05 [cse]: 2.05199e-05 [a_3]: 4.98401e-05 [py_interpret_to_execute_after_opt_a]: 9.02999e-06 [slice_cell_reuse_recomputed_activation]: 1.99e-06 [rewriter_after_opt_a]: 0.00013625 [convert_after_rewriter]: 7.80006e-06 [order_py_execute_after_rewriter]: 8.21007e-06 [opt_b]: 0.00024831, [1] [Cycle 1]: 0.00024279, [7] [b_1]: 0.00016625 [b_2]: 9.72999e-06 [updatestate_depend_eliminate]: 5.47001e-06 [updatestate_assign_eliminate]: 4.68001e-06 [updatestate_loads_eliminate]: 5.49003e-06 [renormalize]: 3.30037e-07 [cse]: 1.93401e-05 [optimize_parallel_all_gather_comm]: 8.49005e-06 [overlap_param_gather]: 7.89994e-07 [cconv]: 2.00099e-05 [loop_unroll]: 0.0005032 [opt_after_cconv]: 0.00013622, [1] [Cycle 1]: 0.00013009, [7] [c_1]: 5.35799e-05 [parameter_eliminate]: 2.44007e-06 [updatestate_depend_eliminate]: 8.40996e-06 [updatestate_assign_eliminate]: 5.42006e-06 [updatestate_loads_eliminate]: 5.5501e-06 [cse]: 2.21201e-05 [renormalize]: 3.69968e-07 [remove_dup_value]: 1.28601e-05 [tuple_transform]: 7.062e-05, [1] [Cycle 1]: 6.61e-05, [2] [d_1]: 5.612e-05 [renormalize]: 1.79978e-07 [partial_unused_args_eliminate]: 1.89e-06 [add_cache_embedding]: 1.321e-05 [add_recomputation]: 5.93499e-05 [cse_after_recomputation]: 2.679e-05, [1] [Cycle 1]: 2.204e-05, [1] [cse]: 1.72401e-05 [environ_conv]: 7.60006e-06 [swap_dp_allreduce_reducescatter]: 7.33999e-06 [bias_add_comm_swap]: 1.84006e-06 [label_micro_interleaved_index]: 1.53005e-06 [label_fine_grained_interleaved_index]: 2.03995e-06 [merge_cast_opt]: 1.00001e-06 [slice_recompute_activation]: 1.82004e-06 [micro_interleaved_order_control]: 1.9701e-06 [assign_add_opt]: 2.81e-05 [ForceFp32Comm]: 1.05996e-06 [remove_cast_before_assign_add]: 7.21996e-06 [full_micro_interleaved_order_control]: 1.83005e-06 [reorder_send_recv_between_fp_bp]: 1.72993e-06 [comm_op_add_attrs]: 2.54599e-05 [add_comm_op_reuse_tag]: 1.8701e-06 [interleave_split_concat_branches]: 6.00005e-07 [interleave_parallel_branches]: 8.79983e-07 [overlap_opt_shard_in_pipeline]: 1.02003e-06 [overlap_opt_shard_grad_in_pipeline]: 1.69e-06 [control_data_broadcast_order]: 8.50065e-07 [grouped_pairwise_exchange_alltoall]: 9.40997e-06 [offloading_packed_experts]: 1.65007e-06 [overlap_recompute_and_grad_model_parallel]: 1.71002e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.00005e-07 [overlap_recompute_allgather_and_fa_grad]: 6.845e-05 [overlap_grad_ring_attention]: 1.92004e-06 [overlap_grad_flash_sp]: 1.506e-05 [begin_end_overlap_inline]: 6.70087e-07 [split_matmul_comm_elemetwise]: 1.62004e-06 [split_layernorm_comm]: 1.59e-06 [handle_group_info]: 3.94997e-06 [symbol_engine_optimizer]: 9.395e-05, [1] [Cycle 1]: 8.896e-05, [6] [build]: 4.91994e-06 [elim_shapecalc]: 1.40801e-05 [elim_not_effective]: 1.69501e-05 [opt_reshape]: 9.47004e-06 [fold_const_symbol]: 1.453e-05 [renormalize]: 3.60073e-07 [pipeline_parallel_scheduler]: 1.15007e-06 [auto_monad_reorder]: 2.779e-05 [get_jit_bprop_graph]: 4.10015e-07 [rewriter_after_jit_bprop_graph]: 3.50061e-07 [eliminate_special_op_node]: 0.0005179 [distribtued_split]: 3.943e-05 [validate]: 3.53501e-05 [task_emit]: 0.0700723 [execute]: 1.123e-05 Sums bootstrap : 0.000287s : 0.37% type_inference : 0.002552s : 3.30% auto_monad : 0.000138s : 0.18% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000558s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000228s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.03% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000042s : 0.05% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000555s : 0.72% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000136s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000008s : 0.01% optimize.opt_b.b_1 : 0.000166s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000020s : 0.03% optimize.loop_unroll : 0.000503s : 0.65% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000059s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000028s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000025s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000068s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000004s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000028s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000518s : 0.67% distribtued_split : 0.000039s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.070072s : 90.65% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000132 63 4.52% : 0.000006s : 2: substitution.depend_value_elim 2.22% : 0.000003s : 5: substitution.elim_not_effective 1.93% : 0.000003s : 5: substitution.fold_const_symbol 5.18% : 0.000007s : 6: substitution.graph_param_transform 50.75% : 0.000067s : 1: substitution.inline 4.26% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.21% : 0.000004s : 6: substitution.load_eliminater 2.88% : 0.000004s : 2: substitution.reduce_all_const_elim 6.24% : 0.000008s : 10: substitution.remove_not_recompute_node 2.50% : 0.000003s : 2: substitution.replace_old_param 8.56% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.76% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002521 2 87.74% : 0.002212s : 1: type_inference.infer 12.26% : 0.000309s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000066 1 100.00% : 0.000066s : 1: match.inline ------[predicate.] 0.000231 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.19% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.78% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.79% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.09% : 0.000005s : 25: predicate.arithmetic_simplify 0.91% : 0.000002s : 13: predicate.cast_eliminate 0.80% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.40% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.89% : 0.000002s : 12: predicate.depend_value_elim 0.88% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.96% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.61% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_depend_swap 1.95% : 0.000005s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.24% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.64% : 0.000013s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.11% : 0.000003s : 12: predicate.less_batch_normalization 1.71% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.43% : 0.000006s : 38: predicate.load_eliminater 1.50% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.91% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.83% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.76% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.20% : 0.000003s : 14: predicate.partial_defer_inline 1.20% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.83% : 0.000002s : 12: predicate.reduce_all_const_elim 1.06% : 0.000002s : 13: predicate.reduce_eliminate 0.64% : 0.000001s : 12: predicate.remove_not_recompute_node 1.08% : 0.000003s : 25: predicate.replace_applicator 0.43% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.76% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.05% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.52% : 0.000004s : 18: predicate.special_op_eliminate 0.91% : 0.000002s : 12: predicate.specialize_transform 1.06% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.26% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.89% : 0.000002s : 14: predicate.switch_defer_inline 1.62% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.37% : 0.000010s : 43: predicate.switch_simplify 0.75% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.75% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.45% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.58% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.55% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.70% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.34% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.38% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.52% : 0.000001s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.86% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000184 4 7.30% : 0.000013s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.70% : 0.000171s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090839 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.04% : 0.000032s : 1: assign_add_opt 0.17% : 0.000152s : 1: auto_monad 0.04% : 0.000034s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.34% : 0.000312s : 1: bootstrap 0.03% : 0.000024s : 1: cconv 0.03% : 0.000030s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000047s : 1: distribtued_split 0.59% : 0.000533s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000513s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.27% : 0.001155s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000156s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000034s : 3: opt.transform.special_op_eliminate 0.06% : 0.000051s : 4: opt.transform.symbol_engine_opt 6.16% : 0.005596s : 1: opt_a 0.15% : 0.000140s : 1: opt_after_cconv 0.28% : 0.000251s : 1: opt_b 8.20% : 0.007449s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000012s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000074s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.36% : 0.000329s : 1: renormalize.infer 0.24% : 0.000219s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000142s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000097s : 1: symbol_engine_optimizer 77.17% : 0.070099s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.83% : 0.002570s : 1: type_inference 0.08% : 0.000069s : 1: validate TotalTime = 0.0824201, [21] [bootstrap]: 0.00031714 [type_inference]: 0.00273932 [auto_monad]: 0.00013704 [graph_reusing]: 2.35997e-06 [inline]: 1.31002e-06 [parallel-infer-symbol]: 2.05007e-06 [pre_auto_parallel]: 2.812e-05 [insert-virtual-dataset]: 2.96999e-06 [parallel-infer-symbol-second]: 3.49944e-07 [dataset_repeat_opt]: 1.40001e-06 [pipeline_split]: 1.46008e-06 [optimize]: 0.00757046, [52] [py_interpret_to_execute]: 2.003e-05 [rewriter_before_opt_a]: 3.556e-05 [opt_a]: 0.00566865, [2] [Cycle 1]: 0.0016393, [43] [expand_dump_flag]: 3.92995e-06 [switch_simplify]: 3.22e-05 [loop_unroll]: 1.47601e-05 [a_1]: 0.00035672 [recompute_prepare]: 8.51997e-06 [updatestate_depend_eliminate]: 9.14e-06 [updatestate_assign_eliminate]: 6.42997e-06 [updatestate_loads_eliminate]: 8.76002e-06 [parameter_eliminate]: 3.62005e-06 [a_2]: 0.00012298 [accelerated_algorithm]: 8.41997e-06 [shard]: 2.15997e-06 [meta_shard_fg_expand]: 4.08001e-06 [shard_inline]: 8.70007e-06 [auto_parallel]: 1.32001e-05 [parallel]: 7.90006e-06 [flash_sp]: 1.274e-05 [merge_comm]: 9.17003e-06 [allreduce_fusion]: 5.64998e-06 [matmul_add_comm_reduction]: 1.203e-05 [allreduce_slice_to_reducescatter]: 4.29922e-07 [virtual_shard_identity]: 1.00499e-05 [virtual_dataset]: 8.22998e-06 [get_grad_eliminate_]: 7.92008e-06 [virtual_output]: 7.77002e-06 [merge_forward]: 6.58003e-06 [cell_reuse_recompute_pass]: 1.96998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.80301e-05 [before_grad]: 1.437e-05 [inplace_validation]: 6.23998e-06 [meta_fg_expand]: 5.58991e-06 [inplace_validation_after_expand]: 7.28003e-06 [flash_sp_send_recv_attached]: 5.64998e-06 [receive_attached]: 2.93995e-06 [after_resolve]: 1.209e-05 [a_after_grad]: 1.30601e-05 [special_op_eliminate]: 8.18004e-06 [renormalize]: 0.00048565 [add_forward_monad_depend]: 4.29002e-06 [auto_monad_grad]: 2.00991e-06 [auto_monad_eliminator]: 3.527e-05 [cse]: 3.575e-05 [a_3]: 5.97601e-05 [Cycle 2]: 0.00085118, [43] [expand_dump_flag]: 1.05007e-06 [switch_simplify]: 9.39996e-06 [loop_unroll]: 8.02998e-06 [a_1]: 0.00025762 [recompute_prepare]: 7.86991e-06 [updatestate_depend_eliminate]: 6.29993e-06 [updatestate_assign_eliminate]: 5.11005e-06 [updatestate_loads_eliminate]: 5.44998e-06 [parameter_eliminate]: 1.36998e-06 [a_2]: 0.00010673 [accelerated_algorithm]: 8.00996e-06 [shard]: 1.10001e-06 [meta_shard_fg_expand]: 2.59001e-06 [shard_inline]: 8.22998e-06 [auto_parallel]: 1.069e-05 [parallel]: 3.58e-06 [flash_sp]: 3.42005e-06 [merge_comm]: 5.78002e-06 [allreduce_fusion]: 4.67e-06 [matmul_add_comm_reduction]: 8.66991e-06 [allreduce_slice_to_reducescatter]: 3.40049e-07 [virtual_shard_identity]: 9.60997e-06 [virtual_dataset]: 7.92998e-06 [get_grad_eliminate_]: 7.30995e-06 [virtual_output]: 7.99005e-06 [merge_forward]: 4.64998e-06 [cell_reuse_recompute_pass]: 1.86998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.587e-05 [before_grad]: 1.242e-05 [inplace_validation]: 4.27e-06 [meta_fg_expand]: 4.81005e-06 [inplace_validation_after_expand]: 5.97991e-06 [flash_sp_send_recv_attached]: 1.21002e-06 [receive_attached]: 7.50064e-07 [after_resolve]: 1.056e-05 [a_after_grad]: 1.183e-05 [special_op_eliminate]: 7.11996e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 9.79984e-07 [auto_monad_grad]: 1.47999e-06 [auto_monad_eliminator]: 1.908e-05 [cse]: 2.049e-05 [a_3]: 4.91199e-05 [py_interpret_to_execute_after_opt_a]: 1.011e-05 [slice_cell_reuse_recomputed_activation]: 2.36998e-06 [rewriter_after_opt_a]: 0.00014022 [convert_after_rewriter]: 8.71997e-06 [order_py_execute_after_rewriter]: 6.24999e-06 [opt_b]: 0.00024906, [1] [Cycle 1]: 0.00024339, [7] [b_1]: 0.000166 [b_2]: 9.94001e-06 [updatestate_depend_eliminate]: 5.59993e-06 [updatestate_assign_eliminate]: 4.72995e-06 [updatestate_loads_eliminate]: 5.04998e-06 [renormalize]: 2.30037e-07 [cse]: 1.918e-05 [optimize_parallel_all_gather_comm]: 8.12998e-06 [overlap_param_gather]: 1.15996e-06 [cconv]: 2.53801e-05 [loop_unroll]: 0.0005095 [opt_after_cconv]: 0.0001363, [1] [Cycle 1]: 0.00013037, [7] [c_1]: 5.405e-05 [parameter_eliminate]: 2.36998e-06 [updatestate_depend_eliminate]: 8.50006e-06 [updatestate_assign_eliminate]: 5.40994e-06 [updatestate_loads_eliminate]: 5.73997e-06 [cse]: 2.179e-05 [renormalize]: 4.10015e-07 [remove_dup_value]: 1.364e-05 [tuple_transform]: 7.002e-05, [1] [Cycle 1]: 6.558e-05, [2] [d_1]: 5.585e-05 [renormalize]: 2.30037e-07 [partial_unused_args_eliminate]: 2.01992e-06 [add_cache_embedding]: 1.479e-05 [add_recomputation]: 6.446e-05 [cse_after_recomputation]: 2.802e-05, [1] [Cycle 1]: 2.304e-05, [1] [cse]: 1.75401e-05 [environ_conv]: 7.08003e-06 [swap_dp_allreduce_reducescatter]: 7.92998e-06 [bias_add_comm_swap]: 2.16998e-06 [label_micro_interleaved_index]: 2.60002e-06 [label_fine_grained_interleaved_index]: 2.13995e-06 [merge_cast_opt]: 1.03004e-06 [slice_recompute_activation]: 2.35997e-06 [micro_interleaved_order_control]: 1.91992e-06 [assign_add_opt]: 2.91499e-05 [ForceFp32Comm]: 8.69972e-07 [remove_cast_before_assign_add]: 7.4401e-06 [full_micro_interleaved_order_control]: 2.32004e-06 [reorder_send_recv_between_fp_bp]: 2.16998e-06 [comm_op_add_attrs]: 2.91601e-05 [add_comm_op_reuse_tag]: 2.11003e-06 [interleave_split_concat_branches]: 9.10019e-07 [interleave_parallel_branches]: 8.89995e-07 [overlap_opt_shard_in_pipeline]: 9.39937e-07 [overlap_opt_shard_grad_in_pipeline]: 2.89991e-06 [control_data_broadcast_order]: 1.06997e-06 [grouped_pairwise_exchange_alltoall]: 1.016e-05 [offloading_packed_experts]: 2.12993e-06 [overlap_recompute_and_grad_model_parallel]: 2.13995e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.39937e-07 [overlap_recompute_allgather_and_fa_grad]: 7.37001e-05 [overlap_grad_ring_attention]: 2.34006e-06 [overlap_grad_flash_sp]: 1.529e-05 [begin_end_overlap_inline]: 1.06997e-06 [split_matmul_comm_elemetwise]: 1.98989e-06 [split_layernorm_comm]: 2.62994e-06 [handle_group_info]: 5.42006e-06 [symbol_engine_optimizer]: 9.59999e-05, [1] [Cycle 1]: 9.06e-05, [6] [build]: 5.48002e-06 [elim_shapecalc]: 1.39601e-05 [elim_not_effective]: 1.73501e-05 [opt_reshape]: 9.71998e-06 [fold_const_symbol]: 1.552e-05 [renormalize]: 2.49944e-07 [pipeline_parallel_scheduler]: 1.56008e-06 [auto_monad_reorder]: 3.428e-05 [get_jit_bprop_graph]: 5.00004e-07 [rewriter_after_jit_bprop_graph]: 4.30038e-07 [eliminate_special_op_node]: 0.00051664 [distribtued_split]: 4.301e-05 [validate]: 3.652e-05 [task_emit]: 0.0707004 [execute]: 1.162e-05 Sums bootstrap : 0.000317s : 0.41% type_inference : 0.002739s : 3.50% auto_monad : 0.000137s : 0.18% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000028s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000020s : 0.03% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000042s : 0.05% optimize.opt_a.loop_unroll : 0.000023s : 0.03% optimize.opt_a.a_1 : 0.000614s : 0.79% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000230s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000021s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000007s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000023s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000486s : 0.62% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000054s : 0.07% optimize.opt_a.cse : 0.000056s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000140s : 0.18% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000166s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000509s : 0.65% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000003s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000029s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000074s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000003s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000016s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000034s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000517s : 0.66% distribtued_split : 0.000043s : 0.06% validate : 0.000037s : 0.05% task_emit : 0.070700s : 90.42% execute : 0.000012s : 0.01% TotalTime = 0.0828109, [21] [bootstrap]: 0.00030883 [type_inference]: 0.00257377 [auto_monad]: 0.00013401 [graph_reusing]: 2.85008e-06 [inline]: 1.39e-06 [parallel-infer-symbol]: 2.19001e-06 [pre_auto_parallel]: 2.492e-05 [insert-virtual-dataset]: 2.29001e-06 [parallel-infer-symbol-second]: 3.89991e-07 [dataset_repeat_opt]: 1.20001e-06 [pipeline_split]: 1.76008e-06 [optimize]: 0.00737932, [52] [py_interpret_to_execute]: 1.768e-05 [rewriter_before_opt_a]: 3.456e-05 [opt_a]: 0.005523, [2] [Cycle 1]: 0.00158609, [43] [expand_dump_flag]: 3.71004e-06 [switch_simplify]: 3.202e-05 [loop_unroll]: 1.363e-05 [a_1]: 0.00035222 [recompute_prepare]: 8.72998e-06 [updatestate_depend_eliminate]: 9.24e-06 [updatestate_assign_eliminate]: 6.12997e-06 [updatestate_loads_eliminate]: 7.68004e-06 [parameter_eliminate]: 2.99001e-06 [a_2]: 0.00012254 [accelerated_algorithm]: 8.85001e-06 [shard]: 2.06998e-06 [meta_shard_fg_expand]: 4.21004e-06 [shard_inline]: 8.71997e-06 [auto_parallel]: 1.29e-05 [parallel]: 6.98993e-06 [flash_sp]: 1.095e-05 [merge_comm]: 8.43999e-06 [allreduce_fusion]: 5.35999e-06 [matmul_add_comm_reduction]: 1.067e-05 [allreduce_slice_to_reducescatter]: 6.10016e-07 [virtual_shard_identity]: 9.91998e-06 [virtual_dataset]: 8.1599e-06 [get_grad_eliminate_]: 7.91997e-06 [virtual_output]: 7.81007e-06 [merge_forward]: 5.72007e-06 [cell_reuse_recompute_pass]: 1.66008e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.82301e-05 [before_grad]: 1.41e-05 [inplace_validation]: 4.89992e-06 [meta_fg_expand]: 5.52996e-06 [inplace_validation_after_expand]: 6.31995e-06 [flash_sp_send_recv_attached]: 4.6799e-06 [receive_attached]: 2.84007e-06 [after_resolve]: 1.206e-05 [a_after_grad]: 1.349e-05 [special_op_eliminate]: 8.36002e-06 [renormalize]: 0.00046178 [add_forward_monad_depend]: 3.72995e-06 [auto_monad_grad]: 1.94996e-06 [auto_monad_eliminator]: 3.19401e-05 [cse]: 3.232e-05 [a_3]: 6.05e-05 [Cycle 2]: 0.00086473, [43] [expand_dump_flag]: 1.06008e-06 [switch_simplify]: 9.32999e-06 [loop_unroll]: 7.80995e-06 [a_1]: 0.00021018 [recompute_prepare]: 7.50995e-06 [updatestate_depend_eliminate]: 5.52996e-06 [updatestate_assign_eliminate]: 5.08002e-06 [updatestate_loads_eliminate]: 5.25999e-06 [parameter_eliminate]: 1.31002e-06 [a_2]: 0.00011002 [accelerated_algorithm]: 8.60007e-06 [shard]: 1.24006e-06 [meta_shard_fg_expand]: 2.65008e-06 [shard_inline]: 8.42998e-06 [auto_parallel]: 1.08e-05 [parallel]: 3.44997e-06 [flash_sp]: 3.34997e-06 [merge_comm]: 5.99003e-06 [allreduce_fusion]: 4.68001e-06 [matmul_add_comm_reduction]: 7.42998e-06 [allreduce_slice_to_reducescatter]: 2.79979e-07 [virtual_shard_identity]: 9.55001e-06 [virtual_dataset]: 7.59005e-06 [get_grad_eliminate_]: 7.30006e-06 [virtual_output]: 7.79005e-06 [merge_forward]: 4.19992e-06 [cell_reuse_recompute_pass]: 1.96998e-06 [cell_reuse_handle_not_recompute_node_pass]: 7.402e-05 [before_grad]: 1.341e-05 [inplace_validation]: 4.38991e-06 [meta_fg_expand]: 5.12006e-06 [inplace_validation_after_expand]: 5.46e-06 [flash_sp_send_recv_attached]: 1.19e-06 [receive_attached]: 8.50065e-07 [after_resolve]: 1.012e-05 [a_after_grad]: 1.27e-05 [special_op_eliminate]: 8.08004e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 1.19e-06 [auto_monad_grad]: 1.20001e-06 [auto_monad_eliminator]: 1.956e-05 [cse]: 2.07999e-05 [a_3]: 5.08e-05 [py_interpret_to_execute_after_opt_a]: 9.81009e-06 [slice_cell_reuse_recomputed_activation]: 2.32004e-06 [rewriter_after_opt_a]: 0.00013609 [convert_after_rewriter]: 1.093e-05 [order_py_execute_after_rewriter]: 5.96e-06 [opt_b]: 0.0002497, [1] [Cycle 1]: 0.00024435, [7] [b_1]: 0.00016889 [b_2]: 9.67004e-06 [updatestate_depend_eliminate]: 5.33997e-06 [updatestate_assign_eliminate]: 4.42995e-06 [updatestate_loads_eliminate]: 5.25999e-06 [renormalize]: 3.7998e-07 [cse]: 1.947e-05 [optimize_parallel_all_gather_comm]: 8.54e-06 [overlap_param_gather]: 1.15996e-06 [cconv]: 2.225e-05 [loop_unroll]: 0.0005081 [opt_after_cconv]: 0.00013694, [1] [Cycle 1]: 0.00013098, [7] [c_1]: 5.432e-05 [parameter_eliminate]: 2.51003e-06 [updatestate_depend_eliminate]: 8.46991e-06 [updatestate_assign_eliminate]: 5.07e-06 [updatestate_loads_eliminate]: 5.53008e-06 [cse]: 2.22001e-05 [renormalize]: 4.30038e-07 [remove_dup_value]: 1.178e-05 [tuple_transform]: 6.91201e-05, [1] [Cycle 1]: 6.51299e-05, [2] [d_1]: 5.575e-05 [renormalize]: 2.00002e-07 [partial_unused_args_eliminate]: 2.01003e-06 [add_cache_embedding]: 1.345e-05 [add_recomputation]: 6.084e-05 [cse_after_recomputation]: 2.694e-05, [1] [Cycle 1]: 2.235e-05, [1] [cse]: 1.729e-05 [environ_conv]: 7.60995e-06 [swap_dp_allreduce_reducescatter]: 7.66001e-06 [bias_add_comm_swap]: 2.17999e-06 [label_micro_interleaved_index]: 2.41003e-06 [label_fine_grained_interleaved_index]: 1.97999e-06 [merge_cast_opt]: 1.05007e-06 [slice_recompute_activation]: 1.6999e-06 [micro_interleaved_order_control]: 1.91003e-06 [assign_add_opt]: 2.829e-05 [ForceFp32Comm]: 1.00001e-06 [remove_cast_before_assign_add]: 7.17002e-06 [full_micro_interleaved_order_control]: 2.29001e-06 [reorder_send_recv_between_fp_bp]: 1.94996e-06 [comm_op_add_attrs]: 2.579e-05 [add_comm_op_reuse_tag]: 2.02004e-06 [interleave_split_concat_branches]: 8.39937e-07 [interleave_parallel_branches]: 6.59958e-07 [overlap_opt_shard_in_pipeline]: 8.89995e-07 [overlap_opt_shard_grad_in_pipeline]: 1.73994e-06 [control_data_broadcast_order]: 1.25996e-06 [grouped_pairwise_exchange_alltoall]: 9.29006e-06 [offloading_packed_experts]: 1.93005e-06 [overlap_recompute_and_grad_model_parallel]: 2.01003e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.09901e-07 [overlap_recompute_allgather_and_fa_grad]: 6.944e-05 [overlap_grad_ring_attention]: 1.91992e-06 [overlap_grad_flash_sp]: 1.39701e-05 [begin_end_overlap_inline]: 7.80099e-07 [split_matmul_comm_elemetwise]: 2.12004e-06 [split_layernorm_comm]: 1.59e-06 [handle_group_info]: 4.74998e-06 [symbol_engine_optimizer]: 9.162e-05, [1] [Cycle 1]: 8.687e-05, [6] [build]: 5.09003e-06 [elim_shapecalc]: 1.303e-05 [elim_not_effective]: 1.759e-05 [opt_reshape]: 9.54e-06 [fold_const_symbol]: 1.45099e-05 [renormalize]: 3.50061e-07 [pipeline_parallel_scheduler]: 1.42003e-06 [auto_monad_reorder]: 3.014e-05 [get_jit_bprop_graph]: 4.69969e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00052048 [distribtued_split]: 3.937e-05 [validate]: 3.488e-05 [task_emit]: 0.0715041 [execute]: 1.094e-05 Sums bootstrap : 0.000309s : 0.39% type_inference : 0.002574s : 3.27% auto_monad : 0.000134s : 0.17% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000018s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000041s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000562s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000233s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000092s : 0.12% optimize.opt_a.before_grad : 0.000028s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000026s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000462s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000052s : 0.07% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000111s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000136s : 0.17% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000169s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000508s : 0.65% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.01% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000028s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000026s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000069s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000520s : 0.66% distribtued_split : 0.000039s : 0.05% validate : 0.000035s : 0.04% task_emit : 0.071504s : 90.80% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000141 63 4.93% : 0.000007s : 2: substitution.depend_value_elim 2.10% : 0.000003s : 5: substitution.elim_not_effective 2.04% : 0.000003s : 5: substitution.fold_const_symbol 5.40% : 0.000008s : 6: substitution.graph_param_transform 51.77% : 0.000073s : 1: substitution.inline 4.11% : 0.000006s : 10: substitution.j_node_and_user_rematch 2.97% : 0.000004s : 6: substitution.load_eliminater 2.38% : 0.000003s : 2: substitution.reduce_all_const_elim 6.22% : 0.000009s : 10: substitution.remove_not_recompute_node 2.61% : 0.000004s : 2: substitution.replace_old_param 8.12% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.35% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002709 2 88.65% : 0.002402s : 1: type_inference.infer 11.35% : 0.000307s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000072 1 100.00% : 0.000072s : 1: match.inline ------[predicate.] 0.000234 1420 0.82% : 0.000002s : 13: predicate.accumulaten_eliminater 1.27% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.76% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.28% : 0.000005s : 25: predicate.arithmetic_simplify 0.86% : 0.000002s : 13: predicate.cast_eliminate 0.77% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.42% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.77% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.07% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_depend_swap 1.88% : 0.000004s : 31: predicate.environ_get_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.87% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.21% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.70% : 0.000013s : 63: predicate.inline 1.12% : 0.000003s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.99% : 0.000002s : 12: predicate.less_batch_normalization 1.65% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.46% : 0.000006s : 38: predicate.load_eliminater 1.45% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.27% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.71% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.79% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.83% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.49% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.07% : 0.000002s : 14: predicate.partial_defer_inline 1.19% : 0.000003s : 19: predicate.partial_eliminate 0.76% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.01% : 0.000002s : 13: predicate.reduce_eliminate 0.64% : 0.000002s : 12: predicate.remove_not_recompute_node 1.11% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.49% : 0.000001s : 6: predicate.row_tensor_eliminate 1.07% : 0.000003s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.99% : 0.000002s : 12: predicate.shard_identity_eliminate 1.44% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 0.94% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.94% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.95% : 0.000002s : 14: predicate.switch_defer_inline 1.72% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.47% : 0.000010s : 43: predicate.switch_simplify 0.89% : 0.000002s : 13: predicate.tile_eliminate 0.77% : 0.000002s : 13: predicate.transpose_eliminate 1.80% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.64% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.46% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.48% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.48% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.40% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.48% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.86% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.86% : 0.000002s : 12: predicate.virtual_output_eliminate 0.65% : 0.000002s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000175 4 10.02% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.98% : 0.000158s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091951 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.01% : 0.000006s : 1: add_comm_op_reuse_tag 0.08% : 0.000069s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.17% : 0.000152s : 1: auto_monad 0.04% : 0.000040s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.39% : 0.000357s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.04% : 0.000033s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.58% : 0.000532s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000009s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000519s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.29% : 0.001189s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000157s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.06% : 0.000052s : 4: opt.transform.symbol_engine_opt 6.17% : 0.005672s : 1: opt_a 0.15% : 0.000140s : 1: opt_after_cconv 0.27% : 0.000252s : 1: opt_b 8.24% : 0.007579s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.09% : 0.000080s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000034s : 1: pre_auto_parallel 0.03% : 0.000025s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.29% : 0.000268s : 1: renormalize.infer 0.23% : 0.000211s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000146s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000006s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000099s : 1: symbol_engine_optimizer 76.93% : 0.070734s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 3.00% : 0.002759s : 1: type_inference 0.08% : 0.000073s : 1: validate Time group info: ------[substitution.] 0.000191 63 3.56% : 0.000007s : 2: substitution.depend_value_elim 1.56% : 0.000003s : 5: substitution.elim_not_effective 1.39% : 0.000003s : 5: substitution.fold_const_symbol 3.66% : 0.000007s : 6: substitution.graph_param_transform 35.03% : 0.000067s : 1: substitution.inline 2.96% : 0.000006s : 10: substitution.j_node_and_user_rematch 2.33% : 0.000004s : 6: substitution.load_eliminater 2.23% : 0.000004s : 2: substitution.reduce_all_const_elim 34.25% : 0.000065s : 10: substitution.remove_not_recompute_node 1.75% : 0.000003s : 2: substitution.replace_old_param 5.74% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 5.56% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002543 2 88.41% : 0.002248s : 1: type_inference.infer 11.59% : 0.000295s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000066 1 100.00% : 0.000066s : 1: match.inline ------[predicate.] 0.000234 1420 0.92% : 0.000002s : 13: predicate.accumulaten_eliminater 1.23% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.36% : 0.000006s : 25: predicate.arithmetic_simplify 0.81% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.47% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.77% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.53% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.08% : 0.000003s : 19: predicate.environ_get_depend_swap 1.99% : 0.000005s : 31: predicate.environ_get_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.26% : 0.000003s : 14: predicate.float_depend_g_call 0.79% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.20% : 0.000000s : 6: predicate.fold_const_symbol 0.75% : 0.000002s : 12: predicate.get_grad_eliminate 0.32% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.84% : 0.000014s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.42% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.10% : 0.000003s : 12: predicate.less_batch_normalization 1.67% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.40% : 0.000006s : 38: predicate.load_eliminater 1.53% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.75% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.72% : 0.000002s : 13: predicate.minmaximum_grad 0.75% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.50% : 0.000001s : 6: predicate.parallel_virtual_node 1.18% : 0.000003s : 14: predicate.partial_defer_inline 1.29% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.11% : 0.000003s : 13: predicate.reduce_eliminate 0.82% : 0.000002s : 12: predicate.remove_not_recompute_node 1.11% : 0.000003s : 25: predicate.replace_applicator 0.50% : 0.000001s : 12: predicate.replace_old_param 0.29% : 0.000001s : 6: predicate.reset_defer_inline 0.76% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.05% : 0.000002s : 12: predicate.same_eliminate 0.45% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.97% : 0.000002s : 12: predicate.shard_identity_eliminate 1.46% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.08% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 1.02% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.31% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.59% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.48% : 0.000011s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.75% : 0.000002s : 13: predicate.transpose_eliminate 1.74% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.52% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.69% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.46% : 0.000003s : 25: predicate.tuple_list_get_set_item_eliminator 2.45% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.28% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.26% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.48% : 0.000001s : 6: predicate.value_based_eliminate 0.88% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000180 4 8.66% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.34% : 0.000164s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092145 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000065s : 1: add_recomputation 0.04% : 0.000032s : 1: assign_add_opt 0.16% : 0.000147s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000332s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.03% : 0.000030s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000047s : 1: distribtued_split 0.58% : 0.000535s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000518s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.30% : 0.001201s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000159s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.00% : 0.005527s : 1: opt_a 0.15% : 0.000141s : 1: opt_after_cconv 0.27% : 0.000253s : 1: opt_b 8.02% : 0.007387s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000075s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.03% : 0.000030s : 1: pre_auto_parallel 0.02% : 0.000023s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.27% : 0.000246s : 1: renormalize.infer 0.23% : 0.000209s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000142s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000095s : 1: symbol_engine_optimizer 77.63% : 0.071531s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.81% : 0.002593s : 1: type_inference 0.07% : 0.000069s : 1: validate TotalTime = 0.0831698, [21] [bootstrap]: 0.00031667 [type_inference]: 0.00253042 [auto_monad]: 0.00011146 [graph_reusing]: 1.66998e-06 [inline]: 1.24006e-06 [parallel-infer-symbol]: 1.54995e-06 [pre_auto_parallel]: 2.278e-05 [insert-virtual-dataset]: 1.86998e-06 [parallel-infer-symbol-second]: 4.10015e-07 [dataset_repeat_opt]: 7.79983e-07 [pipeline_split]: 1.14006e-06 [optimize]: 0.00749378, [52] [py_interpret_to_execute]: 1.497e-05 [rewriter_before_opt_a]: 3.111e-05 [opt_a]: 0.00570883, [2] [Cycle 1]: 0.00149944, [43] [expand_dump_flag]: 2.10991e-06 [switch_simplify]: 2.914e-05 [loop_unroll]: 1.373e-05 [a_1]: 0.00034186 [recompute_prepare]: 9.00996e-06 [updatestate_depend_eliminate]: 7.70995e-06 [updatestate_assign_eliminate]: 5.61005e-06 [updatestate_loads_eliminate]: 5.97991e-06 [parameter_eliminate]: 2.41993e-06 [a_2]: 0.00011926 [accelerated_algorithm]: 9.51998e-06 [shard]: 1.44995e-06 [meta_shard_fg_expand]: 3.35998e-06 [shard_inline]: 9.01998e-06 [auto_parallel]: 1.182e-05 [parallel]: 5.33008e-06 [flash_sp]: 7.92998e-06 [merge_comm]: 7.75e-06 [allreduce_fusion]: 5.78992e-06 [matmul_add_comm_reduction]: 9.40997e-06 [allreduce_slice_to_reducescatter]: 3.10014e-07 [virtual_shard_identity]: 1.014e-05 [virtual_dataset]: 8.46002e-06 [get_grad_eliminate_]: 7.99005e-06 [virtual_output]: 7.81007e-06 [merge_forward]: 5.23997e-06 [cell_reuse_recompute_pass]: 1.62004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.737e-05 [before_grad]: 1.348e-05 [inplace_validation]: 4.71994e-06 [meta_fg_expand]: 5.29992e-06 [inplace_validation_after_expand]: 5.47001e-06 [flash_sp_send_recv_attached]: 3.46999e-06 [receive_attached]: 1.90001e-06 [after_resolve]: 1.14901e-05 [a_after_grad]: 1.31599e-05 [special_op_eliminate]: 8.31997e-06 [renormalize]: 0.00042416 [add_forward_monad_depend]: 2.45008e-06 [auto_monad_grad]: 1.29e-06 [auto_monad_eliminator]: 2.35899e-05 [cse]: 2.484e-05 [a_3]: 5.989e-05 [Cycle 2]: 0.00082416, [43] [expand_dump_flag]: 1.01002e-06 [switch_simplify]: 9.61998e-06 [loop_unroll]: 8.37992e-06 [a_1]: 0.00020719 [recompute_prepare]: 7.65e-06 [updatestate_depend_eliminate]: 5.68002e-06 [updatestate_assign_eliminate]: 4.88001e-06 [updatestate_loads_eliminate]: 2.507e-05 [parameter_eliminate]: 1.16008e-06 [a_2]: 0.00010999 [accelerated_algorithm]: 9.08005e-06 [shard]: 1.04005e-06 [meta_shard_fg_expand]: 2.70002e-06 [shard_inline]: 8.38994e-06 [auto_parallel]: 9.77004e-06 [parallel]: 3.19001e-06 [flash_sp]: 2.31003e-06 [merge_comm]: 5.64998e-06 [allreduce_fusion]: 4.63007e-06 [matmul_add_comm_reduction]: 7.03998e-06 [allreduce_slice_to_reducescatter]: 2.89991e-07 [virtual_shard_identity]: 9.65002e-06 [virtual_dataset]: 7.82998e-06 [get_grad_eliminate_]: 7.52998e-06 [virtual_output]: 8.10996e-06 [merge_forward]: 4.25999e-06 [cell_reuse_recompute_pass]: 1.65997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.599e-05 [before_grad]: 1.27399e-05 [inplace_validation]: 4.13007e-06 [meta_fg_expand]: 4.97e-06 [inplace_validation_after_expand]: 5.33997e-06 [flash_sp_send_recv_attached]: 7.89994e-07 [receive_attached]: 7.00005e-07 [after_resolve]: 1.04901e-05 [a_after_grad]: 1.222e-05 [special_op_eliminate]: 7.60006e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 9.79984e-07 [auto_monad_grad]: 9.49949e-07 [auto_monad_eliminator]: 1.72299e-05 [cse]: 1.96099e-05 [a_3]: 5.008e-05 [py_interpret_to_execute_after_opt_a]: 8.56002e-06 [slice_cell_reuse_recomputed_activation]: 2.11003e-06 [rewriter_after_opt_a]: 0.00012207 [convert_after_rewriter]: 1.11701e-05 [order_py_execute_after_rewriter]: 5.52007e-06 [opt_b]: 0.0002478, [1] [Cycle 1]: 0.00024288, [7] [b_1]: 0.00016776 [b_2]: 9.96003e-06 [updatestate_depend_eliminate]: 5.24009e-06 [updatestate_assign_eliminate]: 4.63007e-06 [updatestate_loads_eliminate]: 5.23997e-06 [renormalize]: 2.59955e-07 [cse]: 1.835e-05 [optimize_parallel_all_gather_comm]: 7.53999e-06 [overlap_param_gather]: 8.29925e-07 [cconv]: 1.51e-05 [loop_unroll]: 0.00050561 [opt_after_cconv]: 0.00012866, [1] [Cycle 1]: 0.00012286, [7] [c_1]: 5.22899e-05 [parameter_eliminate]: 1.73005e-06 [updatestate_depend_eliminate]: 6.99994e-06 [updatestate_assign_eliminate]: 4.51005e-06 [updatestate_loads_eliminate]: 5.43997e-06 [cse]: 2.017e-05 [renormalize]: 3.49944e-07 [remove_dup_value]: 1.042e-05 [tuple_transform]: 6.98899e-05, [1] [Cycle 1]: 6.571e-05, [2] [d_1]: 5.597e-05 [renormalize]: 1.90106e-07 [partial_unused_args_eliminate]: 1.40001e-06 [add_cache_embedding]: 1.08699e-05 [add_recomputation]: 5.899e-05 [cse_after_recomputation]: 2.617e-05, [1] [Cycle 1]: 2.13899e-05, [1] [cse]: 1.59401e-05 [environ_conv]: 6.96001e-06 [swap_dp_allreduce_reducescatter]: 7.1699e-06 [bias_add_comm_swap]: 1.80001e-06 [label_micro_interleaved_index]: 1.24995e-06 [label_fine_grained_interleaved_index]: 1.09e-06 [merge_cast_opt]: 9.69972e-07 [slice_recompute_activation]: 1.03994e-06 [micro_interleaved_order_control]: 1.19e-06 [assign_add_opt]: 2.556e-05 [ForceFp32Comm]: 7.69971e-07 [remove_cast_before_assign_add]: 6.91006e-06 [full_micro_interleaved_order_control]: 1.12993e-06 [reorder_send_recv_between_fp_bp]: 1.06997e-06 [comm_op_add_attrs]: 2.317e-05 [add_comm_op_reuse_tag]: 1.52003e-06 [interleave_split_concat_branches]: 7.20029e-07 [interleave_parallel_branches]: 5.19911e-07 [overlap_opt_shard_in_pipeline]: 1.11002e-06 [overlap_opt_shard_grad_in_pipeline]: 1.45996e-06 [control_data_broadcast_order]: 6.39935e-07 [grouped_pairwise_exchange_alltoall]: 7.75e-06 [offloading_packed_experts]: 1.35996e-06 [overlap_recompute_and_grad_model_parallel]: 1.31992e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.19911e-07 [overlap_recompute_allgather_and_fa_grad]: 5.85699e-05 [overlap_grad_ring_attention]: 1.42003e-06 [overlap_grad_flash_sp]: 1.22e-05 [begin_end_overlap_inline]: 5.30039e-07 [split_matmul_comm_elemetwise]: 1.32993e-06 [split_layernorm_comm]: 1.01002e-06 [handle_group_info]: 3.42994e-06 [symbol_engine_optimizer]: 9.439e-05, [1] [Cycle 1]: 8.928e-05, [6] [build]: 4.07e-06 [elim_shapecalc]: 1.357e-05 [elim_not_effective]: 1.73701e-05 [opt_reshape]: 1.01799e-05 [fold_const_symbol]: 1.47e-05 [renormalize]: 2.10013e-07 [pipeline_parallel_scheduler]: 8.89995e-07 [auto_monad_reorder]: 2.385e-05 [get_jit_bprop_graph]: 3.30037e-07 [rewriter_after_jit_bprop_graph]: 3.29921e-07 [eliminate_special_op_node]: 0.00051433 [distribtued_split]: 3.54199e-05 [validate]: 3.059e-05 [task_emit]: 0.0718175 [execute]: 8.96002e-06 Sums bootstrap : 0.000317s : 0.40% type_inference : 0.002530s : 3.21% auto_monad : 0.000111s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000023s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000031s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000549s : 0.70% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000031s : 0.04% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000229s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000019s : 0.02% optimize.opt_a.shard : 0.000002s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000424s : 0.54% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000041s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000110s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000122s : 0.16% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000168s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000506s : 0.64% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000059s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000026s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000023s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000008s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000059s : 0.07% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000514s : 0.65% distribtued_split : 0.000035s : 0.04% validate : 0.000031s : 0.04% task_emit : 0.071817s : 91.20% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000118 63 4.22% : 0.000005s : 2: substitution.depend_value_elim 2.66% : 0.000003s : 5: substitution.elim_not_effective 1.83% : 0.000002s : 5: substitution.fold_const_symbol 5.56% : 0.000007s : 6: substitution.graph_param_transform 49.92% : 0.000059s : 1: substitution.inline 4.32% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.08% : 0.000004s : 6: substitution.load_eliminater 2.26% : 0.000003s : 2: substitution.reduce_all_const_elim 7.17% : 0.000008s : 10: substitution.remove_not_recompute_node 2.30% : 0.000003s : 2: substitution.replace_old_param 8.71% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 7.98% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002505 2 89.59% : 0.002245s : 1: type_inference.infer 10.41% : 0.000261s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000058 1 100.00% : 0.000058s : 1: match.inline ------[predicate.] 0.000235 1420 0.95% : 0.000002s : 13: predicate.accumulaten_eliminater 1.10% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.76% : 0.000002s : 12: predicate.addn_check_dump 0.92% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.25% : 0.000005s : 25: predicate.arithmetic_simplify 0.97% : 0.000002s : 13: predicate.cast_eliminate 0.94% : 0.000002s : 12: predicate.check_bprop_eliminate 0.78% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.21% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.96% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.81% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.25% : 0.000001s : 6: predicate.elim_not_effective 0.65% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.13% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.92% : 0.000005s : 31: predicate.environ_get_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.93% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.34% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.16% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.89% : 0.000002s : 12: predicate.get_grad_eliminate 0.28% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.36% : 0.000013s : 63: predicate.inline 1.02% : 0.000002s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.16% : 0.000003s : 12: predicate.less_batch_normalization 1.73% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.35% : 0.000006s : 38: predicate.load_eliminater 1.38% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.21% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.77% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.80% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.75% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.72% : 0.000002s : 6: predicate.mutable_eliminate 0.52% : 0.000001s : 6: predicate.opt_reshape 0.55% : 0.000001s : 6: predicate.parallel_virtual_node 1.18% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.72% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.06% : 0.000002s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.04% : 0.000002s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.62% : 0.000001s : 6: predicate.row_tensor_eliminate 1.01% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.06% : 0.000002s : 12: predicate.shard_identity_eliminate 1.43% : 0.000003s : 18: predicate.special_op_eliminate 0.89% : 0.000002s : 12: predicate.specialize_transform 1.10% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.41% : 0.000006s : 38: predicate.stopgrad_eliminater 0.40% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000002s : 14: predicate.switch_defer_inline 1.63% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.25% : 0.000010s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.81% : 0.000002s : 13: predicate.transpose_eliminate 1.72% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.69% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.55% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.61% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.56% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.31% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.50% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.57% : 0.000001s : 6: predicate.value_based_eliminate 0.87% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.85% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000157 4 6.44% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.56% : 0.000146s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092492 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.03% : 0.000030s : 1: assign_add_opt 0.13% : 0.000124s : 1: auto_monad 0.03% : 0.000030s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.39% : 0.000362s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.03% : 0.000027s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000043s : 1: distribtued_split 0.57% : 0.000527s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000011s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.56% : 0.000515s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001122s : 80: opt.transform.opt_a 0.05% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000156s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.06% : 0.000051s : 4: opt.transform.symbol_engine_opt 6.18% : 0.005712s : 1: opt_a 0.14% : 0.000133s : 1: opt_after_cconv 0.27% : 0.000251s : 1: opt_b 8.11% : 0.007502s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.07% : 0.000064s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000029s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.24% : 0.000226s : 1: renormalize.infer 0.21% : 0.000193s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000127s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000098s : 1: symbol_engine_optimizer 77.67% : 0.071843s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.75% : 0.002546s : 1: type_inference 0.07% : 0.000064s : 1: validate TotalTime = 0.0837011, [21] [bootstrap]: 0.00033097 [type_inference]: 0.00274016 [auto_monad]: 0.00013918 [graph_reusing]: 3.00992e-06 [inline]: 1.87999e-06 [parallel-infer-symbol]: 2.37999e-06 [pre_auto_parallel]: 2.733e-05 [insert-virtual-dataset]: 2.80002e-06 [parallel-infer-symbol-second]: 4.90109e-07 [dataset_repeat_opt]: 1.27999e-06 [pipeline_split]: 1.64995e-06 [optimize]: 0.00767712, [52] [py_interpret_to_execute]: 1.858e-05 [rewriter_before_opt_a]: 3.652e-05 [opt_a]: 0.00577106, [2] [Cycle 1]: 0.00163701, [43] [expand_dump_flag]: 4.28001e-06 [switch_simplify]: 3.04801e-05 [loop_unroll]: 1.342e-05 [a_1]: 0.00035978 [recompute_prepare]: 9.06992e-06 [updatestate_depend_eliminate]: 8.59995e-06 [updatestate_assign_eliminate]: 6.68992e-06 [updatestate_loads_eliminate]: 7.89994e-06 [parameter_eliminate]: 3.31004e-06 [a_2]: 0.00011993 [accelerated_algorithm]: 9.04e-06 [shard]: 2.83006e-06 [meta_shard_fg_expand]: 4.19002e-06 [shard_inline]: 8.61008e-06 [auto_parallel]: 1.21e-05 [parallel]: 8.02998e-06 [flash_sp]: 1.064e-05 [merge_comm]: 8.34011e-06 [allreduce_fusion]: 6.41996e-06 [matmul_add_comm_reduction]: 1.133e-05 [allreduce_slice_to_reducescatter]: 6.29923e-07 [virtual_shard_identity]: 9.78005e-06 [virtual_dataset]: 7.89005e-06 [get_grad_eliminate_]: 7.69005e-06 [virtual_output]: 7.31996e-06 [merge_forward]: 6.12007e-06 [cell_reuse_recompute_pass]: 1.81003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.817e-05 [before_grad]: 1.312e-05 [inplace_validation]: 5.30994e-06 [meta_fg_expand]: 5.83008e-06 [inplace_validation_after_expand]: 6.59993e-06 [flash_sp_send_recv_attached]: 4.72006e-06 [receive_attached]: 2.71003e-06 [after_resolve]: 1.144e-05 [a_after_grad]: 1.283e-05 [special_op_eliminate]: 8.33999e-06 [renormalize]: 0.00047496 [add_forward_monad_depend]: 4.05998e-06 [auto_monad_grad]: 2.13995e-06 [auto_monad_eliminator]: 3.239e-05 [cse]: 3.35401e-05 [a_3]: 8.57001e-05 [Cycle 2]: 0.00080226, [43] [expand_dump_flag]: 1.16997e-06 [switch_simplify]: 9.70997e-06 [loop_unroll]: 7.95e-06 [a_1]: 0.00020815 [recompute_prepare]: 7.61996e-06 [updatestate_depend_eliminate]: 5.97001e-06 [updatestate_assign_eliminate]: 5.15999e-06 [updatestate_loads_eliminate]: 5.52996e-06 [parameter_eliminate]: 1.44006e-06 [a_2]: 0.00010692 [accelerated_algorithm]: 8.61997e-06 [shard]: 1.30001e-06 [meta_shard_fg_expand]: 2.56009e-06 [shard_inline]: 7.87003e-06 [auto_parallel]: 1.08901e-05 [parallel]: 3.68e-06 [flash_sp]: 3.26999e-06 [merge_comm]: 5.9501e-06 [allreduce_fusion]: 4.69992e-06 [matmul_add_comm_reduction]: 7.88004e-06 [allreduce_slice_to_reducescatter]: 2.79979e-07 [virtual_shard_identity]: 9.44e-06 [virtual_dataset]: 8.13999e-06 [get_grad_eliminate_]: 7.28003e-06 [virtual_output]: 7.63999e-06 [merge_forward]: 4.37e-06 [cell_reuse_recompute_pass]: 1.84006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.603e-05 [before_grad]: 1.25801e-05 [inplace_validation]: 4.08001e-06 [meta_fg_expand]: 4.83997e-06 [inplace_validation_after_expand]: 5.39003e-06 [flash_sp_send_recv_attached]: 9.00007e-07 [receive_attached]: 7.79983e-07 [after_resolve]: 1.034e-05 [a_after_grad]: 1.182e-05 [special_op_eliminate]: 7.00995e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 1.02993e-06 [auto_monad_grad]: 1.40001e-06 [auto_monad_eliminator]: 1.904e-05 [cse]: 2.134e-05 [a_3]: 5.021e-05 [py_interpret_to_execute_after_opt_a]: 9.95991e-06 [slice_cell_reuse_recomputed_activation]: 2.48e-06 [rewriter_after_opt_a]: 0.00014305 [convert_after_rewriter]: 1.15e-05 [order_py_execute_after_rewriter]: 6.00005e-06 [opt_b]: 0.00024945, [1] [Cycle 1]: 0.00024414, [7] [b_1]: 0.00016791 [b_2]: 9.31008e-06 [updatestate_depend_eliminate]: 5.42996e-06 [updatestate_assign_eliminate]: 4.48001e-06 [updatestate_loads_eliminate]: 5.35999e-06 [renormalize]: 2.40048e-07 [cse]: 2.034e-05 [optimize_parallel_all_gather_comm]: 8.38994e-06 [overlap_param_gather]: 1.41002e-06 [cconv]: 2.29401e-05 [loop_unroll]: 0.000519 [opt_after_cconv]: 0.00013701, [1] [Cycle 1]: 0.00013082, [7] [c_1]: 5.44901e-05 [parameter_eliminate]: 2.42004e-06 [updatestate_depend_eliminate]: 8.61997e-06 [updatestate_assign_eliminate]: 5.40994e-06 [updatestate_loads_eliminate]: 5.22996e-06 [cse]: 2.22799e-05 [renormalize]: 4.7998e-07 [remove_dup_value]: 1.46e-05 [tuple_transform]: 6.98001e-05, [1] [Cycle 1]: 6.535e-05, [2] [d_1]: 5.637e-05 [renormalize]: 1.60071e-07 [partial_unused_args_eliminate]: 2.01992e-06 [add_cache_embedding]: 1.322e-05 [add_recomputation]: 6.50299e-05 [cse_after_recomputation]: 2.73499e-05, [1] [Cycle 1]: 2.277e-05, [1] [cse]: 1.735e-05 [environ_conv]: 7.86001e-06 [swap_dp_allreduce_reducescatter]: 7.66001e-06 [bias_add_comm_swap]: 2.71993e-06 [label_micro_interleaved_index]: 1.56998e-06 [label_fine_grained_interleaved_index]: 2.06998e-06 [merge_cast_opt]: 1.19e-06 [slice_recompute_activation]: 1.81003e-06 [micro_interleaved_order_control]: 2.22004e-06 [assign_add_opt]: 2.983e-05 [ForceFp32Comm]: 9.09902e-07 [remove_cast_before_assign_add]: 7.42998e-06 [full_micro_interleaved_order_control]: 2.50002e-06 [reorder_send_recv_between_fp_bp]: 1.91003e-06 [comm_op_add_attrs]: 2.875e-05 [add_comm_op_reuse_tag]: 1.77999e-06 [interleave_split_concat_branches]: 1.01002e-06 [interleave_parallel_branches]: 6.50063e-07 [overlap_opt_shard_in_pipeline]: 1.05007e-06 [overlap_opt_shard_grad_in_pipeline]: 1.92004e-06 [control_data_broadcast_order]: 1.24006e-06 [grouped_pairwise_exchange_alltoall]: 1.055e-05 [offloading_packed_experts]: 2.78e-06 [overlap_recompute_and_grad_model_parallel]: 2.01992e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.30041e-07 [overlap_recompute_allgather_and_fa_grad]: 6.785e-05 [overlap_grad_ring_attention]: 2.27999e-06 [overlap_grad_flash_sp]: 1.516e-05 [begin_end_overlap_inline]: 8.2003e-07 [split_matmul_comm_elemetwise]: 1.82993e-06 [split_layernorm_comm]: 1.81003e-06 [handle_group_info]: 4.92006e-06 [symbol_engine_optimizer]: 9.156e-05, [1] [Cycle 1]: 8.65801e-05, [6] [build]: 5.02006e-06 [elim_shapecalc]: 1.333e-05 [elim_not_effective]: 1.721e-05 [opt_reshape]: 9.36002e-06 [fold_const_symbol]: 1.403e-05 [renormalize]: 4.39934e-07 [pipeline_parallel_scheduler]: 1.47999e-06 [auto_monad_reorder]: 3.291e-05 [get_jit_bprop_graph]: 4.4005e-07 [rewriter_after_jit_bprop_graph]: 7.69971e-07 [eliminate_special_op_node]: 0.00052817 [distribtued_split]: 4.508e-05 [validate]: 3.52099e-05 [task_emit]: 0.0718602 [execute]: 1.07001e-05 Sums bootstrap : 0.000331s : 0.42% type_inference : 0.002740s : 3.45% auto_monad : 0.000139s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000027s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000019s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000568s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000227s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000004s : 0.01% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000475s : 0.60% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.06% optimize.opt_a.cse : 0.000055s : 0.07% optimize.opt_a.a_3 : 0.000136s : 0.17% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000143s : 0.18% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000168s : 0.21% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000519s : 0.65% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000015s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000065s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000030s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000029s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.01% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000068s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000033s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000528s : 0.67% distribtued_split : 0.000045s : 0.06% validate : 0.000035s : 0.04% task_emit : 0.071860s : 90.58% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000145 63 4.71% : 0.000007s : 2: substitution.depend_value_elim 2.12% : 0.000003s : 5: substitution.elim_not_effective 1.89% : 0.000003s : 5: substitution.fold_const_symbol 5.04% : 0.000007s : 6: substitution.graph_param_transform 52.60% : 0.000076s : 1: substitution.inline 3.85% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.00% : 0.000004s : 6: substitution.load_eliminater 2.64% : 0.000004s : 2: substitution.reduce_all_const_elim 5.91% : 0.000009s : 10: substitution.remove_not_recompute_node 2.43% : 0.000004s : 2: substitution.replace_old_param 8.40% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.42% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002708 2 88.69% : 0.002402s : 1: type_inference.infer 11.31% : 0.000306s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000075 1 100.00% : 0.000075s : 1: match.inline ------[predicate.] 0.000233 1420 0.86% : 0.000002s : 13: predicate.accumulaten_eliminater 1.16% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.75% : 0.000002s : 12: predicate.addn_check_dump 0.87% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.30% : 0.000005s : 25: predicate.arithmetic_simplify 0.96% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.57% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.63% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.22% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_depend_swap 2.01% : 0.000005s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.17% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.42% : 0.000013s : 63: predicate.inline 0.99% : 0.000002s : 12: predicate.inline_without_move 0.37% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.13% : 0.000003s : 12: predicate.less_batch_normalization 1.75% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.42% : 0.000006s : 38: predicate.load_eliminater 1.49% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.23% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.84% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.71% : 0.000002s : 13: predicate.minmaximum_grad 0.76% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.53% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.85% : 0.000002s : 13: predicate.print_const_string_wrapper 0.83% : 0.000002s : 12: predicate.reduce_all_const_elim 1.07% : 0.000002s : 13: predicate.reduce_eliminate 0.69% : 0.000002s : 12: predicate.remove_not_recompute_node 1.08% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.84% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 0.96% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.00% : 0.000002s : 12: predicate.shard_identity_eliminate 1.49% : 0.000003s : 18: predicate.special_op_eliminate 0.88% : 0.000002s : 12: predicate.specialize_transform 1.08% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.98% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.27% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.93% : 0.000002s : 14: predicate.switch_defer_inline 1.63% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.36% : 0.000010s : 43: predicate.switch_simplify 0.73% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.79% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.68% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.40% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.90% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.41% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.32% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.54% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.60% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000180 4 8.89% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.11% : 0.000164s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.093298 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000070s : 1: add_recomputation 0.04% : 0.000034s : 1: assign_add_opt 0.16% : 0.000152s : 1: auto_monad 0.04% : 0.000039s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.39% : 0.000367s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.04% : 0.000033s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.06% : 0.000053s : 1: distribtued_split 0.58% : 0.000543s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.02% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.57% : 0.000529s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000006s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001159s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000158s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.19% : 0.005775s : 1: opt_a 0.15% : 0.000141s : 1: opt_after_cconv 0.27% : 0.000253s : 1: opt_b 8.24% : 0.007686s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000074s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000034s : 1: pre_auto_parallel 0.03% : 0.000024s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.28% : 0.000257s : 1: renormalize.infer 0.23% : 0.000212s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000149s : 1: rewriter_after_opt_a 0.04% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000094s : 1: symbol_engine_optimizer 77.05% : 0.071889s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.96% : 0.002759s : 1: type_inference 0.08% : 0.000071s : 1: validate [WARNING] PARALLEL(169251,ffff805f5c10,python3.7):2025-02-07-15:54:29.573.333 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169260,ffff90a16c10,python3.7):2025-02-07-15:54:29.573.662 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169242,ffff96d80c10,python3.7):2025-02-07-15:54:29.573.762 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169309,ffff88eb9c10,python3.7):2025-02-07-15:54:29.573.894 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169297,ffff9ffe2c10,python3.7):2025-02-07-15:54:29.574.036 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169273,ffff95a23c10,python3.7):2025-02-07-15:54:29.574.132 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169323,ffff82a65c10,python3.7):2025-02-07-15:54:29.574.309 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169285,ffff97501c10,python3.7):2025-02-07-15:54:29.574.417 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 0.0810047, [21] [bootstrap]: 0.00029725 [type_inference]: 0.00233935 [auto_monad]: 0.00011124 [graph_reusing]: 1.81992e-06 [inline]: 1.15996e-06 [parallel-infer-symbol]: 1.42003e-06 [pre_auto_parallel]: 2.158e-05 [insert-virtual-dataset]: 2.19001e-06 [parallel-infer-symbol-second]: 3.30037e-07 [dataset_repeat_opt]: 8.2003e-07 [pipeline_split]: 1.10001e-06 [optimize]: 0.0071451, [52] [py_interpret_to_execute]: 1.37399e-05 [rewriter_before_opt_a]: 3.20401e-05 [opt_a]: 0.00531314, [2] [Cycle 1]: 0.0015068, [43] [expand_dump_flag]: 2.36998e-06 [switch_simplify]: 2.673e-05 [loop_unroll]: 1.341e-05 [a_1]: 0.00033694 [recompute_prepare]: 9.24e-06 [updatestate_depend_eliminate]: 7.81007e-06 [updatestate_assign_eliminate]: 5.30994e-06 [updatestate_loads_eliminate]: 6.41006e-06 [parameter_eliminate]: 2.33995e-06 [a_2]: 0.00011651 [accelerated_algorithm]: 8.92999e-06 [shard]: 1.83994e-06 [meta_shard_fg_expand]: 3.32005e-06 [shard_inline]: 8.57003e-06 [auto_parallel]: 1.15901e-05 [parallel]: 6.61006e-06 [flash_sp]: 8.22009e-06 [merge_comm]: 7.39994e-06 [allreduce_fusion]: 5.21005e-06 [matmul_add_comm_reduction]: 9.44e-06 [allreduce_slice_to_reducescatter]: 3.30037e-07 [virtual_shard_identity]: 1.044e-05 [virtual_dataset]: 8.30006e-06 [get_grad_eliminate_]: 8.56002e-06 [virtual_output]: 8.05e-06 [merge_forward]: 5.69003e-06 [cell_reuse_recompute_pass]: 1.50001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.718e-05 [before_grad]: 1.34e-05 [inplace_validation]: 4.94998e-06 [meta_fg_expand]: 5.72007e-06 [inplace_validation_after_expand]: 5.48002e-06 [flash_sp_send_recv_attached]: 3.82005e-06 [receive_attached]: 1.64006e-06 [after_resolve]: 1.172e-05 [a_after_grad]: 1.282e-05 [special_op_eliminate]: 8.27992e-06 [renormalize]: 0.0004338 [add_forward_monad_depend]: 2.70002e-06 [auto_monad_grad]: 1.54995e-06 [auto_monad_eliminator]: 2.53899e-05 [cse]: 2.616e-05 [a_3]: 5.89699e-05 [Cycle 2]: 0.00080216, [43] [expand_dump_flag]: 9.29926e-07 [switch_simplify]: 9.31008e-06 [loop_unroll]: 7.37002e-06 [a_1]: 0.00020772 [recompute_prepare]: 7.40995e-06 [updatestate_depend_eliminate]: 5.62996e-06 [updatestate_assign_eliminate]: 4.97e-06 [updatestate_loads_eliminate]: 5.08002e-06 [parameter_eliminate]: 1.15007e-06 [a_2]: 0.00010554 [accelerated_algorithm]: 8.89995e-06 [shard]: 1.07998e-06 [meta_shard_fg_expand]: 2.79001e-06 [shard_inline]: 8.21997e-06 [auto_parallel]: 1.03e-05 [parallel]: 3.06999e-06 [flash_sp]: 2.68e-06 [merge_comm]: 6.32997e-06 [allreduce_fusion]: 5.29992e-06 [matmul_add_comm_reduction]: 7.48003e-06 [allreduce_slice_to_reducescatter]: 2.60072e-07 [virtual_shard_identity]: 9.80997e-06 [virtual_dataset]: 8.23999e-06 [get_grad_eliminate_]: 7.91997e-06 [virtual_output]: 7.32997e-06 [merge_forward]: 4.74998e-06 [cell_reuse_recompute_pass]: 1.75997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.629e-05 [before_grad]: 1.256e-05 [inplace_validation]: 4.59002e-06 [meta_fg_expand]: 4.90993e-06 [inplace_validation_after_expand]: 4.95999e-06 [flash_sp_send_recv_attached]: 8.69972e-07 [receive_attached]: 7.60076e-07 [after_resolve]: 9.99996e-06 [a_after_grad]: 1.221e-05 [special_op_eliminate]: 7.83999e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 8.60076e-07 [auto_monad_grad]: 9.79984e-07 [auto_monad_eliminator]: 1.679e-05 [cse]: 1.861e-05 [a_3]: 5.021e-05 [py_interpret_to_execute_after_opt_a]: 9.34e-06 [slice_cell_reuse_recomputed_activation]: 1.59e-06 [rewriter_after_opt_a]: 0.0001278 [convert_after_rewriter]: 1.067e-05 [order_py_execute_after_rewriter]: 5.48002e-06 [opt_b]: 0.00024685, [1] [Cycle 1]: 0.00024165, [7] [b_1]: 0.00016573 [b_2]: 1.043e-05 [updatestate_depend_eliminate]: 5.1799e-06 [updatestate_assign_eliminate]: 4.65999e-06 [updatestate_loads_eliminate]: 4.99003e-06 [renormalize]: 3.49944e-07 [cse]: 1.838e-05 [optimize_parallel_all_gather_comm]: 7.80995e-06 [overlap_param_gather]: 6.3004e-07 [cconv]: 1.58e-05 [loop_unroll]: 0.00054247 [opt_after_cconv]: 0.0001314, [1] [Cycle 1]: 0.00012546, [7] [c_1]: 5.28799e-05 [parameter_eliminate]: 1.69e-06 [updatestate_depend_eliminate]: 7.61996e-06 [updatestate_assign_eliminate]: 4.83007e-06 [updatestate_loads_eliminate]: 5.13997e-06 [cse]: 1.97501e-05 [renormalize]: 3.20026e-07 [remove_dup_value]: 1.05699e-05 [tuple_transform]: 7.02799e-05, [1] [Cycle 1]: 6.611e-05, [2] [d_1]: 5.61699e-05 [renormalize]: 2.00002e-07 [partial_unused_args_eliminate]: 1.66998e-06 [add_cache_embedding]: 1.19599e-05 [add_recomputation]: 5.434e-05 [cse_after_recomputation]: 2.622e-05, [1] [Cycle 1]: 2.17999e-05, [1] [cse]: 1.642e-05 [environ_conv]: 6.84999e-06 [swap_dp_allreduce_reducescatter]: 7.37002e-06 [bias_add_comm_swap]: 1.49e-06 [label_micro_interleaved_index]: 1.89e-06 [label_fine_grained_interleaved_index]: 1.22003e-06 [merge_cast_opt]: 7.60076e-07 [slice_recompute_activation]: 1.11992e-06 [micro_interleaved_order_control]: 1.33994e-06 [assign_add_opt]: 2.71e-05 [ForceFp32Comm]: 6.6997e-07 [remove_cast_before_assign_add]: 6.96001e-06 [full_micro_interleaved_order_control]: 1.30001e-06 [reorder_send_recv_between_fp_bp]: 1.73994e-06 [comm_op_add_attrs]: 2.33001e-05 [add_comm_op_reuse_tag]: 1.72004e-06 [interleave_split_concat_branches]: 5.20027e-07 [interleave_parallel_branches]: 7.10017e-07 [overlap_opt_shard_in_pipeline]: 6.3004e-07 [overlap_opt_shard_grad_in_pipeline]: 1.42003e-06 [control_data_broadcast_order]: 7.29924e-07 [grouped_pairwise_exchange_alltoall]: 6.40994e-06 [offloading_packed_experts]: 1.34995e-06 [overlap_recompute_and_grad_model_parallel]: 1.40001e-06 [overlap_grad_matmul_and_grad_allreduce]: 6.10016e-07 [overlap_recompute_allgather_and_fa_grad]: 6.48999e-05 [overlap_grad_ring_attention]: 1.51002e-06 [overlap_grad_flash_sp]: 1.26499e-05 [begin_end_overlap_inline]: 5.20027e-07 [split_matmul_comm_elemetwise]: 1.25996e-06 [split_layernorm_comm]: 1.23004e-06 [handle_group_info]: 2.82994e-06 [symbol_engine_optimizer]: 9.219e-05, [1] [Cycle 1]: 8.74101e-05, [6] [build]: 4.70004e-06 [elim_shapecalc]: 1.29e-05 [elim_not_effective]: 1.655e-05 [opt_reshape]: 8.84e-06 [fold_const_symbol]: 1.43199e-05 [renormalize]: 2.20025e-07 [pipeline_parallel_scheduler]: 9.59961e-07 [auto_monad_reorder]: 2.445e-05 [get_jit_bprop_graph]: 3.59956e-07 [rewriter_after_jit_bprop_graph]: 3.20026e-07 [eliminate_special_op_node]: 0.00051199 [distribtued_split]: 3.43899e-05 [validate]: 2.94499e-05 [task_emit]: 0.0702348 [execute]: 9.04e-06 Sums bootstrap : 0.000297s : 0.39% type_inference : 0.002339s : 3.04% auto_monad : 0.000111s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000022s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000032s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000036s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000545s : 0.71% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000222s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.01% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.03% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000434s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000042s : 0.05% optimize.opt_a.cse : 0.000045s : 0.06% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000128s : 0.17% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000166s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000542s : 0.70% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.01% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000054s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000027s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000023s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000006s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000065s : 0.08% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000512s : 0.67% distribtued_split : 0.000034s : 0.04% validate : 0.000029s : 0.04% task_emit : 0.070235s : 91.24% execute : 0.000009s : 0.01% TotalTime = 0.0811346, [21] [bootstrap]: 0.00031763 [type_inference]: 0.00257863 [auto_monad]: 0.0001337 [graph_reusing]: 2.64996e-06 [inline]: 1.54995e-06 [parallel-infer-symbol]: 2.35008e-06 [pre_auto_parallel]: 2.56799e-05 [insert-virtual-dataset]: 2.56009e-06 [parallel-infer-symbol-second]: 3.89991e-07 [dataset_repeat_opt]: 1.49e-06 [pipeline_split]: 1.60001e-06 [optimize]: 0.00746773, [52] [py_interpret_to_execute]: 1.646e-05 [rewriter_before_opt_a]: 3.491e-05 [opt_a]: 0.00548015, [2] [Cycle 1]: 0.00156657, [43] [expand_dump_flag]: 3.69002e-06 [switch_simplify]: 3.06901e-05 [loop_unroll]: 1.38299e-05 [a_1]: 0.00034793 [recompute_prepare]: 9.55991e-06 [updatestate_depend_eliminate]: 8.10996e-06 [updatestate_assign_eliminate]: 5.7799e-06 [updatestate_loads_eliminate]: 5.74999e-06 [parameter_eliminate]: 3.39001e-06 [a_2]: 0.0001191 [accelerated_algorithm]: 9.15001e-06 [shard]: 1.91003e-06 [meta_shard_fg_expand]: 3.62995e-06 [shard_inline]: 8.56991e-06 [auto_parallel]: 1.18599e-05 [parallel]: 6.98003e-06 [flash_sp]: 1.102e-05 [merge_comm]: 7.6599e-06 [allreduce_fusion]: 5.68992e-06 [matmul_add_comm_reduction]: 1.143e-05 [allreduce_slice_to_reducescatter]: 3.50061e-07 [virtual_shard_identity]: 9.78005e-06 [virtual_dataset]: 8.18004e-06 [get_grad_eliminate_]: 8.41008e-06 [virtual_output]: 7.97003e-06 [merge_forward]: 4.80004e-06 [cell_reuse_recompute_pass]: 2.12004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.672e-05 [before_grad]: 1.39e-05 [inplace_validation]: 4.9799e-06 [meta_fg_expand]: 5.76e-06 [inplace_validation_after_expand]: 6.80995e-06 [flash_sp_send_recv_attached]: 4.54998e-06 [receive_attached]: 2.75997e-06 [after_resolve]: 1.102e-05 [a_after_grad]: 1.26701e-05 [special_op_eliminate]: 7.59994e-06 [renormalize]: 0.00045847 [add_forward_monad_depend]: 3.53996e-06 [auto_monad_grad]: 1.87999e-06 [auto_monad_eliminator]: 3.24701e-05 [cse]: 3.23399e-05 [a_3]: 6.012e-05 [Cycle 2]: 0.00079692, [43] [expand_dump_flag]: 1.19e-06 [switch_simplify]: 9.17003e-06 [loop_unroll]: 8.19005e-06 [a_1]: 0.00021394 [recompute_prepare]: 7.36001e-06 [updatestate_depend_eliminate]: 6.07001e-06 [updatestate_assign_eliminate]: 5.29992e-06 [updatestate_loads_eliminate]: 5.29992e-06 [parameter_eliminate]: 1.25996e-06 [a_2]: 0.00010695 [accelerated_algorithm]: 8.35001e-06 [shard]: 1.22003e-06 [meta_shard_fg_expand]: 2.88e-06 [shard_inline]: 7.97003e-06 [auto_parallel]: 1.11701e-05 [parallel]: 3.43006e-06 [flash_sp]: 2.42994e-06 [merge_comm]: 5.91995e-06 [allreduce_fusion]: 4.99003e-06 [matmul_add_comm_reduction]: 7.63999e-06 [allreduce_slice_to_reducescatter]: 2.59955e-07 [virtual_shard_identity]: 9.26002e-06 [virtual_dataset]: 7.58003e-06 [get_grad_eliminate_]: 7.39004e-06 [virtual_output]: 9.74e-06 [merge_forward]: 4.92006e-06 [cell_reuse_recompute_pass]: 1.94996e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.59401e-05 [before_grad]: 1.27599e-05 [inplace_validation]: 4.20993e-06 [meta_fg_expand]: 4.94998e-06 [inplace_validation_after_expand]: 5.46e-06 [flash_sp_send_recv_attached]: 8.801e-07 [receive_attached]: 7.89994e-07 [after_resolve]: 9.82999e-06 [a_after_grad]: 1.21801e-05 [special_op_eliminate]: 7.43009e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 8.60076e-07 [auto_monad_grad]: 1.15996e-06 [auto_monad_eliminator]: 1.82e-05 [cse]: 2.02e-05 [a_3]: 4.821e-05 [py_interpret_to_execute_after_opt_a]: 9.28005e-06 [slice_cell_reuse_recomputed_activation]: 1.27999e-06 [rewriter_after_opt_a]: 0.00014924 [convert_after_rewriter]: 1.04801e-05 [order_py_execute_after_rewriter]: 5.88002e-06 [opt_b]: 0.00044337, [1] [Cycle 1]: 0.00043745, [7] [b_1]: 0.00035719 [b_2]: 1.03599e-05 [updatestate_depend_eliminate]: 5.80004e-06 [updatestate_assign_eliminate]: 4.43996e-06 [updatestate_loads_eliminate]: 5.46e-06 [renormalize]: 3.89991e-07 [cse]: 2.157e-05 [optimize_parallel_all_gather_comm]: 8.32998e-06 [overlap_param_gather]: 6.89994e-07 [cconv]: 1.47499e-05 [loop_unroll]: 0.00050973 [opt_after_cconv]: 0.00013647, [1] [Cycle 1]: 0.00013051, [7] [c_1]: 5.52201e-05 [parameter_eliminate]: 2.52004e-06 [updatestate_depend_eliminate]: 7.90996e-06 [updatestate_assign_eliminate]: 5.02996e-06 [updatestate_loads_eliminate]: 5.48002e-06 [cse]: 2.28201e-05 [renormalize]: 4.60073e-07 [remove_dup_value]: 9.50997e-06 [tuple_transform]: 6.906e-05, [1] [Cycle 1]: 6.487e-05, [2] [d_1]: 5.526e-05 [renormalize]: 2.20025e-07 [partial_unused_args_eliminate]: 1.60991e-06 [add_cache_embedding]: 1.107e-05 [add_recomputation]: 5.136e-05 [cse_after_recomputation]: 2.723e-05, [1] [Cycle 1]: 2.288e-05, [1] [cse]: 1.765e-05 [environ_conv]: 5.82996e-06 [swap_dp_allreduce_reducescatter]: 7.12997e-06 [bias_add_comm_swap]: 1.27999e-06 [label_micro_interleaved_index]: 9.70089e-07 [label_fine_grained_interleaved_index]: 1.16997e-06 [merge_cast_opt]: 5.19911e-07 [slice_recompute_activation]: 6.79982e-07 [micro_interleaved_order_control]: 8.49948e-07 [assign_add_opt]: 2.41899e-05 [ForceFp32Comm]: 4.89992e-07 [remove_cast_before_assign_add]: 5.57001e-06 [full_micro_interleaved_order_control]: 9.19914e-07 [reorder_send_recv_between_fp_bp]: 8.40053e-07 [comm_op_add_attrs]: 2.15099e-05 [add_comm_op_reuse_tag]: 1.45996e-06 [interleave_split_concat_branches]: 5.10016e-07 [interleave_parallel_branches]: 4.50062e-07 [overlap_opt_shard_in_pipeline]: 5.89993e-07 [overlap_opt_shard_grad_in_pipeline]: 8.69972e-07 [control_data_broadcast_order]: 5.50062e-07 [grouped_pairwise_exchange_alltoall]: 6.21995e-06 [offloading_packed_experts]: 1.01002e-06 [overlap_recompute_and_grad_model_parallel]: 8.69972e-07 [overlap_grad_matmul_and_grad_allreduce]: 4.89992e-07 [overlap_recompute_allgather_and_fa_grad]: 5.362e-05 [overlap_grad_ring_attention]: 1.06997e-06 [overlap_grad_flash_sp]: 1.183e-05 [begin_end_overlap_inline]: 4.20026e-07 [split_matmul_comm_elemetwise]: 9.89996e-07 [split_layernorm_comm]: 1.01002e-06 [handle_group_info]: 2.72004e-06 [symbol_engine_optimizer]: 9.00701e-05, [1] [Cycle 1]: 8.51901e-05, [6] [build]: 3.97e-06 [elim_shapecalc]: 1.388e-05 [elim_not_effective]: 1.691e-05 [opt_reshape]: 9.08994e-06 [fold_const_symbol]: 1.419e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 7.59959e-07 [auto_monad_reorder]: 2.23001e-05 [get_jit_bprop_graph]: 2.79979e-07 [rewriter_after_jit_bprop_graph]: 2.5006e-07 [eliminate_special_op_node]: 0.0005329 [distribtued_split]: 3.38501e-05 [validate]: 3.10401e-05 [task_emit]: 0.0697171 [execute]: 1.155e-05 Sums bootstrap : 0.000318s : 0.41% type_inference : 0.002579s : 3.35% auto_monad : 0.000134s : 0.17% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000562s : 0.73% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000226s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000018s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000459s : 0.60% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000149s : 0.19% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000357s : 0.46% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_d Time group info: ------[substitution.] 0.000116 63 4.38% : 0.000005s : 2: substitution.depend_value_elim 1.98% : 0.000002s : 5: substitution.elim_not_effective 1.81% : 0.000002s : 5: substitution.fold_const_symbol 5.70% : 0.000007s : 6: substitution.graph_param_transform 49.82% : 0.000058s : 1: substitution.inline 4.20% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.53% : 0.000004s : 6: substitution.load_eliminater 2.37% : 0.000003s : 2: substitution.reduce_all_const_elim 6.76% : 0.000008s : 10: substitution.remove_not_recompute_node 2.57% : 0.000003s : 2: substitution.replace_old_param 9.18% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.71% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002315 2 89.42% : 0.002070s : 1: type_inference.infer 10.58% : 0.000245s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000057 1 100.00% : 0.000057s : 1: match.inline ------[predicate.] 0.000234 1420 0.88% : 0.000002s : 13: predicate.accumulaten_eliminater 1.08% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.69% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.80% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.23% : 0.000005s : 25: predicate.arithmetic_simplify 0.83% : 0.000002s : 13: predicate.cast_eliminate 0.75% : 0.000002s : 12: predicate.check_bprop_eliminate 0.83% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.55% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.28% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.91% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.23% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.18% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_depend_swap 1.92% : 0.000005s : 31: predicate.environ_get_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.91% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.25% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.31% : 0.000012s : 63: predicate.inline 0.95% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.17% : 0.000003s : 12: predicate.less_batch_normalization 1.83% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.36% : 0.000006s : 38: predicate.load_eliminater 1.50% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.80% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.80% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.81% : 0.000002s : 12: predicatepend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000022s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000510s : 0.66% optimize.opt_after_cconv.c_1 : 0.000055s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000051s : 0.07% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000024s : 0.03% optimize.ForceFp32Comm : 0.000000s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000022s : 0.03% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000006s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000054s : 0.07% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% e.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.76% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.51% : 0.000001s : 6: predicate.parallel_virtual_node 1.25% : 0.000003s : 14: predicate.partial_defer_inline 1.16% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.82% : 0.000002s : 12: predicate.reduce_all_const_elim 1.18% : 0.000003s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.06% : 0.000002s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.89% : 0.000002s : 13: predicate.reshape_eliminate 0.76% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.13% : 0.000003s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.99% : 0.000002s : 12: predicate.shard_identity_eliminate 1.44% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 1.00% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.32% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.12% : 0.000010s : 43: predicate.switch_simplify 0.76% : 0.000002s : 13: predicate.tile_eliminate 0.77% : 0.000002s : 13: predicate.transpose_eliminate 1.80% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.78% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.62% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.60% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.73% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.42% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.49% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.87% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000144 4 7.20% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.80% : 0.000134s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089975 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000059s : 1: add_recomputation 0.03% : 0.000031s : 1: assign_add_opt 0.14% : 0.000123s : 1: auto_monad 0.03% : 0.000030s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.36% : 0.000322s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.03% : 0.000027s : 1: comm_op_add_attrs 0.01% : 0.000005s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.00004 optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000022s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000533s : 0.69% distribtued_split : 0.000034s : 0.04% validate : 0.000031s : 0.04% task_emit : 0.069717s : 90.53% execute : 0.000012s : 0.01% 2s : 1: distribtued_split 0.58% : 0.000526s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000009s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000006s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.61% : 0.000552s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001106s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000155s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 5.91% : 0.005317s : 1: opt_a 0.15% : 0.000135s : 1: opt_after_cconv 0.28% : 0.000250s : 1: opt_b 7.95% : 0.007154s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000070s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000028s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.26% : 0.000233s : 1: renormalize.infer 0.22% : 0.000196s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000134s : 1: rewriter_after_opt_a 0.04% : 0.000036s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000095s : 1: symbol_engine_optimizer 78.09% : 0.070261s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.62% : 0.002356s : 1: type_inference 0.07% : 0.000062s : 1: validate TotalTime = 0.0814861, [21] [bootstrap]: 0.00031759 [type_inference]: 0.00257838 [auto_monad]: 0.00013486 [graph_reusing]: 1.41002e-06 [inline]: 1.64006e-06 [parallel-infer-symbol]: 2.19001e-06 [pre_auto_parallel]: 2.55001e-05 [insert-virtual-dataset]: 2.74007e-06 [parallel-infer-symbol-second]: 4.30038e-07 [dataset_repeat_opt]: 1.05007e-06 [pipeline_split]: 1.55997e-06 [optimize]: 0.00740416, [52] [py_interpret_to_execute]: 1.631e-05 [rewriter_before_opt_a]: 3.495e-05 [opt_a]: 0.00547188, [2] [Cycle 1]: 0.00155858, [43] [expand_dump_flag]: 1.9999e-06 [switch_simplify]: 2.712e-05 [loop_unroll]: 1.37e-05 [a_1]: 0.00034619 [recompute_prepare]: 8.89995e-06 [updatestate_depend_eliminate]: 9.05001e-06 [updatestate_assign_eliminate]: 5.83997e-06 [updatestate_loads_eliminate]: 7.31996e-06 [parameter_eliminate]: 2.88e-06 [a_2]: 0.0001195 [accelerated_algorithm]: 8.99995e-06 [shard]: 1.80991e-06 [meta_shard_fg_expand]: 3.21004e-06 [shard_inline]: 8.89006e-06 [auto_parallel]: 1.265e-05 [parallel]: 5.74999e-06 [flash_sp]: 1.06799e-05 [merge_comm]: 7.81997e-06 [allreduce_fusion]: 5.23007e-06 [matmul_add_comm_reduction]: 1.075e-05 [allreduce_slice_to_reducescatter]: 5.19911e-07 [virtual_shard_identity]: 9.97004e-06 [virtual_dataset]: 8.27003e-06 [get_grad_eliminate_]: 7.86991e-06 [virtual_output]: 7.63009e-06 [merge_forward]: 6.10994e-06 [cell_reuse_recompute_pass]: 1.65997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.741e-05 [before_grad]: 1.37601e-05 [inplace_validation]: 5.08002e-06 [meta_fg_expand]: 5.25999e-06 [inplace_validation_after_expand]: 6.57002e-06 [flash_sp_send_recv_attached]: 4.92996e-06 [receive_attached]: 2.68989e-06 [after_resolve]: 1.083e-05 [a_after_grad]: 1.29699e-05 [special_op_eliminate]: 8.01997e-06 [renormalize]: 0.00045845 [add_forward_monad_depend]: 3.54997e-06 [auto_monad_grad]: 1.34006e-06 [auto_monad_eliminator]: 3.14701e-05 [cse]: 3.26199e-05 [a_3]: 5.912e-05 [Cycle 2]: 0.0007899, [43] [expand_dump_flag]: 9.79984e-07 [switch_simplify]: 9.09006e-06 [loop_unroll]: 7.68004e-06 [a_1]: 0.00020513 [recompute_prepare]: 7.12997e-06 [updatestate_depend_eliminate]: 5.86e-06 [updatestate_assign_eliminate]: 5.08002e-06 [updatestate_loads_eliminate]: 5.33997e-06 [parameter_eliminate]: 1.22003e-06 [a_2]: 0.00010854 [accelerated_algorithm]: 8.72998e-06 [shard]: 1.16008e-06 [meta_shard_fg_expand]: 2.53995e-06 [shard_inline]: 8.30996e-06 [auto_parallel]: 1.05399e-05 [parallel]: 3.22994e-06 [flash_sp]: 3.40003e-06 [merge_comm]: 5.58991e-06 [allreduce_fusion]: 4.65999e-06 [matmul_add_comm_reduction]: 8.00996e-06 [allreduce_slice_to_reducescatter]: 2.79979e-07 [virtual_shard_identity]: 9.20007e-06 [virtual_dataset]: 7.82998e-06 [get_grad_eliminate_]: 7.07002e-06 [virtual_output]: 1.001e-05 [merge_forward]: 5.04998e-06 [cell_reuse_recompute_pass]: 1.65997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.58501e-05 [before_grad]: 1.266e-05 [inplace_validation]: 5.00993e-06 [meta_fg_expand]: 4.54998e-06 [inplace_validation_after_expand]: 5.32996e-06 [flash_sp_send_recv_attached]: 8.40053e-07 [receive_attached]: 8.69972e-07 [after_resolve]: 9.65002e-06 [a_after Time group info: ------[substitution.] 0.000125 63 5.01% : 0.000006s : 2: substitution.depend_value_elim 1.81% : 0.000002s : 5: substitution.elim_not_effective 1.61% : 0.000002s : 5: substitution.fold_const_symbol 4.91% : 0.000006s : 6: substitution.graph_param_transform 51.91% : 0.000065s : 1: substitution.inline 3.99% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.41% : 0.000004s : 6: substitution.load_eliminater 2.46% : 0.000003s : 2: substitution.reduce_all_const_elim 6.22% : 0.000008s : 10: substitution.remove_not_recompute_node 2.02% : 0.000003s : 2: substitution.replace_old_param 8.64% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.02% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002548 2 88.65% : 0.002259s : 1: type_inference.infer 11.35% : 0.000289s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000427 1420 0.46% : 0.000002s : 13: predicate.accumulaten_eliminater 0.70% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.38% : 0.000002s : 12: predicate.addn_check_dump 0.50% : 0.000002s : 13: predicate.addn_zero_filter 0.45% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 1.28% : 0.000005s : 25: predicate.arithmetic_simplify 0.45% : 0.000002s : 13: predicate.cast_eliminate 0.43% : 0.000002s : 12: predicate.check_bprop_eliminate 0.41% : 0.000002s : 12: predicate.compare_switch_simplify 0.12% : 0.000000s : 6: predicate.const_output_eliminate 0.27% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 0.82% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.42% : 0.000002s : 12: predicate.depend_value_elim 0.44% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.48% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.53% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.17% : 0.000001s : 6: predicate.elim_not_effective 0.33% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 0.62% : 0.000003s : 19: predicate.environ_add_const_eliminate 0.65% : 0.000003s : 19: predicate.environ_get_add_eliminate 0.65% : 0.000003s : 19: predicate.environ_get_depend_swap 1.07% : 0.000005s : 31: predicate.environ_get_eliminate 0.64% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.45% : 0.000002s : 14: predicate.exchange_switch_depend_value 0.74% : 0.000003s : 14: predicate.float_depend_g_call 0.41% : 0.000002s : 12: predicate.float_environ_get_switch 0.61% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.12% : 0.000001s : 6: predicate.fold_const_symbol 0.46% : 0.000002s : 12: predicate.get_grad_eliminate 0.15% : 0.000001s : 6: predicate.graph_param_transform 0.41% : 0.000002s : 12: predicate.incorporate_call 0.36% : 0.000002s : 12: predicate.incorporate_call_switch 3.24% : 0.000014s : 63: predicate.inline 0.58% : 0.000002s : 12: predicate.inline_without_move 0.21% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.58% : 0.000002s : 12: predicate.less_batch_normalization 0.90% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 1.35% : 0.000006s : 38: predicate.load_eliminater 0.72% : 0.000003s : 6: predicate.loop_unroll_after_grad 0.68% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.00% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.43% : 0.000002s : 12: predicate.merge_addn 0.52% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.45% : 0.000002s : 12: predicat_grad]: 1.20299e-05 [special_op_eliminate]: 7.52998e-06 [renormalize]: 1.10012e-07 [add_forward_monad_depend]: 1.09e-06 [auto_monad_grad]: 1.13004e-06 [auto_monad_eliminator]: 1.79501e-05 [cse]: 1.929e-05 [a_3]: 5.02199e-05 [py_interpret_to_execute_after_opt_a]: 9.57993e-06 [slice_cell_reuse_recomputed_activation]: 2.54007e-06 [rewriter_after_opt_a]: 0.00015353 [convert_after_rewriter]: 1.371e-05 [order_py_execute_after_rewriter]: 6.28002e-06 [opt_b]: 0.00029053, [1] [Cycle 1]: 0.00028538, [7] [b_1]: 0.00020873 [b_2]: 1.02801e-05 [updatestate_depend_eliminate]: 5.63008e-06 [updatestate_assign_eliminate]: 4.61994e-06 [updatestate_loads_eliminate]: 5.4799e-06 [renormalize]: 3.50061e-07 [cse]: 1.92299e-05 [optimize_parallel_all_gather_comm]: 9.58005e-06 [overlap_param_gather]: 1.11002e-06 [cconv]: 2.434e-05 [loop_unroll]: 0.0004899 [opt_after_cconv]: 0.00013503, [1] [Cycle 1]: 0.00012894, [7] [c_1]: 5.28999e-05 [parameter_eliminate]: 2.25997e-06 [updatestate_depend_eliminate]: 8.39995e-06 [updatestate_assign_eliminate]: 5.04998e-06 [updatestate_loads_eliminate]: 5.87001e-06 [cse]: 2.14e-05 [renormalize]: 4.69969e-07 [remove_dup_value]: 1.33901e-05 [tuple_transform]: 7.082e-05, [1] [Cycle 1]: 6.633e-05, [2] [d_1]: 5.666e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 2.40002e-06 [add_cache_embedding]: 1.26699e-05 [add_recomputation]: 6.538e-05 [cse_after_recomputation]: 2.63801e-05, [1] [Cycle 1]: 2.158e-05, [1] [cse]: 1.65299e-05 [environ_conv]: 7.55e-06 [swap_dp_allreduce_reducescatter]: 7.40006e-06 [bias_add_comm_swap]: 2.35008e-06 [label_micro_interleaved_index]: 2.04996e-06 [label_fine_grained_interleaved_index]: 2.04996e-06 [merge_cast_opt]: 1.10001e-06 [slice_recompute_activation]: 1.84006e-06 [micro_interleaved_order_control]: 2.11003e-06 [assign_add_opt]: 2.89e-05 [ForceFp32Comm]: 8.69972e-07 [remove_cast_before_assign_add]: 7.26001e-06 [full_micro_interleaved_order_control]: 1.90001e-06 [reorder_send_recv_between_fp_bp]: 2.06998e-06 [comm_op_add_attrs]: 2.82701e-05 [add_comm_op_reuse_tag]: 1.96998e-06 [interleave_split_concat_branches]: 1.03994e-06 [interleave_parallel_branches]: 8.2003e-07 [overlap_opt_shard_in_pipeline]: 1.07998e-06 [overlap_opt_shard_grad_in_pipeline]: 2.43995e-06 [control_data_broadcast_order]: 1.22003e-06 [grouped_pairwise_exchange_alltoall]: 8.42998e-06 [offloading_packed_experts]: 1.94006e-06 [overlap_recompute_and_grad_model_parallel]: 2.05007e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.69971e-07 [overlap_recompute_allgather_and_fa_grad]: 8.655e-05 [overlap_grad_ring_attention]: 1.93994e-06 [overlap_grad_flash_sp]: 1.488e-05 [begin_end_overlap_inline]: 7.50064e-07 [split_matmul_comm_elemetwise]: 1.90001e-06 [split_layernorm_comm]: 1.67999e-06 [handle_group_info]: 5.07e-06 [symbol_engine_optimizer]: 9.313e-05, [1] [Cycle 1]: 8.84e-05, [6] [build]: 4.80993e-06 [elim_shapecalc]: 1.424e-05 [elim_not_effective]: 1.72299e-05 [opt_reshape]: 9.12999e-06 [fold_const_symbol]: 1.513e-05 [renormalize]: 3.70084e-07 [pipeline_parallel_scheduler]: 1.59e-06 [auto_monad_reorder]: 3.171e-05 [get_jit_bprop_graph]: 7.90111e-07 [rewriter_after_jit_bprop_graph]: 4.49945e-07 [eliminate_special_op_node]: 0.00051323 [distribtued_split]: 3.968e-05 [validate]: 3.491e-05 [task_emit]: 0.0701419 [execute]: 8.71997e-06 Sums bootstrap : 0.000318s : 0.41% type_inference : 0.002578s : 3.33% auto_monad : 0.000135s : 0.17%e.mini_step_allgather_replace 0.42% : 0.000002s : 13: predicate.minmaximum_grad 0.37% : 0.000002s : 6: predicate.mutable_eliminate 0.24% : 0.000001s : 6: predicate.opt_reshape 0.26% : 0.000001s : 6: predicate.parallel_virtual_node 0.66% : 0.000003s : 14: predicate.partial_defer_inline 0.70% : 0.000003s : 19: predicate.partial_eliminate 0.45% : 0.000002s : 13: predicate.print_const_string_wrapper 0.43% : 0.000002s : 12: predicate.reduce_all_const_elim 0.60% : 0.000003s : 13: predicate.reduce_eliminate 0.29% : 0.000001s : 12: predicate.remove_not_recompute_node 0.60% : 0.000003s : 25: predicate.replace_applicator 0.27% : 0.000001s : 12: predicate.replace_old_param 0.13% : 0.000001s : 6: predicate.reset_defer_inline 0.49% : 0.000002s : 13: predicate.reshape_eliminate 0.44% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.26% : 0.000001s : 6: predicate.row_tensor_eliminate 0.54% : 0.000002s : 12: predicate.same_eliminate 0.25% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.52% : 0.000002s : 12: predicate.shard_identity_eliminate 0.77% : 0.000003s : 18: predicate.special_op_eliminate 0.51% : 0.000002s : 12: predicate.specialize_transform 0.56% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.55% : 0.000002s : 12: predicate.stack_unstack_eliminate 1.24% : 0.000005s : 38: predicate.stopgrad_eliminater 0.23% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.52% : 0.000002s : 14: predicate.switch_defer_inline 0.88% : 0.000004s : 26: predicate.switch_layer_defer_inline 2.55% : 0.000011s : 43: predicate.switch_simplify 0.43% : 0.000002s : 13: predicate.tile_eliminate 0.54% : 0.000002s : 13: predicate.transpose_eliminate 0.98% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 0.94% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 0.85% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 46.05% : 0.000197s : 37: predicate.tuple_list_get_item_eliminator 0.87% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 1.43% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 0.93% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 1.30% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 1.92% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.27% : 0.000001s : 6: predicate.value_based_eliminate 0.44% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.43% : 0.000002s : 12: predicate.virtual_output_eliminate 0.27% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000153 4 6.01% : 0.000009s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.99% : 0.000144s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090665 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.06% : 0.000056s : 1: add_recomputation 0.03% : 0.000028s : 1: assign_add_opt 0.16% : 0.000147s : 1: auto_monad 0.03% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.38% : 0.000346s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.03% : 0.000026s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.00004 graph_reusing : 0.000001s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000036s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000551s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000228s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000018s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000459s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.06% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000154s : 0.20% optimize.convert_after_rewriter : 0.000014s : 0.02% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000209s : 0.27% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_de1s : 1: distribtued_split 0.60% : 0.000547s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000009s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000006s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.57% : 0.000519s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001128s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.38% : 0.000346s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000034s : 3: opt.transform.special_op_eliminate 0.06% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.05% : 0.005484s : 1: opt_a 0.16% : 0.000141s : 1: opt_after_cconv 0.49% : 0.000447s : 1: opt_b 8.25% : 0.007477s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.07% : 0.000059s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000005s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000009s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.28% : 0.000253s : 1: renormalize.infer 0.22% : 0.000199s : 1: renormalize.specialize 0.00% : 0.000003s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000155s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000093s : 1: symbol_engine_optimizer 76.93% : 0.069750s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.87% : 0.002598s : 1: type_inference 0.07% : 0.000063s : 1: validate pend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000010s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000490s : 0.63% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000065s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000008s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000087s : 0.11% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000513s : 0.66% distribtued_split : 0.000040s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.070142s : 90.67% execute : 0.000009s : 0.01% TotalTime = 0.0818801, [21] [bootstrap]: 0.00030128 [type_inference]: 0.0024839 [auto_monad]: 0.00012494 [graph_reusing]: 2.33005e-06 [inline]: 1.41992e-06 [parallel-infer-symbol]: 2.36998e-06 [pre_auto_parallel]: 2.398e-05 [insert-virtual-dataset]: 2.27999e-06 [parallel-infer-symbol-second]: 4.10015e-07 [dataset_repeat_opt]: 1.29e-06 [pipeline_split]: 1.50001e-06 [optimize]: 0.00737689, [52] [py_interpret_to_execute]: 1.554e-05 [rewriter_before_opt_a]: 3.53999e-05 [opt_a]: 0.00546016, [2] [Cycle 1]: 0.00157419, [43] [expand_dump_flag]: 3.25008e-06 [switch_simplify]: 2.98e-05 [loop_unroll]: 1.341e-05 [a_1]: 0.00035748 [recompute_prepare]: 8.78994e-06 [updatestate_depend_eliminate]: 8.11997e-06 [updatestate_assign_eliminate]: 6.48992e-06 [updatestate_loads_eliminate]: 7.75e-06 [parameter_eliminate]: 2.98e-06 [a_2]: 0.00012202 [accelerated_algorithm]: 8.61008e-06 [shard]: 2.46998e-06 [meta_shard_fg_expand]: 3.79002e-06 [shard_inline]: 8.76002e-06 [auto_parallel]: 1.218e-05 [parallel]: 6.99004e-06 [flash_sp]: 1.012e-05 [merge_comm]: 8.05e-06 [allreduce_fusion]: 5.47001e-06 [matmul_add_comm_reduction]: 1.09901e-05 [allreduce_slice_to_reducescatter]: 8.30041e-07 [virtual_shard_identity]: 1.00899e-05 [virtual_dataset]: 8.46002e-06 [get_grad_eliminate_]: 7.88004e-06 [virtual_output]: 7.71997e-06 [merge_forward]: 6.27001e-06 [cell_reuse_recompute_pass]: 1.74996e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.725e-05 [before_grad]: 1.48e-05 [inplace_validation]: 4.82006e-06 [meta_fg_expand]: 5.56e-06 [inplace_validation_after_expand]: 6.29993e-06 [flash_sp_send_recv_attached]: 4.35999e-06 [receive_attached]: 2.58e-06 [after_resolve]: 1.156e-05 [a_after_grad]: 1.312e-05 [special_op_eliminate]: 8.26002e-06 [renormalize]: 0.00045067 [add_forward_monad_depend]: 3.74997e-06 [auto_monad_grad]: 1.99e-06 [auto_monad_eliminator]: 3.171e-05 [cse]: 3.202e-05 [a_3]: 6.023e-05 [Cycle 2]: 0.00079726, [43] [expand_dump_flag]: 1.02003e-06 [switch_simplify]: 9.15001e-06 [loop_unroll]: 7.95e-06 [a_1]: 0.00020973 [recompute_prepare]: 7.55e-06 [updatestate_depend_eliminate]: 5.86e-06 [updatestate_assign_eliminate]: 5.20004e-06 [updatestate_loads_eliminate]: 5.60004e-06 [parameter_eliminate]: 1.29e-06 [a_2]: 0.00010813 [accelerated_algorithm]: 8.41008e-06 [shard]: 1.15007e-06 [meta_shard_fg_expand]: 2.48e-06 [shard_inline]: 8.31997e-06 [auto_parallel]: 1.116e-05 [parallel]: 3.90003e-06 [flash_sp]: 3.36999e-06 [merge_comm]: 5.92996e-06 [allreduce_fusion]: 4.64998e-06 [matmul_add_comm_reduction]: 8.02008e-06 [allreduce_slice_to_reducescatter]: 2.89991e-07 [virtual_shard_identity]: 9.26002e-06 [virtual_dataset]: 7.65e-06 [get_grad_eliminate_]: 9.67004e-06 [virtual_output]: 7.40995e-06 [merge_forward]: 4.93007e-06 [cell_reuse_recompute_pass]: 1.77999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.573e-05 [before_grad]: 1.279e-05 [inplace_validation]: 4.31004e-06 [meta_fg_expand]: 4.82996e-06 [inplace_validation_after_expand]: 5.08002e-06 [flash_sp_send_recv_attached]: 9.89996e-07 [receive_attached]: 8.29925e-07 [after_resolve]: 9.77993e-06 [a_after_grad]: 1.191e-05 [special_op_eliminate]: 7.63999e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 1.00001e-06 [auto_monad_grad]: 1.25996e-06 [auto_monad_eliminator]: 1.815e-05 [cse]: 2.006e-05 [a_3]: 4.99301e-05 [py_interpret_to_execute_after_opt_a]: 9.30997e-06 [slice_cell_reuse_recomputed_activation]: 2.24996e-06 [rewriter_after_opt_a]: 0.00014229 [convert_after_rewriter]: 1.10801e-05 [order_py_execute_after_rewriter]: 5.73997e-06 [opt_b]: 0.00024824, [1] [Cycle 1]: 0.00024301, [7] [b_1]: 0.00016819 [b_2]: 9.78995e-06 [updatestate_depend_eliminate]: 5.62007e-06 [updatestate_assign_eliminate]: 4.53996e-06 [updatestate_loads_eliminate]: 5.37001e-06 [renormalize]: 3.00002e-07 [cse]: 1.94e-05 [optimize_parallel_all_gather_comm]: 8.71997e-06 [overlap_param_gather]: 1.81003e-06 [cconv]: 2.273e-05 [loop_unroll]: 0.00055535 [opt_after_cconv]: 0.00013738, [1] [Cycle 1]: 0.00013153, [7] [c_1]: 5.453e-05 [parameter_eliminate]: 2.65008e-06 [updatestate_depend_eliminate]: 8.25e-06 [updatestate_assign_eliminate]: 4.66011e-06 [updatestate_loads_eliminate]: 5.41995e-06 [cse]: 2.27001e-05 [renormalize]: 3.40049e-07 [remove_dup_value]: 1.29001e-05 [tuple_transform]: 7.00899e-05, [1] [Cycle 1]: 6.58e-05, [2] [d_1]: 5.672e-05 [renormalize]: 2.10013e-07 [partial_unused_args_eliminate]: 1.86998e-06 [add_cache_embedding]: 1.285e-05 [add_recomputation]: 6.321e-05 [cse_after_recomputation]: 2.68701e-05, [1] [Cycle 1]: 2.226e-05, [1] [cse]: 1.711e-05 [environ_conv]: 7.88004e-06 [swap_dp_allreduce_reducescatter]: 7.31007e-06 [bias_add_comm_swap]: 2.23995e-06 [label_micro_interleaved_index]: 2.43005e-06 [label_fine_grained_interleaved_index]: 1.9701e-06 [merge_cast_opt]: 1.04005e-06 [slice_recompute_activation]: 2.01003e-06 [micro_interleaved_order_control]: 1.65997e-06 [assign_add_opt]: 2.93299e-05 [ForceFp32Comm]: 9.19914e-07 [remove_cast_before_assign_add]: 7.61996e-06 [full_micro_interleaved_order_control]: 2.25997e-06 [reorder_send_recv_between_fp_bp]: 1.97999e-06 [comm_op_add_attrs]: 2.552e-05 [add_comm_op_reuse_tag]: 1.82993e-06 [interleave_split_concat_branches]: 8.40053e-07 [interleave_parallel_branches]: 8.00006e-07 [overlap_opt_shard_in_pipeline]: 1.04995e-06 [overlap_opt_shard_grad_in_pipeline]: 2.05997e-06 [control_data_broadcast_order]: 1.14995e-06 [grouped_pairwise_exchange_alltoall]: 8.30996e-06 [offloading_packed_experts]: 1.97999e-06 [overlap_recompute_and_grad_model_parallel]: 1.90991e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.29925e-07 [overlap_recompute_allgather_and_fa_grad]: 6.835e-05 [overlap_grad_ring_attention]: 2.25997e-06 [overlap_grad_flash_sp]: 1.502e-05 [begin_end_overlap_inline]: 7.59959e-07 [split_matmul_comm_elemetwise]: 1.92004e-06 [split_layernorm_comm]: 1.81003e-06 [handle_group_info]: 4.65999e-06 [symbol_engine_optimizer]: 9.35299e-05, [1] [Cycle 1]: 8.846e-05, [6] [build]: 4.11004e-06 [elim_shapecalc]: 1.32399e-05 [elim_not_effective]: 1.794e-05 [opt_reshape]: 9.12999e-06 [fold_const_symbol]: 1.55e-05 [renormalize]: 3.89991e-07 [pipeline_parallel_scheduler]: 1.61992e-06 [auto_monad_reorder]: 2.98599e-05 [get_jit_bprop_graph]: 4.4005e-07 [rewriter_after_jit_bprop_graph]: 6.29923e-07 [eliminate_special_op_node]: 0.00052474 [distribtued_split]: 4.16799e-05 [validate]: 3.49501e-05 [task_emit]: 0.0706733 [execute]: 1.106e-05 Sums bootstrap : 0.000301s : 0.39% type_inference : 0.002484s : 3.19% auto_monad : 0.000125s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000567s : 0.73% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000230s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000018s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000028s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000451s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000110s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000142s : 0.18% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000168s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000555s : 0.71% optimize.opt_after_cconv.c_1 : 0.000055s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000008s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000026s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000008s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000068s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000016s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000525s : 0.67% distribtued_split : 0.000042s : 0.05% validate : 0.000035s : 0.04% task_emit : 0.070673s : 90.85% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000127 63 4.48% : 0.000006s : 2: substitution.depend_value_elim 2.20% : 0.000003s : 5: substitution.elim_not_effective 2.25% : 0.000003s : 5: substitution.fold_const_symbol 5.84% : 0.000007s : 6: substitution.graph_param_transform 50.22% : 0.000064s : 1: substitution.inline 4.03% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.32% : 0.000004s : 6: substitution.load_eliminater 2.11% : 0.000003s : 2: substitution.reduce_all_const_elim 6.23% : 0.000008s : 10: substitution.remove_not_recompute_node 2.51% : 0.000003s : 2: substitution.replace_old_param 8.82% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.98% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002549 2 88.59% : 0.002258s : 1: type_inference.infer 11.41% : 0.000291s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000063 1 100.00% : 0.000063s : 1: match.inline ------[predicate.] 0.000232 1420 0.87% : 0.000002s : 13: predicate.accumulaten_eliminater 1.11% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.85% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.18% : 0.000005s : 25: predicate.arithmetic_simplify 0.92% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.43% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.87% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.93% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.91% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.63% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 1.96% : 0.000005s : 31: predicate.environ_get_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.88% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.22% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.14% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.48% : 0.000001s : 6: predicate.graph_param_transform 0.74% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.72% : 0.000013s : 63: predicate.inline 1.07% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.09% : 0.000003s : 12: predicate.less_batch_normalization 1.65% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.42% : 0.000006s : 38: predicate.load_eliminater 1.31% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.21% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.78% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.82% : 0.000002s : 12: predicate.merge_addn 0.94% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.78% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.57% : 0.000001s : 6: predicate.parallel_virtual_node 1.23% : 0.000003s : 14: predicate.partial_defer_inline 1.21% : 0.000003s : 19: predicate.partial_eliminate 0.83% : 0.000002s : 13: predicate.print_const_string_wrapper 0.82% : 0.000002s : 12: predicate.reduce_all_const_elim 1.16% : 0.000003s : 13: predicate.reduce_eliminate 0.59% : 0.000001s : 12: predicate.remove_not_recompute_node 1.09% : 0.000003s : 25: predicate.replace_applicator 0.43% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.91% : 0.000002s : 13: predicate.reshape_eliminate 0.84% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.35% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.07% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.02% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.30% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.58% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.27% : 0.000010s : 43: predicate.switch_simplify 0.72% : 0.000002s : 13: predicate.tile_eliminate 0.76% : 0.000002s : 13: predicate.transpose_eliminate 1.72% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.51% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.66% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.44% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.56% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.35% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.43% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.57% : 0.000001s : 6: predicate.value_based_eliminate 0.87% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.60% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000173 4 10.12% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.88% : 0.000156s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090789 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000070s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.16% : 0.000149s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000346s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.04% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000018s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000047s : 1: distribtued_split 0.58% : 0.000527s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000499s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001114s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.22% : 0.000199s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.06% : 0.000051s : 4: opt.transform.symbol_engine_opt 6.03% : 0.005475s : 1: opt_a 0.15% : 0.000139s : 1: opt_after_cconv 0.32% : 0.000294s : 1: opt_b 8.16% : 0.007413s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.10% : 0.000092s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000245s : 1: renormalize.infer 0.23% : 0.000208s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000159s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000096s : 1: symbol_engine_optimizer 77.28% : 0.070165s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.86% : 0.002598s : 1: type_inference 0.08% : 0.000071s : 1: validate Time group info: ------[substitution.] 0.000136 63 4.73% : 0.000006s : 2: substitution.depend_value_elim 1.92% : 0.000003s : 5: substitution.elim_not_effective 2.32% : 0.000003s : 5: substitution.fold_const_symbol 5.29% : 0.000007s : 6: substitution.graph_param_transform 50.75% : 0.000069s : 1: substitution.inline 4.28% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.25% : 0.000004s : 6: substitution.load_eliminater 2.79% : 0.000004s : 2: substitution.reduce_all_const_elim 5.57% : 0.000008s : 10: substitution.remove_not_recompute_node 2.54% : 0.000003s : 2: substitution.replace_old_param 8.64% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.91% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002453 2 88.72% : 0.002176s : 1: type_inference.infer 11.28% : 0.000277s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000233 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.19% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.68% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.25% : 0.000005s : 25: predicate.arithmetic_simplify 0.93% : 0.000002s : 13: predicate.cast_eliminate 0.75% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000001s : 6: predicate.const_output_eliminate 0.39% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.36% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.86% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.94% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.61% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 1.96% : 0.000005s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.05% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.66% : 0.000013s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000002s : 12: predicate.less_batch_normalization 1.62% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.51% : 0.000006s : 38: predicate.load_eliminater 1.48% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.19% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.98% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.77% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.81% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.79% : 0.000002s : 6: predicate.mutable_eliminate 0.40% : 0.000001s : 6: predicate.opt_reshape 0.52% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.27% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.80% : 0.000002s : 12: predicate.reduce_all_const_elim 1.16% : 0.000003s : 13: predicate.reduce_eliminate 0.67% : 0.000002s : 12: predicate.remove_not_recompute_node 1.08% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.22% : 0.000001s : 6: predicate.reset_defer_inline 1.05% : 0.000002s : 13: predicate.reshape_eliminate 0.97% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.55% : 0.000001s : 6: predicate.row_tensor_eliminate 1.06% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.42% : 0.000003s : 18: predicate.special_op_eliminate 0.90% : 0.000002s : 12: predicate.specialize_transform 1.06% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.27% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.29% : 0.000010s : 43: predicate.switch_simplify 0.74% : 0.000002s : 13: predicate.tile_eliminate 0.83% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.51% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.66% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.50% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.39% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.41% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.46% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.75% : 0.000002s : 12: predicate.virtual_output_eliminate 0.49% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000158 4 10.03% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.97% : 0.000142s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091140 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000068s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.15% : 0.000138s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000326s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.03% : 0.000030s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.59% : 0.000540s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.62% : 0.000566s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001139s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.17% : 0.000158s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.06% : 0.000052s : 4: opt.transform.symbol_engine_opt 6.00% : 0.005464s : 1: opt_a 0.16% : 0.000141s : 1: opt_after_cconv 0.28% : 0.000252s : 1: opt_b 8.10% : 0.007385s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000006s : 1: overlap_param_gather 0.08% : 0.000074s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000030s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000244s : 1: renormalize.infer 0.22% : 0.000200s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000148s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000096s : 1: symbol_engine_optimizer 77.58% : 0.070704s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.75% : 0.002502s : 1: type_inference 0.08% : 0.000069s : 1: validate TotalTime = 0.0823232, [21] [bootstrap]: 0.00030175 [type_inference]: 0.00248403 [auto_monad]: 0.00012648 [graph_reusing]: 2.30002e-06 [inline]: 1.16997e-06 [parallel-infer-symbol]: 1.94996e-06 [pre_auto_parallel]: 2.329e-05 [insert-virtual-dataset]: 2.44996e-06 [parallel-infer-symbol-second]: 4.59957e-07 [dataset_repeat_opt]: 1.23994e-06 [pipeline_split]: 1.41002e-06 [optimize]: 0.00727417, [52] [py_interpret_to_execute]: 1.57601e-05 [rewriter_before_opt_a]: 3.19e-05 [opt_a]: 0.00538242, [2] [Cycle 1]: 0.00156922, [43] [expand_dump_flag]: 3.06999e-06 [switch_simplify]: 2.915e-05 [loop_unroll]: 1.332e-05 [a_1]: 0.00034742 [recompute_prepare]: 8.80007e-06 [updatestate_depend_eliminate]: 9.08994e-06 [updatestate_assign_eliminate]: 5.94999e-06 [updatestate_loads_eliminate]: 7.37002e-06 [parameter_eliminate]: 3.09001e-06 [a_2]: 0.00011852 [accelerated_algorithm]: 8.95001e-06 [shard]: 2.06998e-06 [meta_shard_fg_expand]: 4.23996e-06 [shard_inline]: 8.84e-06 [auto_parallel]: 1.27499e-05 [parallel]: 6.88992e-06 [flash_sp]: 1.054e-05 [merge_comm]: 7.98993e-06 [allreduce_fusion]: 5.08002e-06 [matmul_add_comm_reduction]: 1.08799e-05 [allreduce_slice_to_reducescatter]: 4.4005e-07 [virtual_shard_identity]: 1.03399e-05 [virtual_dataset]: 8.42998e-06 [get_grad_eliminate_]: 7.93999e-06 [virtual_output]: 7.83999e-06 [merge_forward]: 5.86e-06 [cell_reuse_recompute_pass]: 1.64995e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.72501e-05 [before_grad]: 1.354e-05 [inplace_validation]: 5.18002e-06 [meta_fg_expand]: 5.53997e-06 [inplace_validation_after_expand]: 5.81006e-06 [flash_sp_send_recv_attached]: 4.37e-06 [receive_attached]: 2.88e-06 [after_resolve]: 1.13e-05 [a_after_grad]: 1.263e-05 [special_op_eliminate]: 8.25e-06 [renormalize]: 0.00045938 [add_forward_monad_depend]: 3.46999e-06 [auto_monad_grad]: 1.94006e-06 [auto_monad_eliminator]: 3.13499e-05 [cse]: 3.06e-05 [a_3]: 5.985e-05 [Cycle 2]: 0.00079333, [43] [expand_dump_flag]: 1.12003e-06 [switch_simplify]: 9.19995e-06 [loop_unroll]: 7.78993e-06 [a_1]: 0.00020642 [recompute_prepare]: 7.28003e-06 [updatestate_depend_eliminate]: 5.69993e-06 [updatestate_assign_eliminate]: 5.10993e-06 [updatestate_loads_eliminate]: 5.46011e-06 [parameter_eliminate]: 1.32993e-06 [a_2]: 0.00010605 [accelerated_algorithm]: 8.47003e-06 [shard]: 1.13004e-06 [meta_shard_fg_expand]: 2.70002e-06 [shard_inline]: 7.89994e-06 [auto_parallel]: 1.11e-05 [parallel]: 3.54997e-06 [flash_sp]: 3.18e-06 [merge_comm]: 5.94999e-06 [allreduce_fusion]: 4.67e-06 [matmul_add_comm_reduction]: 8.03999e-06 [allreduce_slice_to_reducescatter]: 2.79979e-07 [virtual_shard_identity]: 9.00996e-06 [virtual_dataset]: 7.50995e-06 [get_grad_eliminate_]: 7.03998e-06 [virtual_output]: 7.67002e-06 [merge_forward]: 4.61005e-06 [cell_reuse_recompute_pass]: 1.81003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.534e-05 [before_grad]: 1.232e-05 [inplace_validation]: 4.33996e-06 [meta_fg_expand]: 4.95999e-06 [inplace_validation_after_expand]: 5.29992e-06 [flash_sp_send_recv_attached]: 8.89995e-07 [receive_attached]: 6.20028e-07 [after_resolve]: 9.39006e-06 [a_after_grad]: 1.186e-05 [special_op_eliminate]: 7.65e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 9.30042e-07 [auto_monad_grad]: 1.16008e-06 [auto_monad_eliminator]: 1.815e-05 [cse]: 2.105e-05 [a_3]: 4.879e-05 [py_interpret_to_execute_after_opt_a]: 9.84001e-06 [slice_cell_reuse_recomputed_activation]: 2.17999e-06 [rewriter_after_opt_a]: 0.00014082 [convert_after_rewriter]: 1.08801e-05 [order_py_execute_after_rewriter]: 5.83008e-06 [opt_b]: 0.00024563, [1] [Cycle 1]: 0.00024009, [7] [b_1]: 0.00016391 [b_2]: 9.29995e-06 [updatestate_depend_eliminate]: 5.25999e-06 [updatestate_assign_eliminate]: 4.59002e-06 [updatestate_loads_eliminate]: 5.49003e-06 [renormalize]: 2.10013e-07 [cse]: 1.951e-05 [optimize_parallel_all_gather_comm]: 8.2599e-06 [overlap_param_gather]: 1.16008e-06 [cconv]: 2.19099e-05 [loop_unroll]: 0.00053091 [opt_after_cconv]: 0.00014004, [1] [Cycle 1]: 0.00013382, [7] [c_1]: 5.466e-05 [parameter_eliminate]: 2.45997e-06 [updatestate_depend_eliminate]: 8.64e-06 [updatestate_assign_eliminate]: 5.14998e-06 [updatestate_loads_eliminate]: 5.23997e-06 [cse]: 2.22101e-05 [renormalize]: 4.00003e-07 [remove_dup_value]: 1.431e-05 [tuple_transform]: 6.95901e-05, [1] [Cycle 1]: 6.52199e-05, [2] [d_1]: 5.565e-05 [renormalize]: 1.79978e-07 [partial_unused_args_eliminate]: 2.10002e-06 [add_cache_embedding]: 1.383e-05 [add_recomputation]: 6.173e-05 [cse_after_recomputation]: 2.71499e-05, [1] [Cycle 1]: 2.23099e-05, [1] [cse]: 1.707e-05 [environ_conv]: 7.97003e-06 [swap_dp_allreduce_reducescatter]: 7.49005e-06 [bias_add_comm_swap]: 2.06998e-06 [label_micro_interleaved_index]: 1.91992e-06 [label_fine_grained_interleaved_index]: 1.93994e-06 [merge_cast_opt]: 1.09e-06 [slice_recompute_activation]: 1.73994e-06 [micro_interleaved_order_control]: 1.95007e-06 [assign_add_opt]: 2.91599e-05 [ForceFp32Comm]: 8.70088e-07 [remove_cast_before_assign_add]: 7.27002e-06 [full_micro_interleaved_order_control]: 1.85997e-06 [reorder_send_recv_between_fp_bp]: 1.93005e-06 [comm_op_add_attrs]: 2.66901e-05 [add_comm_op_reuse_tag]: 1.73005e-06 [interleave_split_concat_branches]: 6.9011e-07 [interleave_parallel_branches]: 6.6997e-07 [overlap_opt_shard_in_pipeline]: 1.00001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.22994e-06 [control_data_broadcast_order]: 9.89996e-07 [grouped_pairwise_exchange_alltoall]: 9.01998e-06 [offloading_packed_experts]: 1.86998e-06 [overlap_recompute_and_grad_model_parallel]: 1.59e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.5996e-07 [overlap_recompute_allgather_and_fa_grad]: 7.062e-05 [overlap_grad_ring_attention]: 1.96998e-06 [overlap_grad_flash_sp]: 1.429e-05 [begin_end_overlap_inline]: 7.3004e-07 [split_matmul_comm_elemetwise]: 1.80001e-06 [split_layernorm_comm]: 2.11003e-06 [handle_group_info]: 4.11994e-06 [symbol_engine_optimizer]: 9.214e-05, [1] [Cycle 1]: 8.71499e-05, [6] [build]: 4.49002e-06 [elim_shapecalc]: 1.31e-05 [elim_not_effective]: 1.757e-05 [opt_reshape]: 8.77993e-06 [fold_const_symbol]: 1.409e-05 [renormalize]: 2.89991e-07 [pipeline_parallel_scheduler]: 1.45007e-06 [auto_monad_reorder]: 2.999e-05 [get_jit_bprop_graph]: 4.49945e-07 [rewriter_after_jit_bprop_graph]: 4.30038e-07 [eliminate_special_op_node]: 0.00051566 [distribtued_split]: 3.952e-05 [validate]: 3.58099e-05 [task_emit]: 0.0712271 [execute]: 1.122e-05 Sums bootstrap : 0.000302s : 0.39% type_inference : 0.002484s : 3.17% auto_monad : 0.000126s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000023s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000032s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000554s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000225s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000459s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.06% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000141s : 0.18% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000164s : 0.21% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000531s : 0.68% optimize.opt_after_cconv.c_1 : 0.000055s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000071s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000004s : 0.01% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000516s : 0.66% distribtued_split : 0.000040s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.071227s : 90.99% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000131 63 4.88% : 0.000006s : 2: substitution.depend_value_elim 2.21% : 0.000003s : 5: substitution.elim_not_effective 1.89% : 0.000002s : 5: substitution.fold_const_symbol 5.21% : 0.000007s : 6: substitution.graph_param_transform 50.44% : 0.000066s : 1: substitution.inline 4.04% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.33% : 0.000004s : 6: substitution.load_eliminater 2.55% : 0.000003s : 2: substitution.reduce_all_const_elim 6.17% : 0.000008s : 10: substitution.remove_not_recompute_node 2.59% : 0.000003s : 2: substitution.replace_old_param 8.75% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.94% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002455 2 88.54% : 0.002174s : 1: type_inference.infer 11.46% : 0.000281s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000234 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.17% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.96% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.14% : 0.000005s : 25: predicate.arithmetic_simplify 0.79% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.42% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.77% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.84% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.59% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.10% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_depend_swap 2.01% : 0.000005s : 31: predicate.environ_get_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.78% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.22% : 0.000003s : 14: predicate.float_depend_g_call 0.78% : 0.000002s : 12: predicate.float_environ_get_switch 1.14% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.38% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.52% : 0.000013s : 63: predicate.inline 1.00% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000002s : 12: predicate.less_batch_normalization 1.76% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.45% : 0.000006s : 38: predicate.load_eliminater 1.51% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.18% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.76% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.84% : 0.000002s : 12: predicate.merge_addn 0.80% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.85% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.75% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.64% : 0.000001s : 6: predicate.parallel_virtual_node 1.17% : 0.000003s : 14: predicate.partial_defer_inline 1.17% : 0.000003s : 19: predicate.partial_eliminate 0.75% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.02% : 0.000002s : 13: predicate.reduce_eliminate 0.64% : 0.000001s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 1.10% : 0.000003s : 12: predicate.same_eliminate 0.45% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.97% : 0.000002s : 12: predicate.shard_identity_eliminate 1.46% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.35% : 0.000006s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.42% : 0.000010s : 43: predicate.switch_simplify 0.82% : 0.000002s : 13: predicate.tile_eliminate 0.86% : 0.000002s : 13: predicate.transpose_eliminate 1.82% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.60% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.50% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.61% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.53% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.59% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.76% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.28% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.45% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.52% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000167 4 9.06% : 0.000015s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.94% : 0.000152s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091449 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000067s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.15% : 0.000140s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000325s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.03% : 0.000031s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.58% : 0.000529s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.59% : 0.000541s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001112s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 5.89% : 0.005386s : 1: opt_a 0.16% : 0.000145s : 1: opt_after_cconv 0.27% : 0.000249s : 1: opt_b 7.96% : 0.007283s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000076s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000030s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.27% : 0.000246s : 1: renormalize.infer 0.23% : 0.000206s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000147s : 1: rewriter_after_opt_a 0.04% : 0.000036s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000006s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000095s : 1: symbol_engine_optimizer 77.92% : 0.071256s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.74% : 0.002502s : 1: type_inference 0.08% : 0.000069s : 1: validate TotalTime = 0.0832218, [21] [bootstrap]: 0.00031841 [type_inference]: 0.00263759 [auto_monad]: 0.00014891 [graph_reusing]: 2.71003e-06 [inline]: 1.43005e-06 [parallel-infer-symbol]: 2.35997e-06 [pre_auto_parallel]: 2.672e-05 [insert-virtual-dataset]: 3.25998e-06 [parallel-infer-symbol-second]: 3.29921e-07 [dataset_repeat_opt]: 1.35996e-06 [pipeline_split]: 1.57999e-06 [optimize]: 0.00758287, [52] [py_interpret_to_execute]: 1.716e-05 [rewriter_before_opt_a]: 3.677e-05 [opt_a]: 0.00561167, [2] [Cycle 1]: 0.00163326, [43] [expand_dump_flag]: 4.14008e-06 [switch_simplify]: 3.209e-05 [loop_unroll]: 1.39601e-05 [a_1]: 0.00036425 [recompute_prepare]: 9.04e-06 [updatestate_depend_eliminate]: 9.06002e-06 [updatestate_assign_eliminate]: 6.32997e-06 [updatestate_loads_eliminate]: 7.87992e-06 [parameter_eliminate]: 3.52005e-06 [a_2]: 0.00012291 [accelerated_algorithm]: 9.31008e-06 [shard]: 2.06998e-06 [meta_shard_fg_expand]: 4.15999e-06 [shard_inline]: 8.67003e-06 [auto_parallel]: 1.20699e-05 [parallel]: 7.95e-06 [flash_sp]: 1.242e-05 [merge_comm]: 8.31007e-06 [allreduce_fusion]: 5.19003e-06 [matmul_add_comm_reduction]: 1.15499e-05 [allreduce_slice_to_reducescatter]: 5.10016e-07 [virtual_shard_identity]: 1.00599e-05 [virtual_dataset]: 8.17992e-06 [get_grad_eliminate_]: 7.99994e-06 [virtual_output]: 8.11997e-06 [merge_forward]: 6.49993e-06 [cell_reuse_recompute_pass]: 1.81003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.767e-05 [before_grad]: 1.398e-05 [inplace_validation]: 5.42006e-06 [meta_fg_expand]: 5.80004e-06 [inplace_validation_after_expand]: 6.53998e-06 [flash_sp_send_recv_attached]: 4.90993e-06 [receive_attached]: 2.72004e-06 [after_resolve]: 1.192e-05 [a_after_grad]: 1.31499e-05 [special_op_eliminate]: 8.05e-06 [renormalize]: 0.00047906 [add_forward_monad_depend]: 3.80992e-06 [auto_monad_grad]: 2.01003e-06 [auto_monad_eliminator]: 3.528e-05 [cse]: 3.55e-05 [a_3]: 6.06601e-05 [Cycle 2]: 0.00080957, [43] [expand_dump_flag]: 1.20001e-06 [switch_simplify]: 9.4201e-06 [loop_unroll]: 8.10996e-06 [a_1]: 0.00021253 [recompute_prepare]: 7.96001e-06 [updatestate_depend_eliminate]: 6.04999e-06 [updatestate_assign_eliminate]: 5.24998e-06 [updatestate_loads_eliminate]: 5.37001e-06 [parameter_eliminate]: 1.33005e-06 [a_2]: 0.00010925 [accelerated_algorithm]: 8.7599e-06 [shard]: 1.09e-06 [meta_shard_fg_expand]: 2.7999e-06 [shard_inline]: 8.15e-06 [auto_parallel]: 1.111e-05 [parallel]: 3.41993e-06 [flash_sp]: 3.55998e-06 [merge_comm]: 5.59003e-06 [allreduce_fusion]: 4.61994e-06 [matmul_add_comm_reduction]: 7.89994e-06 [allreduce_slice_to_reducescatter]: 2.59955e-07 [virtual_shard_identity]: 9.68005e-06 [virtual_dataset]: 7.82008e-06 [get_grad_eliminate_]: 7.68004e-06 [virtual_output]: 7.99994e-06 [merge_forward]: 4.70004e-06 [cell_reuse_recompute_pass]: 2.03995e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.642e-05 [before_grad]: 1.289e-05 [inplace_validation]: 4.44998e-06 [meta_fg_expand]: 4.78001e-06 [inplace_validation_after_expand]: 5.93998e-06 [flash_sp_send_recv_attached]: 9.30042e-07 [receive_attached]: 6.6997e-07 [after_resolve]: 9.65991e-06 [a_after_grad]: 1.533e-05 [special_op_eliminate]: 7.80995e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 9.00007e-07 [auto_monad_grad]: 1.43005e-06 [auto_monad_eliminator]: 1.95201e-05 [cse]: 1.963e-05 [a_3]: 5.00401e-05 [py_interpret_to_execute_after_opt_a]: 1.00899e-05 [slice_cell_reuse_recomputed_activation]: 2.30991e-06 [rewriter_after_opt_a]: 0.0002001 [convert_after_rewriter]: 1.246e-05 [order_py_execute_after_rewriter]: 6.39004e-06 [opt_b]: 0.00025716, [1] [Cycle 1]: 0.00025166, [7] [b_1]: 0.0001715 [b_2]: 1.022e-05 [updatestate_depend_eliminate]: 5.68002e-06 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 5.72996e-06 [renormalize]: 2.60072e-07 [cse]: 1.983e-05 [optimize_parallel_all_gather_comm]: 8.65001e-06 [overlap_param_gather]: 1.04005e-06 [cconv]: 2.53301e-05 [loop_unroll]: 0.00050551 [opt_after_cconv]: 0.00013836, [1] [Cycle 1]: 0.00013162, [7] [c_1]: 5.554e-05 [parameter_eliminate]: 2.43995e-06 [updatestate_depend_eliminate]: 8.46002e-06 [updatestate_assign_eliminate]: 5.01995e-06 [updatestate_loads_eliminate]: 5.50994e-06 [cse]: 2.235e-05 [renormalize]: 3.80096e-07 [remove_dup_value]: 1.486e-05 [tuple_transform]: 7.431e-05, [1] [Cycle 1]: 6.97799e-05, [2] [d_1]: 5.903e-05 [renormalize]: 3.69968e-07 [partial_unused_args_eliminate]: 2.52994e-06 [add_cache_embedding]: 1.52399e-05 [add_recomputation]: 6.61401e-05 [cse_after_recomputation]: 2.736e-05, [1] [Cycle 1]: 2.259e-05, [1] [cse]: 1.716e-05 [environ_conv]: 7.86001e-06 [swap_dp_allreduce_reducescatter]: 7.71007e-06 [bias_add_comm_swap]: 2.19001e-06 [label_micro_interleaved_index]: 2.06998e-06 [label_fine_grained_interleaved_index]: 2.51993e-06 [merge_cast_opt]: 1.21002e-06 [slice_recompute_activation]: 1.92993e-06 [micro_interleaved_order_control]: 1.95007e-06 [assign_add_opt]: 3.04701e-05 [ForceFp32Comm]: 9.89996e-07 [remove_cast_before_assign_add]: 8.19005e-06 [full_micro_interleaved_order_control]: 2.13005e-06 [reorder_send_recv_between_fp_bp]: 2.58e-06 [comm_op_add_attrs]: 2.745e-05 [add_comm_op_reuse_tag]: 2.10002e-06 [interleave_split_concat_branches]: 1.33994e-06 [interleave_parallel_branches]: 9.2003e-07 [overlap_opt_shard_in_pipeline]: 1.29e-06 [overlap_opt_shard_grad_in_pipeline]: 2.5999e-06 [control_data_broadcast_order]: 1.14995e-06 [grouped_pairwise_exchange_alltoall]: 9.96003e-06 [offloading_packed_experts]: 2.10002e-06 [overlap_recompute_and_grad_model_parallel]: 2.20991e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.29925e-07 [overlap_recompute_allgather_and_fa_grad]: 6.39299e-05 [overlap_grad_ring_attention]: 1.96998e-06 [overlap_grad_flash_sp]: 1.52601e-05 [begin_end_overlap_inline]: 7.69971e-07 [split_matmul_comm_elemetwise]: 2.05997e-06 [split_layernorm_comm]: 1.93005e-06 [handle_group_info]: 5.18002e-06 [symbol_engine_optimizer]: 9.484e-05, [1] [Cycle 1]: 8.93e-05, [6] [build]: 5.18002e-06 [elim_shapecalc]: 1.431e-05 [elim_not_effective]: 1.765e-05 [opt_reshape]: 9.04e-06 [fold_const_symbol]: 1.441e-05 [renormalize]: 3.30037e-07 [pipeline_parallel_scheduler]: 1.83005e-06 [auto_monad_reorder]: 3.355e-05 [get_jit_bprop_graph]: 5.10016e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.00052786 [distribtued_split]: 4.154e-05 [validate]: 3.78301e-05 [task_emit]: 0.071586 [execute]: 1.141e-05 Sums bootstrap : 0.000318s : 0.40% type_inference : 0.002638s : 3.34% auto_monad : 0.000149s : 0.19% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000027s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000042s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000577s : 0.73% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000232s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000028s : 0.04% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000479s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000055s : 0.07% optimize.opt_a.cse : 0.000055s : 0.07% optimize.opt_a.a_3 : 0.000111s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000200s : 0.25% optimize.convert_after_rewriter : 0.000012s : 0.02% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000172s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000506s : 0.64% optimize.opt_after_cconv.c_1 : 0.000056s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000015s : 0.02% optimize.tuple_transform.d_1 : 0.000059s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000066s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000030s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000008s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000064s : 0.08% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000034s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000528s : 0.67% distribtued_split : 0.000042s : 0.05% validate : 0.000038s : 0.05% task_emit : 0.071586s : 90.59% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000140 63 5.20% : 0.000007s : 2: substitution.depend_value_elim 2.22% : 0.000003s : 5: substitution.elim_not_effective 1.82% : 0.000003s : 5: substitution.fold_const_symbol 5.33% : 0.000007s : 6: substitution.graph_param_transform 51.37% : 0.000072s : 1: substitution.inline 3.77% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.23% : 0.000005s : 6: substitution.load_eliminater 2.52% : 0.000004s : 2: substitution.reduce_all_const_elim 6.32% : 0.000009s : 10: substitution.remove_not_recompute_node 2.41% : 0.000003s : 2: substitution.replace_old_param 8.12% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.70% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002606 2 87.87% : 0.002290s : 1: type_inference.infer 12.13% : 0.000316s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000071 1 100.00% : 0.000071s : 1: match.inline ------[predicate.] 0.000237 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.29% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.83% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.30% : 0.000005s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.19% : 0.000000s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.56% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.83% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.32% : 0.000001s : 6: predicate.elim_not_effective 0.61% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.84% : 0.000004s : 31: predicate.environ_get_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.37% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.13% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.73% : 0.000002s : 12: predicate.incorporate_call 0.65% : 0.000002s : 12: predicate.incorporate_call_switch 5.64% : 0.000013s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.03% : 0.000002s : 12: predicate.less_batch_normalization 1.68% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.42% : 0.000006s : 38: predicate.load_eliminater 1.62% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.13% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.81% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.78% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.80% : 0.000002s : 13: predicate.minmaximum_grad 0.79% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.14% : 0.000003s : 14: predicate.partial_defer_inline 1.19% : 0.000003s : 19: predicate.partial_eliminate 0.81% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.03% : 0.000002s : 13: predicate.reduce_eliminate 0.65% : 0.000002s : 12: predicate.remove_not_recompute_node 1.11% : 0.000003s : 25: predicate.replace_applicator 0.43% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.79% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.06% : 0.000003s : 12: predicate.same_eliminate 0.44% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.40% : 0.000003s : 18: predicate.special_op_eliminate 0.89% : 0.000002s : 12: predicate.specialize_transform 0.94% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.09% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.18% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.85% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.41% : 0.000010s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.81% : 0.000002s : 13: predicate.transpose_eliminate 1.81% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.65% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.50% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.67% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.53% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.74% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.40% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.61% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.81% : 0.000002s : 12: predicate.virtual_output_eliminate 0.63% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000179 4 9.71% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.29% : 0.000162s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092735 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000071s : 1: add_recomputation 0.04% : 0.000035s : 1: assign_add_opt 0.18% : 0.000163s : 1: auto_monad 0.04% : 0.000040s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.37% : 0.000346s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.03% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000017s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.59% : 0.000543s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000007s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000516s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001159s : 80: opt.transform.opt_a 0.06% : 0.000054s : 1: opt.transform.opt_after_cconv 0.17% : 0.000160s : 27: opt.transform.opt_b 0.06% : 0.000057s : 1: opt.transform.opt_trans_graph 0.04% : 0.000035s : 3: opt.transform.special_op_eliminate 0.06% : 0.000051s : 4: opt.transform.symbol_engine_opt 6.06% : 0.005616s : 1: opt_a 0.15% : 0.000142s : 1: opt_after_cconv 0.28% : 0.000260s : 1: opt_b 8.19% : 0.007592s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.07% : 0.000069s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000022s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000012s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.28% : 0.000259s : 1: renormalize.infer 0.23% : 0.000213s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.22% : 0.000206s : 1: rewriter_after_opt_a 0.04% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000098s : 1: symbol_engine_optimizer 77.23% : 0.071616s : 1: task_emit 0.08% : 0.000078s : 1: tuple_transform 2.86% : 0.002657s : 1: type_inference 0.08% : 0.000074s : 1: validate TotalTime = 0.0836691, [21] [bootstrap]: 0.00031308 [type_inference]: 0.00250165 [auto_monad]: 0.0001067 [graph_reusing]: 1.63994e-06 [inline]: 1.23004e-06 [parallel-infer-symbol]: 1.3801e-06 [pre_auto_parallel]: 2.074e-05 [insert-virtual-dataset]: 2.13005e-06 [parallel-infer-symbol-second]: 3.89991e-07 [dataset_repeat_opt]: 1.07998e-06 [pipeline_split]: 1.03994e-06 [optimize]: 0.00744796, [52] [py_interpret_to_execute]: 1.36701e-05 [rewriter_before_opt_a]: 3.069e-05 [opt_a]: 0.00561463, [2] [Cycle 1]: 0.00148478, [43] [expand_dump_flag]: 1.85997e-06 [switch_simplify]: 2.62101e-05 [loop_unroll]: 1.36701e-05 [a_1]: 0.00033747 [recompute_prepare]: 9.42999e-06 [updatestate_depend_eliminate]: 7.63999e-06 [updatestate_assign_eliminate]: 5.57001e-06 [updatestate_loads_eliminate]: 5.84009e-06 [parameter_eliminate]: 2.11003e-06 [a_2]: 0.00012037 [accelerated_algorithm]: 9.04e-06 [shard]: 1.36008e-06 [meta_shard_fg_expand]: 3.21004e-06 [shard_inline]: 9.05001e-06 [auto_parallel]: 1.116e-05 [parallel]: 5.61005e-06 [flash_sp]: 8.11007e-06 [merge_comm]: 7.17991e-06 [allreduce_fusion]: 4.88991e-06 [matmul_add_comm_reduction]: 8.76002e-06 [allreduce_slice_to_reducescatter]: 3.30037e-07 [virtual_shard_identity]: 1.004e-05 [virtual_dataset]: 8.06001e-06 [get_grad_eliminate_]: 8.43999e-06 [virtual_output]: 7.93999e-06 [merge_forward]: 5.46e-06 [cell_reuse_recompute_pass]: 1.42003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.69e-05 [before_grad]: 1.44e-05 [inplace_validation]: 4.71005e-06 [meta_fg_expand]: 5.29003e-06 [inplace_validation_after_expand]: 5.27001e-06 [flash_sp_send_recv_attached]: 3.26999e-06 [receive_attached]: 2.25997e-06 [after_resolve]: 1.08801e-05 [a_after_grad]: 1.362e-05 [special_op_eliminate]: 8.32998e-06 [renormalize]: 0.00042443 [add_forward_monad_depend]: 2.58e-06 [auto_monad_grad]: 1.73005e-06 [auto_monad_eliminator]: 2.317e-05 [cse]: 2.64699e-05 [a_3]: 6.018e-05 [Cycle 2]: 0.00080197, [43] [expand_dump_flag]: 8.00006e-07 [switch_simplify]: 9.80997e-06 [loop_unroll]: 8.06001e-06 [a_1]: 0.00020828 [recompute_prepare]: 8.15e-06 [updatestate_depend_eliminate]: 5.63008e-06 [updatestate_assign_eliminate]: 4.82996e-06 [updatestate_loads_eliminate]: 4.87e-06 [parameter_eliminate]: 1.07998e-06 [a_2]: 0.0001105 [accelerated_algorithm]: 9.19006e-06 [shard]: 1.05996e-06 [meta_shard_fg_expand]: 2.75997e-06 [shard_inline]: 8.27003e-06 [auto_parallel]: 9.71009e-06 [parallel]: 3.21004e-06 [flash_sp]: 2.25008e-06 [merge_comm]: 5.68002e-06 [allreduce_fusion]: 4.70004e-06 [matmul_add_comm_reduction]: 6.97002e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 9.74e-06 [virtual_dataset]: 7.96001e-06 [get_grad_eliminate_]: 7.61996e-06 [virtual_output]: 7.81007e-06 [merge_forward]: 4.05998e-06 [cell_reuse_recompute_pass]: 1.62004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.621e-05 [before_grad]: 1.301e-05 [inplace_validation]: 4.15999e-06 [meta_fg_expand]: 5.08991e-06 [inplace_validation_after_expand]: 5.19003e-06 [flash_sp_send_recv_attached]: 9.10019e-07 [receive_attached]: 7.69971e-07 [after_resolve]: 1.301e-05 [a_after_grad]: 1.208e-05 [special_op_eliminate]: 7.86001e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 8.30041e-07 [auto_monad_grad]: 1.01002e-06 [auto_monad_eliminator]: 1.68e-05 [cse]: 1.904e-05 [a_3]: 5.086e-05 [py_interpret_to_execute_after_opt_a]: 8.86002e-06 [slice_cell_reuse_recomputed_activation]: 2.16998e-06 [rewriter_after_opt_a]: 0.00012605 [convert_after_rewriter]: 7.86001e-06 [order_py_execute_after_rewriter]: 5.22006e-06 [opt_b]: 0.00029089, [1] [Cycle 1]: 0.00028594, [7] [b_1]: 0.00020657 [b_2]: 1.09101e-05 [updatestate_depend_eliminate]: 5.34998e-06 [updatestate_assign_eliminate]: 4.81994e-06 [updatestate_loads_eliminate]: 5.04998e-06 [renormalize]: 2.79979e-07 [cse]: 1.937e-05 [optimize_parallel_all_gather_comm]: 7.55e-06 [overlap_param_gather]: 1.04995e-06 [cconv]: 1.54499e-05 [loop_unroll]: 0.00049268 [opt_after_cconv]: 0.00013163, [1] [Cycle 1]: 0.00012581, [7] [c_1]: 5.308e-05 [parameter_eliminate]: 2.11992e-06 [updatestate_depend_eliminate]: 7.30995e-06 [updatestate_assign_eliminate]: 4.69002e-06 [updatestate_loads_eliminate]: 5.11005e-06 [cse]: 2.07899e-05 [renormalize]: 3.20026e-07 [remove_dup_value]: 1.01901e-05 [tuple_transform]: 7.068e-05, [1] [Cycle 1]: 6.627e-05, [2] [d_1]: 5.685e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 1.50001e-06 [add_cache_embedding]: 1.14e-05 [add_recomputation]: 5.56801e-05 [cse_after_recomputation]: 2.695e-05, [1] [Cycle 1]: 2.232e-05, [1] [cse]: 1.661e-05 [environ_conv]: 6.63998e-06 [swap_dp_allreduce_reducescatter]: 7.09994e-06 [bias_add_comm_swap]: 1.46008e-06 [label_micro_interleaved_index]: 1.32003e-06 [label_fine_grained_interleaved_index]: 1.04995e-06 [merge_cast_opt]: 7.20029e-07 [slice_recompute_activation]: 1.01002e-06 [micro_interleaved_order_control]: 1.72993e-06 [assign_add_opt]: 2.493e-05 [ForceFp32Comm]: 9.89996e-07 [remove_cast_before_assign_add]: 5.83008e-06 [full_micro_interleaved_order_control]: 1.41002e-06 [reorder_send_recv_between_fp_bp]: 1.07998e-06 [comm_op_add_attrs]: 2.236e-05 [add_comm_op_reuse_tag]: 1.57009e-06 [interleave_split_concat_branches]: 1.02993e-06 [interleave_parallel_branches]: 4.89992e-07 [overlap_opt_shard_in_pipeline]: 1.09e-06 [overlap_opt_shard_grad_in_pipeline]: 1.64006e-06 [control_data_broadcast_order]: 1.02003e-06 [grouped_pairwise_exchange_alltoall]: 6.27001e-06 [offloading_packed_experts]: 1.23994e-06 [overlap_recompute_and_grad_model_parallel]: 1.06997e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.00004e-07 [overlap_recompute_allgather_and_fa_grad]: 7.51299e-05 [overlap_grad_ring_attention]: 1.21002e-06 [overlap_grad_flash_sp]: 1.162e-05 [begin_end_overlap_inline]: 5.10016e-07 [split_matmul_comm_elemetwise]: 1.66998e-06 [split_layernorm_comm]: 1.09e-06 [handle_group_info]: 3.66999e-06 [symbol_engine_optimizer]: 9.476e-05, [1] [Cycle 1]: 8.985e-05, [6] [build]: 4.94998e-06 [elim_shapecalc]: 1.41e-05 [elim_not_effective]: 1.72299e-05 [opt_reshape]: 9.98995e-06 [fold_const_symbol]: 1.478e-05 [renormalize]: 2.40048e-07 [pipeline_parallel_scheduler]: 9.00007e-07 [auto_monad_reorder]: 2.432e-05 [get_jit_bprop_graph]: 3.10014e-07 [rewriter_after_jit_bprop_graph]: 3.20026e-07 [eliminate_special_op_node]: 0.00051364 [distribtued_split]: 3.386e-05 [validate]: 3.11701e-05 [task_emit]: 0.0724264 [execute]: 8.40006e-06 Sums bootstrap : 0.000313s : 0.39% type_inference : 0.002502s : 3.15% auto_monad : 0.000107s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000031s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000036s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000546s : 0.69% optimize.opt_a.recompute_prepare : 0.000018s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000231s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000002s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000024s : 0.03% optimize.opt_a.a_after_grad : 0.000026s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000424s : 0.54% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000046s : 0.06% optimize.opt_a.a_3 : 0.000111s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000126s : 0.16% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000207s : 0.26% optimize.opt_b.b_2 : 0.000011s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000493s : 0.62% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000056s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000025s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000022s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000006s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000075s : 0.09% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.01% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000004s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000514s : 0.65% distribtued_split : 0.000034s : 0.04% validate : 0.000031s : 0.04% task_emit : 0.072426s : 91.29% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000116 63 4.37% : 0.000005s : 2: substitution.depend_value_elim 2.20% : 0.000003s : 5: substitution.elim_not_effective 1.86% : 0.000002s : 5: substitution.fold_const_symbol 5.80% : 0.000007s : 6: substitution.graph_param_transform 49.11% : 0.000057s : 1: substitution.inline 4.68% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.49% : 0.000004s : 6: substitution.load_eliminater 2.17% : 0.000003s : 2: substitution.reduce_all_const_elim 6.89% : 0.000008s : 10: substitution.remove_not_recompute_node 2.30% : 0.000003s : 2: substitution.replace_old_param 9.05% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.08% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002478 2 89.79% : 0.002225s : 1: type_inference.infer 10.21% : 0.000253s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000056 1 100.00% : 0.000056s : 1: match.inline ------[predicate.] 0.000236 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.12% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.77% : 0.000002s : 12: predicate.addn_check_dump 0.93% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.29% : 0.000005s : 25: predicate.arithmetic_simplify 0.89% : 0.000002s : 13: predicate.cast_eliminate 0.88% : 0.000002s : 12: predicate.check_bprop_eliminate 0.76% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.52% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.18% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.88% : 0.000002s : 12: predicate.depend_value_elim 0.80% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.13% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.07% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_depend_swap 1.96% : 0.000005s : 31: predicate.environ_get_eliminate 1.06% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.86% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.18% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000001s : 6: predicate.fold_const_symbol 0.92% : 0.000002s : 12: predicate.get_grad_eliminate 0.25% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.39% : 0.000013s : 63: predicate.inline 0.99% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.14% : 0.000003s : 12: predicate.less_batch_normalization 1.82% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.44% : 0.000006s : 38: predicate.load_eliminater 1.35% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.74% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.78% : 0.000002s : 12: predicate.merge_addn 0.88% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.83% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.72% : 0.000002s : 13: predicate.minmaximum_grad 0.69% : 0.000002s : 6: predicate.mutable_eliminate 0.53% : 0.000001s : 6: predicate.opt_reshape 0.58% : 0.000001s : 6: predicate.parallel_virtual_node 1.18% : 0.000003s : 14: predicate.partial_defer_inline 1.27% : 0.000003s : 19: predicate.partial_eliminate 0.75% : 0.000002s : 13: predicate.print_const_string_wrapper 0.98% : 0.000002s : 12: predicate.reduce_all_const_elim 1.17% : 0.000003s : 13: predicate.reduce_eliminate 0.52% : 0.000001s : 12: predicate.remove_not_recompute_node 1.03% : 0.000002s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.91% : 0.000002s : 13: predicate.reshape_eliminate 0.83% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.02% : 0.000002s : 12: predicate.same_eliminate 0.44% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.60% : 0.000004s : 18: predicate.special_op_eliminate 1.00% : 0.000002s : 12: predicate.specialize_transform 0.97% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.94% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.76% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.18% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.88% : 0.000002s : 13: predicate.transpose_eliminate 1.87% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.63% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.62% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.55% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.40% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.51% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.85% : 0.000002s : 12: predicate.virtual_output_eliminate 0.63% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000152 4 6.95% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.05% : 0.000142s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092947 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.06% : 0.000060s : 1: add_recomputation 0.03% : 0.000029s : 1: assign_add_opt 0.13% : 0.000119s : 1: auto_monad 0.03% : 0.000031s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.36% : 0.000336s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.03% : 0.000027s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.04% : 0.000041s : 1: distribtued_split 0.57% : 0.000526s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.01% : 0.000009s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.54% : 0.000502s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001121s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000160s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000051s : 4: opt.transform.symbol_engine_opt 6.04% : 0.005618s : 1: opt_a 0.15% : 0.000135s : 1: opt_after_cconv 0.32% : 0.000294s : 1: opt_b 8.02% : 0.007456s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.09% : 0.000081s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000009s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.24% : 0.000223s : 1: renormalize.infer 0.21% : 0.000196s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000132s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000098s : 1: symbol_engine_optimizer 77.95% : 0.072452s : 1: task_emit 0.08% : 0.000075s : 1: tuple_transform 2.71% : 0.002518s : 1: type_inference 0.07% : 0.000063s : 1: validate TotalTime = 0.084095, [21] [bootstrap]: 0.00034041 [type_inference]: 0.00269574 [auto_monad]: 0.00013491 [graph_reusing]: 3.04997e-06 [inline]: 1.54995e-06 [parallel-infer-symbol]: 2.54007e-06 [pre_auto_parallel]: 2.49e-05 [insert-virtual-dataset]: 2.43995e-06 [parallel-infer-symbol-second]: 5.49946e-07 [dataset_repeat_opt]: 1.61002e-06 [pipeline_split]: 2.21003e-06 [optimize]: 0.00767409, [52] [py_interpret_to_execute]: 1.633e-05 [rewriter_before_opt_a]: 3.504e-05 [opt_a]: 0.00575939, [2] [Cycle 1]: 0.00158639, [43] [expand_dump_flag]: 3.75998e-06 [switch_simplify]: 3.096e-05 [loop_unroll]: 1.335e-05 [a_1]: 0.00035212 [recompute_prepare]: 9.01008e-06 [updatestate_depend_eliminate]: 8.64e-06 [updatestate_assign_eliminate]: 6.49004e-06 [updatestate_loads_eliminate]: 8.10006e-06 [parameter_eliminate]: 3.74008e-06 [a_2]: 0.00011836 [accelerated_algorithm]: 8.91997e-06 [shard]: 2.23005e-06 [meta_shard_fg_expand]: 3.83006e-06 [shard_inline]: 8.89006e-06 [auto_parallel]: 1.206e-05 [parallel]: 7.88993e-06 [flash_sp]: 9.85991e-06 [merge_comm]: 8.12009e-06 [allreduce_fusion]: 4.99003e-06 [matmul_add_comm_reduction]: 1.136e-05 [allreduce_slice_to_reducescatter]: 4.49945e-07 [virtual_shard_identity]: 9.99996e-06 [virtual_dataset]: 8.2599e-06 [get_grad_eliminate_]: 8.06001e-06 [virtual_output]: 8.00996e-06 [merge_forward]: 5.59993e-06 [cell_reuse_recompute_pass]: 1.94006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.77e-05 [before_grad]: 1.37901e-05 [inplace_validation]: 5.10004e-06 [meta_fg_expand]: 5.66e-06 [inplace_validation_after_expand]: 5.93998e-06 [flash_sp_send_recv_attached]: 4.87e-06 [receive_attached]: 3.08e-06 [after_resolve]: 1.171e-05 [a_after_grad]: 1.23699e-05 [special_op_eliminate]: 7.86001e-06 [renormalize]: 0.00046221 [add_forward_monad_depend]: 3.53996e-06 [auto_monad_grad]: 2.23995e-06 [auto_monad_eliminator]: 3.30399e-05 [cse]: 3.37999e-05 [a_3]: 5.924e-05 [Cycle 2]: 0.00083807, [43] [expand_dump_flag]: 1.11002e-06 [switch_simplify]: 9.56992e-06 [loop_unroll]: 7.86001e-06 [a_1]: 0.00020635 [recompute_prepare]: 7.67002e-06 [updatestate_depend_eliminate]: 5.90994e-06 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 5.30004e-06 [parameter_eliminate]: 1.34995e-06 [a_2]: 0.0001067 [accelerated_algorithm]: 8.49005e-06 [shard]: 1.39e-06 [meta_shard_fg_expand]: 2.65997e-06 [shard_inline]: 8.01007e-06 [auto_parallel]: 1.101e-05 [parallel]: 3.71994e-06 [flash_sp]: 3.40003e-06 [merge_comm]: 5.7799e-06 [allreduce_fusion]: 4.90004e-06 [matmul_add_comm_reduction]: 7.96001e-06 [allreduce_slice_to_reducescatter]: 2.79979e-07 [virtual_shard_identity]: 9.69006e-06 [virtual_dataset]: 7.36001e-06 [get_grad_eliminate_]: 7.16001e-06 [virtual_output]: 7.71007e-06 [merge_forward]: 4.44998e-06 [cell_reuse_recompute_pass]: 1.90001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.56e-05 [before_grad]: 1.254e-05 [inplace_validation]: 4.22995e-06 [meta_fg_expand]: 4.96011e-06 [inplace_validation_after_expand]: 8.37003e-06 [flash_sp_send_recv_attached]: 9.40054e-07 [receive_attached]: 8.30041e-07 [after_resolve]: 9.81009e-06 [a_after_grad]: 1.195e-05 [special_op_eliminate]: 7.23009e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 1.04005e-06 [auto_monad_grad]: 1.22003e-06 [auto_monad_eliminator]: 1.946e-05 [cse]: 2.601e-05 [a_3]: 5.47001e-05 [py_interpret_to_execute_after_opt_a]: 9.29995e-06 [slice_cell_reuse_recomputed_activation]: 2.07999e-06 [rewriter_after_opt_a]: 0.00015015 [convert_after_rewriter]: 1.222e-05 [order_py_execute_after_rewriter]: 6.09003e-06 [opt_b]: 0.00024637, [1] [Cycle 1]: 0.00024127, [7] [b_1]: 0.00016456 [b_2]: 9.95002e-06 [updatestate_depend_eliminate]: 5.57001e-06 [updatestate_assign_eliminate]: 4.45999e-06 [updatestate_loads_eliminate]: 5.35999e-06 [renormalize]: 2.10013e-07 [cse]: 1.996e-05 [optimize_parallel_all_gather_comm]: 8.51997e-06 [overlap_param_gather]: 1.45996e-06 [cconv]: 2.346e-05 [loop_unroll]: 0.00051932 [opt_after_cconv]: 0.00013756, [1] [Cycle 1]: 0.00013117, [7] [c_1]: 5.434e-05 [parameter_eliminate]: 2.35997e-06 [updatestate_depend_eliminate]: 8.06001e-06 [updatestate_assign_eliminate]: 4.74998e-06 [updatestate_loads_eliminate]: 5.41995e-06 [cse]: 2.271e-05 [renormalize]: 4.7998e-07 [remove_dup_value]: 1.42801e-05 [tuple_transform]: 7.117e-05, [1] [Cycle 1]: 6.688e-05, [2] [d_1]: 5.705e-05 [renormalize]: 2.00002e-07 [partial_unused_args_eliminate]: 2.07999e-06 [add_cache_embedding]: 1.389e-05 [add_recomputation]: 6.41401e-05 [cse_after_recomputation]: 2.78801e-05, [1] [Cycle 1]: 2.284e-05, [1] [cse]: 1.75e-05 [environ_conv]: 7.49994e-06 [swap_dp_allreduce_reducescatter]: 7.59005e-06 [bias_add_comm_swap]: 2.51993e-06 [label_micro_interleaved_index]: 1.77999e-06 [label_fine_grained_interleaved_index]: 1.84996e-06 [merge_cast_opt]: 1.24006e-06 [slice_recompute_activation]: 1.83994e-06 [micro_interleaved_order_control]: 1.79e-06 [assign_add_opt]: 3.023e-05 [ForceFp32Comm]: 9.19914e-07 [remove_cast_before_assign_add]: 6.68992e-06 [full_micro_interleaved_order_control]: 2.2999e-06 [reorder_send_recv_between_fp_bp]: 2.21003e-06 [comm_op_add_attrs]: 2.885e-05 [add_comm_op_reuse_tag]: 2.20002e-06 [interleave_split_concat_branches]: 8.79983e-07 [interleave_parallel_branches]: 7.40052e-07 [overlap_opt_shard_in_pipeline]: 1.05007e-06 [overlap_opt_shard_grad_in_pipeline]: 2.51003e-06 [control_data_broadcast_order]: 1.14006e-06 [grouped_pairwise_exchange_alltoall]: 9.51998e-06 [offloading_packed_experts]: 2.46009e-06 [overlap_recompute_and_grad_model_parallel]: 2.17999e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.10017e-07 [overlap_recompute_allgather_and_fa_grad]: 7.582e-05 [overlap_grad_ring_attention]: 2.17999e-06 [overlap_grad_flash_sp]: 1.571e-05 [begin_end_overlap_inline]: 8.30041e-07 [split_matmul_comm_elemetwise]: 1.77009e-06 [split_layernorm_comm]: 2.14006e-06 [handle_group_info]: 4.81994e-06 [symbol_engine_optimizer]: 9.38999e-05, [1] [Cycle 1]: 8.894e-05, [6] [build]: 5.74999e-06 [elim_shapecalc]: 1.38801e-05 [elim_not_effective]: 1.827e-05 [opt_reshape]: 9.04e-06 [fold_const_symbol]: 1.39801e-05 [renormalize]: 4.50062e-07 [pipeline_parallel_scheduler]: 1.52003e-06 [auto_monad_reorder]: 3.16999e-05 [get_jit_bprop_graph]: 5.20027e-07 [rewriter_after_jit_bprop_graph]: 8.10018e-07 [eliminate_special_op_node]: 0.00053484 [distribtued_split]: 4.235e-05 [validate]: 3.567e-05 [task_emit]: 0.0723103 [execute]: 1.113e-05 Sums bootstrap : 0.000340s : 0.43% type_inference : 0.002696s : 3.38% auto_monad : 0.000135s : 0.17% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000041s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000558s : 0.70% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000225s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000014s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000462s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000052s : 0.07% optimize.opt_a.cse : 0.000060s : 0.08% optimize.opt_a.a_3 : 0.000114s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000150s : 0.19% optimize.convert_after_rewriter : 0.000012s : 0.02% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000519s : 0.65% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000030s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000029s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000076s : 0.10% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000006s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000535s : 0.67% distribtued_split : 0.000042s : 0.05% validate : 0.000036s : 0.04% task_emit : 0.072310s : 90.71% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000137 63 4.67% : 0.000006s : 2: substitution.depend_value_elim 2.28% : 0.000003s : 5: substitution.elim_not_effective 1.79% : 0.000002s : 5: substitution.fold_const_symbol 5.54% : 0.000008s : 6: substitution.graph_param_transform 50.59% : 0.000069s : 1: substitution.inline 4.17% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.34% : 0.000005s : 6: substitution.load_eliminater 2.78% : 0.000004s : 2: substitution.reduce_all_const_elim 5.90% : 0.000008s : 10: substitution.remove_not_recompute_node 2.86% : 0.000004s : 2: substitution.replace_old_param 8.22% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.85% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002665 2 89.09% : 0.002374s : 1: type_inference.infer 10.91% : 0.000291s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000234 1420 1.02% : 0.000002s : 13: predicate.accumulaten_eliminater 1.13% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.79% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.34% : 0.000005s : 25: predicate.arithmetic_simplify 0.88% : 0.000002s : 13: predicate.cast_eliminate 0.75% : 0.000002s : 12: predicate.check_bprop_eliminate 0.76% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.29% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.78% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.28% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000003s : 19: predicate.environ_get_depend_swap 1.92% : 0.000004s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.19% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.10% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.25% : 0.000001s : 6: predicate.fold_const_symbol 0.86% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.58% : 0.000013s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.15% : 0.000003s : 12: predicate.less_batch_normalization 1.81% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.37% : 0.000006s : 38: predicate.load_eliminater 1.28% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.76% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.90% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.79% : 0.000002s : 13: predicate.minmaximum_grad 0.79% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.43% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.82% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000003s : 13: predicate.reduce_eliminate 0.67% : 0.000002s : 12: predicate.remove_not_recompute_node 1.10% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.83% : 0.000002s : 13: predicate.reshape_eliminate 0.84% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.53% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.42% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 1.05% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.92% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.33% : 0.000010s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.93% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.74% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.52% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.69% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.52% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.73% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.32% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.32% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.78% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.91% : 0.000002s : 12: predicate.virtual_output_eliminate 0.56% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000167 4 9.87% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.13% : 0.000151s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.093644 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000069s : 1: add_recomputation 0.04% : 0.000034s : 1: assign_add_opt 0.16% : 0.000148s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.39% : 0.000364s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.04% : 0.000033s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000017s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.59% : 0.000549s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000529s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.20% : 0.001126s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.16% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.04% : 0.000034s : 3: opt.transform.special_op_eliminate 0.05% : 0.000051s : 4: opt.transform.symbol_engine_opt 6.15% : 0.005763s : 1: opt_a 0.15% : 0.000142s : 1: opt_after_cconv 0.27% : 0.000250s : 1: opt_b 8.20% : 0.007683s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000020s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.09% : 0.000081s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.27% : 0.000252s : 1: renormalize.infer 0.22% : 0.000204s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000156s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000097s : 1: symbol_engine_optimizer 77.25% : 0.072340s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.90% : 0.002714s : 1: type_inference 0.07% : 0.000070s : 1: validate [WARNING] PARALLEL(169251,ffff805f5c10,python3.7):2025-02-07-15:54:29.685.037 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169260,ffff90a16c10,python3.7):2025-02-07-15:54:29.685.521 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169309,ffff88eb9c10,python3.7):2025-02-07-15:54:29.685.556 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169242,ffff96d80c10,python3.7):2025-02-07-15:54:29.685.677 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169297,ffff9ffe2c10,python3.7):2025-02-07-15:54:29.685.682 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169273,ffff95a23c10,python3.7):2025-02-07-15:54:29.685.679 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169323,ffff82a65c10,python3.7):2025-02-07-15:54:29.685.861 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169285,ffff97501c10,python3.7):2025-02-07-15:54:29.686.305 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 0.0804836, [21] [bootstrap]: 0.00029333 [type_inference]: 0.00231996 [auto_monad]: 0.00010625 [graph_reusing]: 2.09e-06 [inline]: 1.16997e-06 [parallel-infer-symbol]: 1.45007e-06 [pre_auto_parallel]: 2.028e-05 [insert-virtual-dataset]: 2.22004e-06 [parallel-infer-symbol-second]: 5.10016e-07 [dataset_repeat_opt]: 9.19914e-07 [pipeline_split]: 1.01002e-06 [optimize]: 0.00710958, [52] [py_interpret_to_execute]: 1.364e-05 [rewriter_before_opt_a]: 3.181e-05 [opt_a]: 0.00534335, [2] [Cycle 1]: 0.0015459, [43] [expand_dump_flag]: 2.50002e-06 [switch_simplify]: 2.607e-05 [loop_unroll]: 1.352e-05 [a_1]: 0.0003393 [recompute_prepare]: 9.36002e-06 [updatestate_depend_eliminate]: 8.16002e-06 [updatestate_assign_eliminate]: 5.03007e-06 [updatestate_loads_eliminate]: 7.02997e-06 [parameter_eliminate]: 2.46009e-06 [a_2]: 0.00011553 [accelerated_algorithm]: 8.90996e-06 [shard]: 1.80001e-06 [meta_shard_fg_expand]: 3.15998e-06 [shard_inline]: 9.09006e-06 [auto_parallel]: 1.09599e-05 [parallel]: 5.66e-06 [flash_sp]: 8.56002e-06 [merge_comm]: 6.66e-06 [allreduce_fusion]: 5.02996e-06 [matmul_add_comm_reduction]: 9.75002e-06 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 9.90997e-06 [virtual_dataset]: 8.40006e-06 [get_grad_eliminate_]: 7.92998e-06 [virtual_output]: 7.91997e-06 [merge_forward]: 5.31995e-06 [cell_reuse_recompute_pass]: 1.52003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.73299e-05 [before_grad]: 1.40601e-05 [inplace_validation]: 4.78001e-06 [meta_fg_expand]: 5.54009e-06 [inplace_validation_after_expand]: 5.72996e-06 [flash_sp_send_recv_attached]: 3.62995e-06 [receive_attached]: 1.77999e-06 [after_resolve]: 1.096e-05 [a_after_grad]: 1.382e-05 [special_op_eliminate]: 8.17992e-06 [renormalize]: 0.00042795 [add_forward_monad_depend]: 2.62004e-06 [auto_monad_grad]: 1.86008e-06 [auto_monad_eliminator]: 5.754e-05 [cse]: 4.05001e-05 [a_3]: 5.80701e-05 [Cycle 2]: 0.00080512, [43] [expand_dump_flag]: 8.69972e-07 [switch_simplify]: 9.47004e-06 [loop_unroll]: 8.11007e-06 [a_1]: 0.000209 [recompute_prepare]: 7.72998e-06 [updatestate_depend_eliminate]: 5.77001e-06 [updatestate_assign_eliminate]: 4.76011e-06 [updatestate_loads_eliminate]: 5.23997e-06 [parameter_eliminate]: 1.41002e-06 [a_2]: 0.00010959 [accelerated_algorithm]: 8.27992e-06 [shard]: 1.22993e-06 [meta_shard_fg_expand]: 2.83995e-06 [shard_inline]: 7.92998e-06 [auto_parallel]: 1.024e-05 [parallel]: 3.01003e-06 [flash_sp]: 2.6999e-06 [merge_comm]: 5.84009e-06 [allreduce_fusion]: 4.92996e-06 [matmul_add_comm_reduction]: 7.72998e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 8.91997e-06 [virtual_dataset]: 8.02998e-06 [get_grad_eliminate_]: 7.57002e-06 [virtual_output]: 7.73009e-06 [merge_forward]: 4.50003e-06 [cell_reuse_recompute_pass]: 1.94996e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.531e-05 [before_grad]: 1.26801e-05 [inplace_validation]: 4.30003e-06 [meta_fg_expand]: 4.79002e-06 [inplace_validation_after_expand]: 5.32006e-06 [flash_sp_send_recv_attached]: 8.30041e-07 [receive_attached]: 6.60075e-07 [after_resolve]: 1.02201e-05 [a_after_grad]: 1.269e-05 [special_op_eliminate]: 8.17003e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 1.13004e-06 [auto_monad_grad]: 1.11992e-06 [auto_monad_eliminator]: 1.75401e-05 [cse]: 1.91501e-05 [a_3]: 5.03e-05 [py_interpret_to_execute_after_opt_a]: 9.26002e-06 [slice_cell_reuse_recomputed_activation]: 1.59e-06 [rewriter_after_opt_a]: 0.00012568 [convert_after_rewriter]: 8.00996e-06 [order_py_execute_after_rewriter]: 5.32996e-06 [opt_b]: 0.0002435, [1] [Cycle 1]: 0.00023841, [7] [b_1]: 0.00016513 [b_2]: 1.065e-05 [updatestate_depend_eliminate]: 5.01995e-06 [updatestate_assign_eliminate]: 4.37e-06 [updatestate_loads_eliminate]: 4.53996e-06 [renormalize]: 2.70084e-07 [cse]: 1.683e-05 [optimize_parallel_all_gather_comm]: 7.80006e-06 [overlap_param_gather]: 7.10017e-07 [cconv]: 1.62099e-05 [loop_unroll]: 0.00048464 [opt_after_cconv]: 0.00012772, [1] [Cycle 1]: 0.00012204, [7] [c_1]: 5.202e-05 [parameter_eliminate]: 1.85997e-06 [updatestate_depend_eliminate]: 7.17002e-06 [updatestate_assign_eliminate]: 4.81005e-06 [updatestate_loads_eliminate]: 4.92996e-06 [cse]: 1.91099e-05 [renormalize]: 3.89991e-07 [remove_dup_value]: 9.80997e-06 [tuple_transform]: 7.205e-05, [1] [Cycle 1]: 6.727e-05, [2] [d_1]: 5.664e-05 [renormalize]: 2.30037e-07 [partial_unused_args_eliminate]: 1.71002e-06 [add_cache_embedding]: 1.19101e-05 [add_recomputation]: 5.40799e-05 [cse_after_recomputation]: 2.723e-05, [1] [Cycle 1]: 2.268e-05, [1] [cse]: 1.721e-05 [environ_conv]: 6.29004e-06 [swap_dp_allreduce_reducescatter]: 7.28003e-06 [bias_add_comm_swap]: 1.53994e-06 [label_micro_interleaved_index]: 1.16008e-06 [label_fine_grained_interleaved_index]: 1.60001e-06 [merge_cast_opt]: 7.29924e-07 [slice_recompute_activation]: 1.12993e-06 [micro_interleaved_order_control]: 1.62004e-06 [assign_add_opt]: 2.604e-05 [ForceFp32Comm]: 6.6997e-07 [remove_cast_before_assign_add]: 6.50005e-06 [full_micro_interleaved_order_control]: 1.36008e-06 [reorder_send_recv_between_fp_bp]: 1.21002e-06 [comm_op_add_attrs]: 2.318e-05 [add_comm_op_reuse_tag]: 1.63005e-06 [interleave_split_concat_branches]: 5.79981e-07 [interleave_parallel_branches]: 5.50062e-07 [overlap_opt_shard_in_pipeline]: 6.40051e-07 [overlap_opt_shard_grad_in_pipeline]: 1.49e-06 [control_data_broadcast_order]: 6.59958e-07 [grouped_pairwise_exchange_alltoall]: 6.69993e-06 [offloading_packed_experts]: 1.40001e-06 [overlap_recompute_and_grad_model_parallel]: 1.59e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.49946e-07 [overlap_recompute_allgather_and_fa_grad]: 6.355e-05 [overlap_grad_ring_attention]: 1.40001e-06 [overlap_grad_flash_sp]: 1.173e-05 [begin_end_overlap_inline]: 5.59958e-07 [split_matmul_comm_elemetwise]: 1.31002e-06 [split_layernorm_comm]: 1.20001e-06 [handle_group_info]: 3.23995e-06 [symbol_engine_optimizer]: 9.17601e-05, [1] [Cycle 1]: 8.71001e-05, [6] [build]: 5.51005e-06 [elim_shapecalc]: 1.334e-05 [elim_not_effective]: 1.647e-05 [opt_reshape]: 9.22999e-06 [fold_const_symbol]: 1.37399e-05 [renormalize]: 2.89991e-07 [pipeline_parallel_scheduler]: 9.40054e-07 [auto_monad_reorder]: 2.442e-05 [get_jit_bprop_graph]: 3.39933e-07 [rewriter_after_jit_bprop_graph]: 3.30037e-07 [eliminate_special_op_node]: 0.00050267 [distribtued_split]: 3.546e-05 [validate]: 2.981e-05 [task_emit]: 0.0697834 [execute]: 9.57004e-06 Sums bootstrap : 0.000293s : 0.38% type_inference : 0.002320s : 3.03% auto_monad : 0.000106s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000020s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000032s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000036s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000548s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000225s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000027s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000428s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000075s : 0.10% optimize.opt_a.cse : 0.000060s : 0.08% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000126s : 0.16% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.22% optimize.opt_b.b_2 : 0.000011s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000017s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000485s : 0.63% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000019s : 0.02% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000054s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000026s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000023s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000007s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000064s : 0.08% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000006s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000503s : 0.66% distribtued_split : 0.000035s : 0.05% validate : 0.000030s : 0.04% task_emit : 0.069783s : 91.27% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000116 63 4.63% : 0.000005s : 2: substitution.depend_value_elim 2.11% : 0.000002s : 5: substitution.elim_not_effective 2.07% : 0.000002s : 5: substitution.fold_const_symbol 5.71% : 0.000007s : 6: substitution.graph_param_transform 49.88% : 0.000058s : 1: substitution.inline 4.15% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.57% : 0.000004s : 6: substitution.load_eliminater 2.11% : 0.000002s : 2: substitution.reduce_all_const_elim 6.52% : 0.000008s : 10: substitution.remove_not_recompute_node 2.46% : 0.000003s : 2: substitution.replace_old_param 8.79% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.01% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002295 2 89.58% : 0.002055s : 1: type_inference.infer 10.42% : 0.000239s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000057 1 100.00% : 0.000057s : 1: match.inline ------[predicate.] 0.000233 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.18% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.79% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.80% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.18% : 0.000005s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.74% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.49% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.25% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 1.01% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.80% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.63% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.18% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.18% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_depend_swap 1.91% : 0.000004s : 31: predicate.environ_get_eliminate 1.21% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.20% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.16% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.31% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.65% : 0.000002s : 12: predicate.incorporate_call_switch 5.48% : 0.000013s : 63: predicate.inline 1.12% : 0.000003s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.13% : 0.000003s : 12: predicate.less_batch_normalization 1.78% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.54% : 0.000006s : 38: predicate.load_eliminater 1.32% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.29% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.79% : 0.000002s : 12: predicate.merge_addn 0.87% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.51% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.92% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.08% : 0.000003s : 13: predicate.reduce_eliminate 0.52% : 0.000001s : 12: predicate.remove_not_recompute_node 1.10% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.83% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.50% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.44% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 1.00% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.69% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.23% : 0.000010s : 43: predicate.switch_simplify 0.75% : 0.000002s : 13: predicate.tile_eliminate 0.78% : 0.000002s : 13: predicate.transpose_eliminate 1.85% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.63% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.56% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.41% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.43% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.55% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000140 4 7.75% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.25% : 0.000129s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089409 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000058s : 1: add_recomputation 0.03% : 0.000031s : 1: assign_add_opt 0.13% : 0.000118s : 1: auto_monad 0.03% : 0.000031s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000318s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.03% : 0.000027s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000043s : 1: distribtued_split 0.58% : 0.000516s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000010s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.55% : 0.000494s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001109s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000156s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 5.98% : 0.005348s : 1: opt_a 0.15% : 0.000132s : 1: opt_after_cconv 0.28% : 0.000246s : 1: opt_b 7.96% : 0.007118s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000069s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000026s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000021s : 1: remove_dup_value 0.26% : 0.000229s : 1: renormalize.infer 0.22% : 0.000194s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000131s : 1: rewriter_after_opt_a 0.04% : 0.000036s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000095s : 1: symbol_engine_optimizer 78.08% : 0.069809s : 1: task_emit 0.08% : 0.000076s : 1: tuple_transform 2.61% : 0.002338s : 1: type_inference 0.07% : 0.000064s : 1: validate TotalTime = 0.0811044, [21] [bootstrap]: 0.00030857 [type_inference]: 0.0025867 [auto_monad]: 0.00013518 [graph_reusing]: 1.35996e-06 [inline]: 1.20001e-06 [parallel-infer-symbol]: 2.14006e-06 [pre_auto_parallel]: 2.55e-05 [insert-virtual-dataset]: 2.22994e-06 [parallel-infer-symbol-second]: 4.4005e-07 [dataset_repeat_opt]: 1.12993e-06 [pipeline_split]: 9.39937e-07 [optimize]: 0.00731671, [52] [py_interpret_to_execute]: 1.57e-05 [rewriter_before_opt_a]: 3.49099e-05 [opt_a]: 0.00543742, [2] [Cycle 1]: 0.00155124, [43] [expand_dump_flag]: 3.78001e-06 [switch_simplify]: 3.087e-05 [loop_unroll]: 1.361e-05 [a_1]: 0.0003507 [recompute_prepare]: 9.24e-06 [updatestate_depend_eliminate]: 8.58994e-06 [updatestate_assign_eliminate]: 6.86001e-06 [updatestate_loads_eliminate]: 6.72997e-06 [parameter_eliminate]: 3.4601e-06 [a_2]: 0.00011677 [accelerated_algorithm]: 8.80996e-06 [shard]: 1.93994e-06 [meta_shard_fg_expand]: 3.50992e-06 [shard_inline]: 8.99006e-06 [auto_parallel]: 1.164e-05 [parallel]: 6.73998e-06 [flash_sp]: 9.32009e-06 [merge_comm]: 7.49994e-06 [allreduce_fusion]: 5.24998e-06 [matmul_add_comm_reduction]: 9.81998e-06 [allreduce_slice_to_reducescatter]: 4.50062e-07 [virtual_shard_identity]: 9.62999e-06 [virtual_dataset]: 8.09995e-06 [get_grad_eliminate_]: 7.66001e-06 [virtual_output]: 8.00006e-06 [merge_forward]: 5.71006e-06 [cell_reuse_recompute_pass]: 1.59e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.717e-05 [before_grad]: 1.373e-05 [inplace_validation]: 5.04998e-06 [meta_fg_expand]: 5.38002e-06 [inplace_validation_after_expand]: 6.6401e-06 [flash_sp_send_recv_attached]: 3.84997e-06 [receive_attached]: 2.20991e-06 [after_resolve]: 1.16801e-05 [a_after_grad]: 1.321e-05 [special_op_eliminate]: 7.98993e-06 [renormalize]: 0.00045345 [add_forward_monad_depend]: 3.50003e-06 [auto_monad_grad]: 1.71002e-06 [auto_monad_eliminator]: 2.981e-05 [cse]: 3.091e-05 [a_3]: 5.79499e-05 [Cycle 2]: 0.00078743, [43] [expand_dump_flag]: 1.11002e-06 [switch_simplify]: 9.42999e-06 [loop_unroll]: 7.85e-06 [a_1]: 0.00020644 [recompute_prepare]: 7.41996e-06 [updatestate_depend_eliminate]: 6.18992e-06 [updatestate_assign_eliminate]: 4.60004e-06 [updatestate_loads_eliminate]: 5.47001e-06 [parameter_eliminate]: 1.24995e-06 [a_2]: 0.00010512 [accelerated_algorithm]: 8.26002e-06 [shard]: 1.20001e-06 [meta_shard_fg_expand]: 2.79001e-06 [shard_inline]: 8.31007e-06 [auto_parallel]: 1.03901e-05 [parallel]: 3.41004e-06 [flash_sp]: 3.48e-06 [merge_comm]: 5.82996e-06 [allreduce_fusion]: 5.02996e-06 [matmul_add_comm_reduction]: 7.86001e-06 [allreduce_slice_to_reducescatter]: 3.39933e-07 [virtual_shard_identity]: 8.71997e-06 [virtual_dataset]: 7.88993e-06 [get_grad_eliminate_]: 7.25e-06 [virtual_output]: 7.30006e-06 [merge_forward]: 4.50003e-06 [cell_reuse_recompute_pass]: 1.99e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.532e-05 [before_grad]: 1.22701e-05 [inplace_validation]: 4.43007e-06 [meta_fg_expand]: 4.74008e-06 [inplace_validation_after_expand]: 5.46e-06 [flash_sp_send_recv_attached]: 9.00007e-07 [receive_attached]: 7.60076e-07 [after_resolve]: 1.01e-05 [a_after_grad]: 1.28e-05 [special_op_eliminate]: 7.62998e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 9.40054e-07 [auto_monad_grad]: 1.12003e-06 [auto_monad_eliminator]: 1.80199e-05 [cse]: 2.027e-05 [a_3]: 4.849e-05 [py_interpret_to_execute_after_opt_a]: 9.21998e-06 [slice_cell_reuse_recomputed_activation]: 2.29001e-06 [rewriter_after_opt_a]: 0.00015198 [convert_after_rewriter]: 8.02998e-06 [order_py_execute_after_rewriter]: 6.61996e-06 [opt_b]: 0.00024696, [1] [Cycle 1]: 0.00024145, [7] [b_1]: 0.00016488 [b_2]: 1.03201e-05 [updatestate_depend_eliminate]: 5.49003e-06 [updatestate_assign_eliminate]: 4.49992e-06 [updatestate_loads_eliminate]: 5.48002e-06 [renormalize]: 2.39932e-07 [cse]: 1.94501e-05 [optimize_parallel_all_gather_comm]: 7.97003e-06 [overlap_param_gather]: 1.11992e-06 [cconv]: 2.264e-05 [loop_unroll]: 0.00051463 [opt_after_cconv]: 0.00013647, [1] [Cycle 1]: 0.0001304, [7] [c_1]: 5.504e-05 [parameter_eliminate]: 2.34996e-06 [updatestate_depend_eliminate]: 7.95e-06 [updatestate_assign_eliminate]: 4.77e-06 [updatestate_loads_eliminate]: 5.40004e-06 [cse]: 2.216e-05 [renormalize]: 5.39934e-07 [remove_dup_value]: 1.438e-05 [tuple_transform]: 7.021e-05, [1] [Cycle 1]: 6.557e-05, [2] [d_1]: 5.652e-05 [renormalize]: 1.80095e-07 [partial_unused_args_eliminate]: 2.10002e-06 [add_cache_embedding]: 1.282e-05 [add_recomputation]: 6.31399e-05 [cse_after_recomputation]: 2.709e-05, [1] [Cycle 1]: 2.223e-05, [1] [cse]: 1.73701e-05 [environ_conv]: 7.60006e-06 [swap_dp_allreduce_reducescatter]: 7.51007e-06 [bias_add_comm_swap]: 2.23995e-06 [label_micro_interleaved_index]: 1.98989e-06 [label_fine_grained_interleaved_index]: 1.91003e-06 [merge_cast_opt]: 1.05007e-06 [slice_recompute_activation]: 2.02004e-06 [micro_interleaved_order_control]: 1.59e-06 [assign_add_opt]: 2.794e-05 [ForceFp32Comm]: 1.09e-06 [remove_cast_before_assign_add]: 7.10995e-06 [full_micro_interleaved_order_control]: 1.57999e-06 [reorder_send_recv_between_fp_bp]: 1.9701e-06 [comm_op_add_attrs]: 2.723e-05 [add_comm_op_reuse_tag]: 2.03995e-06 [interleave_split_concat_branches]: 8.00006e-07 [interleave_parallel_branches]: 7.69971e-07 [overlap_opt_shard_in_pipeline]: 9.2003e-07 [overlap_opt_shard_grad_in_pipeline]: 1.83994e-06 [control_data_broadcast_order]: 1.13004e-06 [grouped_pairwise_exchange_alltoall]: 9.28005e-06 [offloading_packed_experts]: 1.80001e-06 [overlap_recompute_and_grad_model_parallel]: 1.46998e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.10018e-07 [overlap_recompute_allgather_and_fa_grad]: 7.27e-05 [overlap_grad_ring_attention]: 1.92993e-06 [overlap_grad_flash_sp]: 1.429e-05 [begin_end_overlap_inline]: 5.09899e-07 [split_matmul_comm_elemetwise]: 1.89e-06 [split_layernorm_comm]: 1.75997e-06 [handle_group_info]: 4.53996e-06 [symbol_engine_optimizer]: 9.064e-05, [1] [Cycle 1]: 8.56899e-05, [6] [build]: 5.12006e-06 [elim_shapecalc]: 1.37599e-05 [elim_not_effective]: 1.677e-05 [opt_reshape]: 9.20007e-06 [fold_const_symbol]: 1.352e-05 [renormalize]: 3.20026e-07 [pipeline_parallel_scheduler]: 1.52993e-06 [auto_monad_reorder]: 2.98501e-05 [get_jit_bprop_graph]: 4.60073e-07 [rewriter_after_jit_bprop_graph]: 4.30038e-07 [eliminate_special_op_node]: 0.00052605 [distribtued_split]: 3.278e-05 [validate]: 3.151e-05 [task_emit]: 0.069839 [execute]: 1.165e-05 Sums bootstrap : 0.000309s : 0.40% type_inference : 0.002587s : 3.36% auto_monad : 0.000135s : 0.18% graph_reusing : 0.000001s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000557s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000222s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000026s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000454s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000048s : 0.06% optimize.opt_a.cse : 0.000051s : 0.07% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000152s : 0.20% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000515s : 0.67% optimize.opt_after_cconv.c_1 : 0.000055s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000028s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000073s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000526s : 0.68% distribtued_split : 0.000033s : 0.04% validate : 0.000032s : 0.04% task_emit : 0.069839s : 90.70% execute : 0.000012s : 0.02% TotalTime = 0.0814449, [21] [bootstrap]: 0.00031097 [type_inference]: 0.00256223 [auto_monad]: 0.00013287 [graph_reusing]: 2.41003e-06 [inline]: 1.42003e-06 [parallel-infer-symbol]: 2.06009e-06 [pre_auto_parallel]: 2.36699e-05 [insert-virtual-dataset]: 2.80002e-06 [parallel-infer-symbol-second]: 3.89991e-07 [dataset_repeat_opt]: 1.04995e-06 [pipeline_split]: 1.4999e-06 [optimize]: 0.00741645, [52] [py_interpret_to_execute]: 1.492e-05 [rewriter_before_opt_a]: 3.534e-05 [opt_a]: 0.0055361, [2] [Cycle 1]: 0.0015352, [43] [expand_dump_flag]: 3.06009e-06 [switch_simplify]: 2.78801e-05 [loop_unroll]: 1.33901e-05 [a_1]: 0.00034451 [recompute_prepare]: 9.12999e-06 [updatestate_depend_eliminate]: 8.54011e-06 [updatestate_assign_eliminate]: 6.17001e-06 [updatestate_loads_eliminate]: 7.16001e-06 [parameter_eliminate]: 2.78e-06 [a_2]: 0.0001201 [accelerated_algorithm]: 8.75001e-06 [shard]: 2.14996e-06 [meta_shard_fg_expand]: 3.31004e-06 [shard_inline]: 8.66002e-06 [auto_parallel]: 1.19801e-05 [parallel]: 6.07001e-06 [flash_sp]: 9.12009e-06 [merge_comm]: 7.42998e-06 [allreduce_fusion]: 5.27001e-06 [matmul_add_comm_reduction]: 1.053e-05 [allreduce_slice_to_reducescatter]: 5.60074e-07 [virtual_shard_identity]: 9.32999e-06 [virtual_dataset]: 7.99994e-06 [get_grad_eliminate_]: 7.63999e-06 [virtual_output]: 8.13999e-06 [merge_forward]: 5.18002e-06 [cell_reuse_recompute_pass]: 1.87999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.696e-05 [before_grad]: 1.365e-05 [inplace_validation]: 4.75999e-06 [meta_fg_expand]: 5.31995e-06 [inplace_validation_after_expand]: 5.79003e-06 [flash_sp_send_recv_attached]: 3.56999e-06 [receive_attached]: 2.05007e-06 [after_resolve]: 1.119e-05 [a_after_grad]: 1.272e-05 [special_op_eliminate]: 7.92998e-06 [renormalize]: 0.00044307 [add_forward_monad_depend]: 3.42005e-06 [auto_monad_grad]: 1.91003e-06 [auto_monad_eliminator]: 3.261e-05 [cse]: 3.318e-05 [a_3]: 5.92199e-05 [Cycle 2]: 0.00079944, [43] [expand_dump_flag]: 1.09e-06 [switch_simplify]: 8.86002e-06 [loop_unroll]: 7.52008e-06 [a_1]: 0.0002148 [recompute_prepare]: 7.37992e-06 [updatestate_depend_eliminate]: 5.87001e-06 [updatestate_assign_eliminate]: 4.78001e-06 [updatestate_loads_eliminate]: 5.44998e-06 [parameter_eliminate]: 1.2801e-06 [a_2]: 0.0001066 [accelerated_algorithm]: 8.7599e-06 [shard]: 1.16997e-06 [meta_shard_fg_expand]: 2.62004e-06 [shard_inline]: 8.06001e-06 [auto_parallel]: 1.1e-05 [parallel]: 4.04997e-06 [flash_sp]: 3.34997e-06 [merge_comm]: 6.04999e-06 [allreduce_fusion]: 4.99003e-06 [matmul_add_comm_reduction]: 7.83999e-06 [allreduce_slice_to_reducescatter]: 2.89991e-07 [virtual_shard_identity]: 8.87993e-06 [virtual_dataset]: 7.92998e-06 [get_grad_eliminate_]: 7.58003e-06 [virtual_output]: 7.32008e-06 [merge_forward]: 4.50003e-06 [cell_reuse_recompute_pass]: 1.83994e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.564e-05 [before_grad]: 1.281e-05 [inplace_validation]: 4.43996e-06 [meta_fg_expand]: 4.87e-06 [inplace_validation_after_expand]: 5.24998e-06 [flash_sp_send_recv_attached]: 7.79983e-07 [receive_attached]: 8.2003e-07 [after_resolve]: 1.03e-05 [a_after_grad]: 1.25001e-05 [special_op_eliminate]: 7.58993e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 9.30042e-07 [auto_monad_grad]: 1.25996e-06 [auto_monad_eliminator]: 1.851e-05 [cse]: 1.99201e-05 [a_3]: 5.018e-05 [py_interpret_to_execute_after_opt_a]: 9.29995e-06 [slice_cell_reuse_recomputed_activation]: 1.99e-06 [rewriter_after_opt_a]: 0.00014495 [convert_after_rewriter]: 1.10101e-05 [order_py_execute_after_rewriter]: 6.19993e-06 [opt_b]: 0.00024366, [1] [Cycle 1]: 0.00023827, [7] [b_1]: 0.00016256 [b_2]: 9.82999e-06 [updatestate_depend_eliminate]: 5.2799e-06 [updatestate_assign_eliminate]: 4.68001e-06 [updatestate_loads_eliminate]: 5.72996e-06 [renormalize]: 3.30037e-07 [cse]: 1.941e-05 [optimize_parallel_all_gather_comm]: 8.68994e-06 [overlap_param_gather]: 1.06008e-06 [cconv]: 2.402e-05 [loop_unroll]: 0.00051942 [opt_after_cconv]: 0.00013573, [1] [Cycle 1]: 0.00012932, [7] [c_1]: 5.37599e-05 [parameter_eliminate]: 2.58e-06 [updatestate_depend_eliminate]: 8.28994e-06 [updatestate_assign_eliminate]: 4.94998e-06 [updatestate_loads_eliminate]: 5.39003e-06 [cse]: 2.213e-05 [renormalize]: 4.30038e-07 [remove_dup_value]: 1.188e-05 [tuple_transform]: 7.06599e-05, [1] [Cycle 1]: 6.64099e-05, [2] [d_1]: 5.702e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 1.98011e-06 [add_cache_embedding]: 1.29e-05 [add_recomputation]: 6.25e-05 [cse_after_recomputation]: 2.74801e-05, [1] [Cycle 1]: 2.283e-05, [1] [cse]: 1.766e-05 [environ_conv]: 7.01007e-06 [swap_dp_allreduce_reducescatter]: 7.22008e-06 [bias_add_comm_swap]: 2.23995e-06 [label_micro_interleaved_index]: 1.90001e-06 [label_fine_grained_interleaved_index]: 1.99e-06 [merge_cast_opt]: 1.05996e-06 [slice_recompute_activation]: 1.61002e-06 [micro_interleaved_order_control]: 1.62993e-06 [assign_add_opt]: 2.83499e-05 [ForceFp32Comm]: 1.06008e-06 [remove_cast_before_assign_add]: 7.08003e-06 [full_micro_interleaved_order_control]: 2.12993e-06 [reorder_send_recv_between_fp_bp]: 2.00991e-06 [comm_op_add_attrs]: 2.695e-05 [add_comm_op_reuse_tag]: 2.21003e-06 [interleave_split_concat_branches]: 7.79983e-07 [interleave_parallel_branches]: 6.89994e-07 [overlap_opt_shard_in_pipeline]: 1.09e-06 [overlap_opt_shard_grad_in_pipeline]: 1.83005e-06 [control_data_broadcast_order]: 1.04005e-06 [grouped_pairwise_exchange_alltoall]: 9.09995e-06 [offloading_packed_experts]: 2.37999e-06 [overlap_recompute_and_grad_model_parallel]: 1.53005e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.89994e-07 [overlap_recompute_allgather_and_fa_grad]: 7.41e-05 [overlap_grad_ring_attention]: 2.01003e-06 [overlap_grad_flash_sp]: 1.33399e-05 [begin_end_overlap_inline]: 6.89994e-07 [split_matmul_comm_elemetwise]: 1.82993e-06 [split_layernorm_comm]: 1.70001e-06 [handle_group_info]: 4.15999e-06 [symbol_engine_optimizer]: 9.809e-05, [1] [Cycle 1]: 9.30299e-05, [6] [build]: 5.17e-06 [elim_shapecalc]: 1.29601e-05 [elim_not_effective]: 1.75e-05 [opt_reshape]: 9.11998e-06 [fold_const_symbol]: 1.54499e-05 [renormalize]: 2.89991e-07 [pipeline_parallel_scheduler]: 1.35996e-06 [auto_monad_reorder]: 2.83801e-05 [get_jit_bprop_graph]: 4.10015e-07 [rewriter_after_jit_bprop_graph]: 3.59956e-07 [eliminate_special_op_node]: 0.00052068 [distribtued_split]: 4.053e-05 [validate]: 3.501e-05 [task_emit]: 0.0701126 [execute]: 1.00901e-05 Sums bootstrap : 0.000311s : 0.40% type_inference : 0.002562s : 3.32% auto_monad : 0.000133s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000037s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000559s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000227s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000443s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000145s : 0.19% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000519s : 0.67% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000028s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000074s : 0.10% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000004s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000028s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000521s : 0.67% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.070113s : 90.76% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000132 63 4.69% : 0.000006s : 2: substitution.depend_value_elim 1.75% : 0.000002s : 5: substitution.elim_not_effective 1.53% : 0.000002s : 5: substitution.fold_const_symbol 5.56% : 0.000007s : 6: substitution.graph_param_transform 52.00% : 0.000069s : 1: substitution.inline 3.82% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.12% : 0.000004s : 6: substitution.load_eliminater 2.31% : 0.000003s : 2: substitution.reduce_all_const_elim 6.02% : 0.000008s : 10: substitution.remove_not_recompute_node 2.44% : 0.000003s : 2: substitution.replace_old_param 8.68% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.07% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002556 2 88.82% : 0.002270s : 1: type_inference.infer 11.18% : 0.000286s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000237 1420 0.72% : 0.000002s : 13: predicate.accumulaten_eliminater 1.25% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.84% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.32% : 0.000006s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.81% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.45% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.78% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.19% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_depend_swap 1.98% : 0.000005s : 31: predicate.environ_get_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.89% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.34% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.74% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.97% : 0.000014s : 63: predicate.inline 1.11% : 0.000003s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.09% : 0.000003s : 12: predicate.less_batch_normalization 1.69% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000006s : 38: predicate.load_eliminater 1.33% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.76% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.72% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.79% : 0.000002s : 13: predicate.minmaximum_grad 0.83% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.11% : 0.000003s : 14: predicate.partial_defer_inline 1.36% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.83% : 0.000002s : 12: predicate.reduce_all_const_elim 1.05% : 0.000002s : 13: predicate.reduce_eliminate 0.57% : 0.000001s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.21% : 0.000000s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.93% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.01% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.47% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 1.05% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.98% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.22% : 0.000005s : 38: predicate.stopgrad_eliminater 0.47% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.89% : 0.000002s : 14: predicate.switch_defer_inline 1.55% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.24% : 0.000010s : 43: predicate.switch_simplify 0.73% : 0.000002s : 13: predicate.tile_eliminate 0.82% : 0.000002s : 13: predicate.transpose_eliminate 1.71% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.75% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.47% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.53% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.47% : 0.000001s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.57% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000165 4 9.56% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.44% : 0.000149s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090277 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000068s : 1: add_recomputation 0.04% : 0.000032s : 1: assign_add_opt 0.16% : 0.000149s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000336s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.03% : 0.000031s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.04% : 0.000040s : 1: distribtued_split 0.60% : 0.000541s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.58% : 0.000525s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001117s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000034s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 6.03% : 0.005441s : 1: opt_a 0.16% : 0.000141s : 1: opt_after_cconv 0.28% : 0.000250s : 1: opt_b 8.11% : 0.007326s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.09% : 0.000078s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.27% : 0.000245s : 1: renormalize.infer 0.22% : 0.000202s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000158s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000093s : 1: symbol_engine_optimizer 77.40% : 0.069872s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.89% : 0.002606s : 1: type_inference 0.07% : 0.000062s : 1: validate Time group info: ------[substitution.] 0.000128 63 4.69% : 0.000006s : 2: substitution.depend_value_elim 2.06% : 0.000003s : 5: substitution.elim_not_effective 1.92% : 0.000002s : 5: substitution.fold_const_symbol 5.98% : 0.000008s : 6: substitution.graph_param_transform 49.47% : 0.000063s : 1: substitution.inline 4.09% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.45% : 0.000004s : 6: substitution.load_eliminater 2.57% : 0.000003s : 2: substitution.reduce_all_const_elim 6.05% : 0.000008s : 10: substitution.remove_not_recompute_node 2.40% : 0.000003s : 2: substitution.replace_old_param 9.12% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.21% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002532 2 89.42% : 0.002264s : 1: type_inference.infer 10.58% : 0.000268s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000062 1 100.00% : 0.000062s : 1: match.inline ------[predicate.] 0.000231 1420 0.85% : 0.000002s : 13: predicate.accumulaten_eliminater 1.24% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.74% : 0.000002s : 12: predicate.addn_check_dump 0.84% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.36% : 0.000005s : 25: predicate.arithmetic_simplify 0.97% : 0.000002s : 13: predicate.cast_eliminate 0.77% : 0.000002s : 12: predicate.check_bprop_eliminate 0.76% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.51% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.32% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.78% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.79% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.12% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 1.99% : 0.000005s : 31: predicate.environ_get_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.23% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.61% : 0.000013s : 63: predicate.inline 0.93% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.02% : 0.000002s : 12: predicate.less_batch_normalization 1.66% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.42% : 0.000006s : 38: predicate.load_eliminater 1.42% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.25% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.87% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.79% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.75% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.18% : 0.000003s : 14: predicate.partial_defer_inline 1.28% : 0.000003s : 19: predicate.partial_eliminate 0.74% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.05% : 0.000002s : 13: predicate.reduce_eliminate 0.62% : 0.000001s : 12: predicate.remove_not_recompute_node 1.11% : 0.000003s : 25: predicate.replace_applicator 0.49% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.80% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.13% : 0.000003s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000002s : 12: predicate.shard_identity_eliminate 1.48% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.06% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.61% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.34% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.82% : 0.000002s : 13: predicate.transpose_eliminate 1.83% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.78% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.76% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.51% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.67% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.41% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.47% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000160 4 10.34% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.66% : 0.000144s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090714 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000006s : 1: add_comm_op_reuse_tag 0.07% : 0.000067s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.16% : 0.000145s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000335s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.03% : 0.000031s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.59% : 0.000535s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.58% : 0.000529s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001123s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.06% : 0.000051s : 4: opt.transform.symbol_engine_opt 6.11% : 0.005541s : 1: opt_a 0.15% : 0.000140s : 1: opt_after_cconv 0.27% : 0.000247s : 1: opt_b 8.18% : 0.007424s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.09% : 0.000080s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000029s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.26% : 0.000234s : 1: renormalize.infer 0.22% : 0.000203s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000151s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000101s : 1: symbol_engine_optimizer 77.32% : 0.070142s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.84% : 0.002580s : 1: type_inference 0.08% : 0.000070s : 1: validate TotalTime = 0.0817862, [21] [bootstrap]: 0.00030863 [type_inference]: 0.00258638 [auto_monad]: 0.00013664 [graph_reusing]: 2.44007e-06 [inline]: 1.15007e-06 [parallel-infer-symbol]: 2.27999e-06 [pre_auto_parallel]: 2.575e-05 [insert-virtual-dataset]: 3.02005e-06 [parallel-infer-symbol-second]: 4.10015e-07 [dataset_repeat_opt]: 1.30001e-06 [pipeline_split]: 1.44995e-06 [optimize]: 0.00731738, [52] [py_interpret_to_execute]: 1.59601e-05 [rewriter_before_opt_a]: 3.506e-05 [opt_a]: 0.00556272, [2] [Cycle 1]: 0.00146504, [43] [expand_dump_flag]: 1.81003e-06 [switch_simplify]: 2.535e-05 [loop_unroll]: 1.32701e-05 [a_1]: 0.0003299 [recompute_prepare]: 8.81997e-06 [updatestate_depend_eliminate]: 8.59005e-06 [updatestate_assign_eliminate]: 5.1799e-06 [updatestate_loads_eliminate]: 5.81995e-06 [parameter_eliminate]: 2.89991e-06 [a_2]: 0.00011409 [accelerated_algorithm]: 8.52998e-06 [shard]: 1.23004e-06 [meta_shard_fg_expand]: 2.74996e-06 [shard_inline]: 8.64e-06 [auto_parallel]: 1.183e-05 [parallel]: 4.43996e-06 [flash_sp]: 6.26e-06 [merge_comm]: 6.73009e-06 [allreduce_fusion]: 5.47001e-06 [matmul_add_comm_reduction]: 8.36002e-06 [allreduce_slice_to_reducescatter]: 3.30037e-07 [virtual_shard_identity]: 1.035e-05 [virtual_dataset]: 8.25e-06 [get_grad_eliminate_]: 8.13999e-06 [virtual_output]: 8.09005e-06 [merge_forward]: 5.21995e-06 [cell_reuse_recompute_pass]: 1.36998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.651e-05 [before_grad]: 1.389e-05 [inplace_validation]: 4.24008e-06 [meta_fg_expand]: 5.04008e-06 [inplace_validation_after_expand]: 5.60004e-06 [flash_sp_send_recv_attached]: 2.49001e-06 [receive_attached]: 1.36998e-06 [after_resolve]: 1.055e-05 [a_after_grad]: 1.319e-05 [special_op_eliminate]: 8.08993e-06 [renormalize]: 0.00043309 [add_forward_monad_depend]: 3.31004e-06 [auto_monad_grad]: 1.73005e-06 [auto_monad_eliminator]: 2.37699e-05 [cse]: 2.267e-05 [a_3]: 5.863e-05 [Cycle 2]: 0.00078653, [43] [expand_dump_flag]: 1.16008e-06 [switch_simplify]: 9.6201e-06 [loop_unroll]: 7.57002e-06 [a_1]: 0.00020424 [recompute_prepare]: 7.56001e-06 [updatestate_depend_eliminate]: 6.21006e-06 [updatestate_assign_eliminate]: 4.92006e-06 [updatestate_loads_eliminate]: 5.74999e-06 [parameter_eliminate]: 1.22003e-06 [a_2]: 0.00011053 [accelerated_algorithm]: 8.55001e-06 [shard]: 1.09e-06 [meta_shard_fg_expand]: 2.71993e-06 [shard_inline]: 7.74011e-06 [auto_parallel]: 1.041e-05 [parallel]: 3.62005e-06 [flash_sp]: 2.36009e-06 [merge_comm]: 5.96e-06 [allreduce_fusion]: 5.03997e-06 [matmul_add_comm_reduction]: 7.90006e-06 [allreduce_slice_to_reducescatter]: 3.90108e-07 [virtual_shard_identity]: 8.76002e-06 [virtual_dataset]: 7.50006e-06 [get_grad_eliminate_]: 7.26001e-06 [virtual_output]: 7.33999e-06 [merge_forward]: 4.39002e-06 [cell_reuse_recompute_pass]: 1.72993e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.548e-05 [before_grad]: 1.23101e-05 [inplace_validation]: 4.47e-06 [meta_fg_expand]: 4.84008e-06 [inplace_validation_after_expand]: 5.14998e-06 [flash_sp_send_recv_attached]: 9.89996e-07 [receive_attached]: 7.60076e-07 [after_resolve]: 9.88995e-06 [a_after_grad]: 1.199e-05 [special_op_eliminate]: 7.79994e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 9.59961e-07 [auto_monad_grad]: 1.22003e-06 [auto_monad_eliminator]: 1.869e-05 [cse]: 1.929e-05 [a_3]: 4.851e-05 [py_interpret_to_execute_after_opt_a]: 9.6499e-06 [slice_cell_reuse_recomputed_activation]: 1.23994e-06 [rewriter_after_opt_a]: 0.000127 [convert_after_rewriter]: 1.16701e-05 [order_py_execute_after_rewriter]: 5.15999e-06 [opt_b]: 0.00024079, [1] [Cycle 1]: 0.0002357, [7] [b_1]: 0.00016282 [b_2]: 9.80997e-06 [updatestate_depend_eliminate]: 5.33997e-06 [updatestate_assign_eliminate]: 4.42995e-06 [updatestate_loads_eliminate]: 5.25999e-06 [renormalize]: 2.89991e-07 [cse]: 1.78801e-05 [optimize_parallel_all_gather_comm]: 7.98004e-06 [overlap_param_gather]: 7.40052e-07 [cconv]: 1.51701e-05 [loop_unroll]: 0.0004845 [opt_after_cconv]: 0.00013423, [1] [Cycle 1]: 0.0001285, [7] [c_1]: 5.326e-05 [parameter_eliminate]: 2.34006e-06 [updatestate_depend_eliminate]: 8.75001e-06 [updatestate_assign_eliminate]: 4.69002e-06 [updatestate_loads_eliminate]: 5.78992e-06 [cse]: 2.15101e-05 [renormalize]: 3.80096e-07 [remove_dup_value]: 8.67003e-06 [tuple_transform]: 6.73799e-05, [1] [Cycle 1]: 6.291e-05, [2] [d_1]: 5.389e-05 [renormalize]: 1.79978e-07 [partial_unused_args_eliminate]: 1.37999e-06 [add_cache_embedding]: 1.1e-05 [add_recomputation]: 5.155e-05 [cse_after_recomputation]: 2.689e-05, [1] [Cycle 1]: 2.187e-05, [1] [cse]: 1.703e-05 [environ_conv]: 5.81995e-06 [swap_dp_allreduce_reducescatter]: 6.29004e-06 [bias_add_comm_swap]: 1.24995e-06 [label_micro_interleaved_index]: 8.10018e-07 [label_fine_grained_interleaved_index]: 9.89996e-07 [merge_cast_opt]: 5.79981e-07 [slice_recompute_activation]: 7.69971e-07 [micro_interleaved_order_control]: 6.89994e-07 [assign_add_opt]: 2.41101e-05 [ForceFp32Comm]: 5.60074e-07 [remove_cast_before_assign_add]: 6.37001e-06 [full_micro_interleaved_order_control]: 8.10018e-07 [reorder_send_recv_between_fp_bp]: 8.00006e-07 [comm_op_add_attrs]: 2.179e-05 [add_comm_op_reuse_tag]: 1.55997e-06 [interleave_split_concat_branches]: 5.79981e-07 [interleave_parallel_branches]: 5.00004e-07 [overlap_opt_shard_in_pipeline]: 6.10016e-07 [overlap_opt_shard_grad_in_pipeline]: 8.89995e-07 [control_data_broadcast_order]: 5.50062e-07 [grouped_pairwise_exchange_alltoall]: 6.07001e-06 [offloading_packed_experts]: 9.50065e-07 [overlap_recompute_and_grad_model_parallel]: 9.59961e-07 [overlap_grad_matmul_and_grad_allreduce]: 4.10015e-07 [overlap_recompute_allgather_and_fa_grad]: 5.86699e-05 [overlap_grad_ring_attention]: 2.29001e-06 [overlap_grad_flash_sp]: 1.441e-05 [begin_end_overlap_inline]: 1.01002e-06 [split_matmul_comm_elemetwise]: 2.05997e-06 [split_layernorm_comm]: 1.76998e-06 [handle_group_info]: 5.01005e-06 [symbol_engine_optimizer]: 9.103e-05, [1] [Cycle 1]: 8.6e-05, [6] [build]: 4.53996e-06 [elim_shapecalc]: 1.313e-05 [elim_not_effective]: 1.719e-05 [opt_reshape]: 8.98994e-06 [fold_const_symbol]: 1.421e-05 [renormalize]: 2.79979e-07 [pipeline_parallel_scheduler]: 1.51002e-06 [auto_monad_reorder]: 2.978e-05 [get_jit_bprop_graph]: 3.80096e-07 [rewriter_after_jit_bprop_graph]: 4.59957e-07 [eliminate_special_op_node]: 0.00049749 [distribtued_split]: 4.08599e-05 [validate]: 3.34601e-05 [task_emit]: 0.0705537 [execute]: 8.50006e-06 Sums bootstrap : 0.000309s : 0.40% type_inference : 0.002586s : 3.34% auto_monad : 0.000137s : 0.18% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000534s : 0.69% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000225s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000002s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000009s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000433s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000042s : 0.05% optimize.opt_a.cse : 0.000042s : 0.05% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000127s : 0.16% optimize.convert_after_rewriter : 0.000012s : 0.02% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000484s : 0.63% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000052s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000006s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000024s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000022s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000006s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000059s : 0.08% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000497s : 0.64% distribtued_split : 0.000041s : 0.05% validate : 0.000033s : 0.04% task_emit : 0.070554s : 91.06% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000115 63 4.15% : 0.000005s : 2: substitution.depend_value_elim 2.40% : 0.000003s : 5: substitution.elim_not_effective 2.08% : 0.000002s : 5: substitution.fold_const_symbol 4.91% : 0.000006s : 6: substitution.graph_param_transform 48.75% : 0.000056s : 1: substitution.inline 4.55% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.37% : 0.000004s : 6: substitution.load_eliminater 2.56% : 0.000003s : 2: substitution.reduce_all_const_elim 6.77% : 0.000008s : 10: substitution.remove_not_recompute_node 2.28% : 0.000003s : 2: substitution.replace_old_param 9.38% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.82% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002557 2 88.78% : 0.002270s : 1: type_inference.infer 11.22% : 0.000287s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000055 1 100.00% : 0.000055s : 1: match.inline ------[predicate.] 0.000231 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.07% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.86% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.26% : 0.000005s : 25: predicate.arithmetic_simplify 0.83% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.25% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.24% : 0.000001s : 6: predicate.elim_not_effective 0.54% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 1.87% : 0.000004s : 31: predicate.environ_get_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.23% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.13% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.72% : 0.000013s : 63: predicate.inline 1.07% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.06% : 0.000002s : 12: predicate.less_batch_normalization 1.69% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.42% : 0.000006s : 38: predicate.load_eliminater 1.41% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.87% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.79% : 0.000002s : 12: predicate.merge_addn 0.93% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.75% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.88% : 0.000002s : 13: predicate.minmaximum_grad 0.84% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.78% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.20% : 0.000003s : 13: predicate.reduce_eliminate 0.53% : 0.000001s : 12: predicate.remove_not_recompute_node 1.16% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 1.03% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.00% : 0.000002s : 12: predicate.shard_identity_eliminate 1.51% : 0.000003s : 18: predicate.special_op_eliminate 0.90% : 0.000002s : 12: predicate.specialize_transform 1.03% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.98% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.27% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.71% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.36% : 0.000010s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.83% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.71% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.48% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.55% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.50% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.42% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.60% : 0.000001s : 6: predicate.value_based_eliminate 0.86% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.93% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000157 4 6.05% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.95% : 0.000148s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090908 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.06% : 0.000056s : 1: add_recomputation 0.03% : 0.000028s : 1: assign_add_opt 0.16% : 0.000149s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.37% : 0.000336s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.03% : 0.000026s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.56% : 0.000511s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000009s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.54% : 0.000495s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000003s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.20% : 0.001090s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 6.12% : 0.005567s : 1: opt_a 0.15% : 0.000138s : 1: opt_after_cconv 0.27% : 0.000244s : 1: opt_b 8.06% : 0.007326s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.02% : 0.000017s : 1: overlap_param_gather 0.07% : 0.000064s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000009s : 1: remove_cast_before_assign_add 0.01% : 0.000013s : 1: remove_dup_value 0.25% : 0.000231s : 1: renormalize.infer 0.22% : 0.000197s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000132s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000094s : 1: symbol_engine_optimizer 77.63% : 0.070576s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.87% : 0.002605s : 1: type_inference 0.07% : 0.000068s : 1: validate TotalTime = 0.0824397, [21] [bootstrap]: 0.00031033 [type_inference]: 0.002642 [auto_monad]: 0.00014255 [graph_reusing]: 2.71003e-06 [inline]: 1.51002e-06 [parallel-infer-symbol]: 2.55997e-06 [pre_auto_parallel]: 2.589e-05 [insert-virtual-dataset]: 3.28e-06 [parallel-infer-symbol-second]: 4.10015e-07 [dataset_repeat_opt]: 1.35007e-06 [pipeline_split]: 2.02993e-06 [optimize]: 0.00743118, [52] [py_interpret_to_execute]: 1.676e-05 [rewriter_before_opt_a]: 3.756e-05 [opt_a]: 0.0054917, [2] [Cycle 1]: 0.00163271, [43] [expand_dump_flag]: 3.40992e-06 [switch_simplify]: 2.973e-05 [loop_unroll]: 1.357e-05 [a_1]: 0.00035653 [recompute_prepare]: 9.18994e-06 [updatestate_depend_eliminate]: 8.80996e-06 [updatestate_assign_eliminate]: 6.20005e-06 [updatestate_loads_eliminate]: 7.76001e-06 [parameter_eliminate]: 3.53996e-06 [a_2]: 0.00011845 [accelerated_algorithm]: 8.80996e-06 [shard]: 2.16998e-06 [meta_shard_fg_expand]: 4.43996e-06 [shard_inline]: 8.58004e-06 [auto_parallel]: 1.244e-05 [parallel]: 7.26001e-06 [flash_sp]: 1.242e-05 [merge_comm]: 8.66002e-06 [allreduce_fusion]: 5.47001e-06 [matmul_add_comm_reduction]: 1.134e-05 [allreduce_slice_to_reducescatter]: 4.60073e-07 [virtual_shard_identity]: 9.98005e-06 [virtual_dataset]: 8.50996e-06 [get_grad_eliminate_]: 7.86001e-06 [virtual_output]: 8.26991e-06 [merge_forward]: 6.29004e-06 [cell_reuse_recompute_pass]: 1.62004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.765e-05 [before_grad]: 1.43701e-05 [inplace_validation]: 5.86e-06 [meta_fg_expand]: 5.98002e-06 [inplace_validation_after_expand]: 7.12008e-06 [flash_sp_send_recv_attached]: 5.41995e-06 [receive_attached]: 2.52004e-06 [after_resolve]: 1.143e-05 [a_after_grad]: 1.272e-05 [special_op_eliminate]: 7.92998e-06 [renormalize]: 0.00049372 [add_forward_monad_depend]: 3.90003e-06 [auto_monad_grad]: 2.10991e-06 [auto_monad_eliminator]: 3.49099e-05 [cse]: 3.68099e-05 [a_3]: 5.842e-05 [Cycle 2]: 0.00079459, [43] [expand_dump_flag]: 1.05996e-06 [switch_simplify]: 9.09995e-06 [loop_unroll]: 7.63999e-06 [a_1]: 0.00020225 [recompute_prepare]: 7.31996e-06 [updatestate_depend_eliminate]: 6.30005e-06 [updatestate_assign_eliminate]: 4.81005e-06 [updatestate_loads_eliminate]: 5.33997e-06 [parameter_eliminate]: 1.50991e-06 [a_2]: 0.00010618 [accelerated_algorithm]: 8.37992e-06 [shard]: 1.07998e-06 [meta_shard_fg_expand]: 2.61993e-06 [shard_inline]: 7.60006e-06 [auto_parallel]: 1.09699e-05 [parallel]: 3.89991e-06 [flash_sp]: 3.42005e-06 [merge_comm]: 6.23998e-06 [allreduce_fusion]: 4.81005e-06 [matmul_add_comm_reduction]: 8.32998e-06 [allreduce_slice_to_reducescatter]: 2.5006e-07 [virtual_shard_identity]: 9.15001e-06 [virtual_dataset]: 7.76001e-06 [get_grad_eliminate_]: 7.25e-06 [virtual_output]: 7.19004e-06 [merge_forward]: 4.72006e-06 [cell_reuse_recompute_pass]: 2.04006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.498e-05 [before_grad]: 1.248e-05 [inplace_validation]: 4.58001e-06 [meta_fg_expand]: 4.84008e-06 [inplace_validation_after_expand]: 5.52996e-06 [flash_sp_send_recv_attached]: 9.39937e-07 [receive_attached]: 7.49948e-07 [after_resolve]: 1.045e-05 [a_after_grad]: 1.246e-05 [special_op_eliminate]: 8.02998e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 1.03004e-06 [auto_monad_grad]: 1.16008e-06 [auto_monad_eliminator]: 1.85099e-05 [cse]: 1.978e-05 [a_3]: 4.92301e-05 [py_interpret_to_execute_after_opt_a]: 1.03e-05 [slice_cell_reuse_recomputed_activation]: 2.31003e-06 [rewriter_after_opt_a]: 0.00015279 [convert_after_rewriter]: 8.64e-06 [order_py_execute_after_rewriter]: 6.88992e-06 [opt_b]: 0.00028043, [1] [Cycle 1]: 0.00027416, [7] [b_1]: 0.00016247 [b_2]: 1.00801e-05 [updatestate_depend_eliminate]: 5.56e-06 [updatestate_assign_eliminate]: 4.49002e-06 [updatestate_loads_eliminate]: 5.21995e-06 [renormalize]: 3.49944e-07 [cse]: 5.294e-05 [optimize_parallel_all_gather_comm]: 9.51998e-06 [overlap_param_gather]: 9.89996e-07 [cconv]: 2.473e-05 [loop_unroll]: 0.00050859 [opt_after_cconv]: 0.00013553, [1] [Cycle 1]: 0.00012948, [7] [c_1]: 5.29999e-05 [parameter_eliminate]: 2.60002e-06 [updatestate_depend_eliminate]: 8.42998e-06 [updatestate_assign_eliminate]: 4.72995e-06 [updatestate_loads_eliminate]: 5.73008e-06 [cse]: 2.243e-05 [renormalize]: 4.70085e-07 [remove_dup_value]: 1.54299e-05 [tuple_transform]: 7.032e-05, [1] [Cycle 1]: 6.587e-05, [2] [d_1]: 5.687e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 2.01992e-06 [add_cache_embedding]: 1.372e-05 [add_recomputation]: 6.429e-05 [cse_after_recomputation]: 2.823e-05, [1] [Cycle 1]: 2.33999e-05, [1] [cse]: 1.817e-05 [environ_conv]: 8.22998e-06 [swap_dp_allreduce_reducescatter]: 7.97003e-06 [bias_add_comm_swap]: 2.48e-06 [label_micro_interleaved_index]: 2.05997e-06 [label_fine_grained_interleaved_index]: 2.07999e-06 [merge_cast_opt]: 1.27999e-06 [slice_recompute_activation]: 1.89e-06 [micro_interleaved_order_control]: 2.30002e-06 [assign_add_opt]: 2.993e-05 [ForceFp32Comm]: 1.19e-06 [remove_cast_before_assign_add]: 7.45e-06 [full_micro_interleaved_order_control]: 2.08011e-06 [reorder_send_recv_between_fp_bp]: 2.58e-06 [comm_op_add_attrs]: 3.00599e-05 [add_comm_op_reuse_tag]: 2.19001e-06 [interleave_split_concat_branches]: 9.89996e-07 [interleave_parallel_branches]: 8.69972e-07 [overlap_opt_shard_in_pipeline]: 1.04005e-06 [overlap_opt_shard_grad_in_pipeline]: 2.16009e-06 [control_data_broadcast_order]: 1.55007e-06 [grouped_pairwise_exchange_alltoall]: 9.76003e-06 [offloading_packed_experts]: 2.65997e-06 [overlap_recompute_and_grad_model_parallel]: 1.82993e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.14995e-06 [overlap_recompute_allgather_and_fa_grad]: 6.49e-05 [overlap_grad_ring_attention]: 2.76999e-06 [overlap_grad_flash_sp]: 1.641e-05 [begin_end_overlap_inline]: 7.60076e-07 [split_matmul_comm_elemetwise]: 2.29001e-06 [split_layernorm_comm]: 1.82004e-06 [handle_group_info]: 5.43008e-06 [symbol_engine_optimizer]: 9.32299e-05, [1] [Cycle 1]: 8.82499e-05, [6] [build]: 5.19992e-06 [elim_shapecalc]: 1.4e-05 [elim_not_effective]: 1.725e-05 [opt_reshape]: 9.05001e-06 [fold_const_symbol]: 1.427e-05 [renormalize]: 4.30038e-07 [pipeline_parallel_scheduler]: 1.51002e-06 [auto_monad_reorder]: 3.301e-05 [get_jit_bprop_graph]: 4.7998e-07 [rewriter_after_jit_bprop_graph]: 6.80098e-07 [eliminate_special_op_node]: 0.00052279 [distribtued_split]: 4.32599e-05 [validate]: 3.789e-05 [task_emit]: 0.0709751 [execute]: 1.22701e-05 Sums bootstrap : 0.000310s : 0.40% type_inference : 0.002642s : 3.37% auto_monad : 0.000143s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000038s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000559s : 0.71% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000225s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000494s : 0.63% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000053s : 0.07% optimize.opt_a.cse : 0.000057s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000153s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000053s : 0.07% optimize.optimize_parallel_all_gather_comm : 0.000010s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000509s : 0.65% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000015s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000030s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000030s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000002s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000065s : 0.08% optimize.overlap_grad_ring_attention : 0.000003s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000033s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000523s : 0.67% distribtued_split : 0.000043s : 0.06% validate : 0.000038s : 0.05% task_emit : 0.070975s : 90.59% execute : 0.000012s : 0.02% TotalTime = 0.0827107, [21] [bootstrap]: 0.00030024 [type_inference]: 0.00252311 [auto_monad]: 0.00013382 [graph_reusing]: 2.16998e-06 [inline]: 1.40001e-06 [parallel-infer-symbol]: 2.07999e-06 [pre_auto_parallel]: 2.51799e-05 [insert-virtual-dataset]: 2.56998e-06 [parallel-infer-symbol-second]: 4.49945e-07 [dataset_repeat_opt]: 1.42003e-06 [pipeline_split]: 1.89e-06 [optimize]: 0.00732345, [52] [py_interpret_to_execute]: 1.578e-05 [rewriter_before_opt_a]: 3.51501e-05 [opt_a]: 0.0054412, [2] [Cycle 1]: 0.00159632, [43] [expand_dump_flag]: 3.80003e-06 [switch_simplify]: 3.027e-05 [loop_unroll]: 1.31701e-05 [a_1]: 0.00034997 [recompute_prepare]: 8.87003e-06 [updatestate_depend_eliminate]: 8.97993e-06 [updatestate_assign_eliminate]: 5.72996e-06 [updatestate_loads_eliminate]: 7.91997e-06 [parameter_eliminate]: 3.51004e-06 [a_2]: 0.00011898 [accelerated_algorithm]: 8.59005e-06 [shard]: 2.15007e-06 [meta_shard_fg_expand]: 3.48e-06 [shard_inline]: 8.96002e-06 [auto_parallel]: 1.264e-05 [parallel]: 7.46001e-06 [flash_sp]: 1.042e-05 [merge_comm]: 8.21007e-06 [allreduce_fusion]: 5.41005e-06 [matmul_add_comm_reduction]: 1.11699e-05 [allreduce_slice_to_reducescatter]: 4.39934e-07 [virtual_shard_identity]: 9.64e-06 [virtual_dataset]: 8.62998e-06 [get_grad_eliminate_]: 7.88993e-06 [virtual_output]: 8.26002e-06 [merge_forward]: 6.33008e-06 [cell_reuse_recompute_pass]: 1.73005e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.739e-05 [before_grad]: 1.419e-05 [inplace_validation]: 5.46e-06 [meta_fg_expand]: 5.44998e-06 [inplace_validation_after_expand]: 6.08992e-06 [flash_sp_send_recv_attached]: 4.60993e-06 [receive_attached]: 2.92994e-06 [after_resolve]: 1.164e-05 [a_after_grad]: 1.311e-05 [special_op_eliminate]: 8.36002e-06 [renormalize]: 0.00048161 [add_forward_monad_depend]: 3.50992e-06 [auto_monad_grad]: 1.67009e-06 [auto_monad_eliminator]: 3.108e-05 [cse]: 3.306e-05 [a_3]: 5.94e-05 [Cycle 2]: 0.00079711, [43] [expand_dump_flag]: 1.15007e-06 [switch_simplify]: 8.98994e-06 [loop_unroll]: 7.99005e-06 [a_1]: 0.0002053 [recompute_prepare]: 7.50995e-06 [updatestate_depend_eliminate]: 6.08002e-06 [updatestate_assign_eliminate]: 4.64998e-06 [updatestate_loads_eliminate]: 7.67002e-06 [parameter_eliminate]: 1.32003e-06 [a_2]: 0.0001063 [accelerated_algorithm]: 8.11007e-06 [shard]: 1.25996e-06 [meta_shard_fg_expand]: 2.64996e-06 [shard_inline]: 7.99994e-06 [auto_parallel]: 1.132e-05 [parallel]: 3.93006e-06 [flash_sp]: 3.34007e-06 [merge_comm]: 6.07991e-06 [allreduce_fusion]: 4.98001e-06 [matmul_add_comm_reduction]: 8.25e-06 [allreduce_slice_to_reducescatter]: 2.2992e-07 [virtual_shard_identity]: 8.82999e-06 [virtual_dataset]: 7.63999e-06 [get_grad_eliminate_]: 7.32997e-06 [virtual_output]: 7.41996e-06 [merge_forward]: 4.65999e-06 [cell_reuse_recompute_pass]: 2.01003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.559e-05 [before_grad]: 1.3e-05 [inplace_validation]: 4.47e-06 [meta_fg_expand]: 4.72006e-06 [inplace_validation_after_expand]: 5.34998e-06 [flash_sp_send_recv_attached]: 8.5996e-07 [receive_attached]: 8.89995e-07 [after_resolve]: 9.9201e-06 [a_after_grad]: 1.25399e-05 [special_op_eliminate]: 7.61996e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 7.19912e-07 [auto_monad_grad]: 1.12993e-06 [auto_monad_eliminator]: 1.856e-05 [cse]: 2.063e-05 [a_3]: 4.91299e-05 [py_interpret_to_execute_after_opt_a]: 9.55001e-06 [slice_cell_reuse_recomputed_activation]: 2.27999e-06 [rewriter_after_opt_a]: 0.0001478 [convert_after_rewriter]: 1.08699e-05 [order_py_execute_after_rewriter]: 6.62007e-06 [opt_b]: 0.00024676, [1] [Cycle 1]: 0.00024112, [7] [b_1]: 0.00016403 [b_2]: 1.01899e-05 [updatestate_depend_eliminate]: 5.48002e-06 [updatestate_assign_eliminate]: 4.50003e-06 [updatestate_loads_eliminate]: 5.28002e-06 [renormalize]: 3.29921e-07 [cse]: 1.92099e-05 [optimize_parallel_all_gather_comm]: 8.58994e-06 [overlap_param_gather]: 1.06997e-06 [cconv]: 2.38901e-05 [loop_unroll]: 0.00051017 [opt_after_cconv]: 0.00013757, [1] [Cycle 1]: 0.00013145, [7] [c_1]: 5.344e-05 [parameter_eliminate]: 2.35997e-06 [updatestate_depend_eliminate]: 8.23999e-06 [updatestate_assign_eliminate]: 4.89003e-06 [updatestate_loads_eliminate]: 5.73997e-06 [cse]: 2.295e-05 [renormalize]: 3.40049e-07 [remove_dup_value]: 1.39001e-05 [tuple_transform]: 6.97699e-05, [1] [Cycle 1]: 6.533e-05, [2] [d_1]: 5.58201e-05 [renormalize]: 2.19909e-07 [partial_unused_args_eliminate]: 2.11003e-06 [add_cache_embedding]: 1.40601e-05 [add_recomputation]: 6.161e-05 [cse_after_recomputation]: 2.84e-05, [1] [Cycle 1]: 2.378e-05, [1] [cse]: 1.852e-05 [environ_conv]: 6.99004e-06 [swap_dp_allreduce_reducescatter]: 7.67992e-06 [bias_add_comm_swap]: 2.60002e-06 [label_micro_interleaved_index]: 1.97999e-06 [label_fine_grained_interleaved_index]: 2.15997e-06 [merge_cast_opt]: 9.89996e-07 [slice_recompute_activation]: 1.67999e-06 [micro_interleaved_order_control]: 1.79e-06 [assign_add_opt]: 2.859e-05 [ForceFp32Comm]: 8.5996e-07 [remove_cast_before_assign_add]: 6.97002e-06 [full_micro_interleaved_order_control]: 2.07999e-06 [reorder_send_recv_between_fp_bp]: 2.13005e-06 [comm_op_add_attrs]: 2.77599e-05 [add_comm_op_reuse_tag]: 1.96998e-06 [interleave_split_concat_branches]: 6.10016e-07 [interleave_parallel_branches]: 4.89992e-07 [overlap_opt_shard_in_pipeline]: 1.11992e-06 [overlap_opt_shard_grad_in_pipeline]: 2.06009e-06 [control_data_broadcast_order]: 9.89996e-07 [grouped_pairwise_exchange_alltoall]: 9.22999e-06 [offloading_packed_experts]: 1.79e-06 [overlap_recompute_and_grad_model_parallel]: 1.64995e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.2003e-07 [overlap_recompute_allgather_and_fa_grad]: 7.169e-05 [overlap_grad_ring_attention]: 1.72993e-06 [overlap_grad_flash_sp]: 1.405e-05 [begin_end_overlap_inline]: 7.79983e-07 [split_matmul_comm_elemetwise]: 1.90001e-06 [split_layernorm_comm]: 1.76998e-06 [handle_group_info]: 4.54998e-06 [symbol_engine_optimizer]: 9.14601e-05, [1] [Cycle 1]: 8.665e-05, [6] [build]: 5.25999e-06 [elim_shapecalc]: 1.299e-05 [elim_not_effective]: 1.711e-05 [opt_reshape]: 9.09995e-06 [fold_const_symbol]: 1.441e-05 [renormalize]: 3.30037e-07 [pipeline_parallel_scheduler]: 1.43005e-06 [auto_monad_reorder]: 3.011e-05 [get_jit_bprop_graph]: 4.89992e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00051539 [distribtued_split]: 3.988e-05 [validate]: 3.605e-05 [task_emit]: 0.0715157 [execute]: 1.174e-05 Sums bootstrap : 0.000300s : 0.38% type_inference : 0.002523s : 3.21% auto_monad : 0.000134s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000555s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000016s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000225s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000026s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000482s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000148s : 0.19% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000164s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000510s : 0.65% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000019s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000072s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000515s : 0.66% distribtued_split : 0.000040s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.071516s : 90.93% execute : 0.000012s : 0.01% TotalTime = 0.0826647, [21] [bootstrap]: 0.00031937 [type_inference]: 0.00249443 [auto_monad]: 0.00010651 [graph_reusing]: 1.66008e-06 [inline]: 1.35996e-06 [parallel-infer-symbol]: 1.71002e-06 [pre_auto_parallel]: 2.094e-05 [insert-virtual-dataset]: 2.05997e-06 [parallel-infer-symbol-second]: 4.70085e-07 [dataset_repeat_opt]: 1.05007e-06 [pipeline_split]: 1.09e-06 [optimize]: 0.0073257, [52] [py_interpret_to_execute]: 1.32701e-05 [rewriter_before_opt_a]: 3.024e-05 [opt_a]: 0.00551999, [2] [Cycle 1]: 0.00146277, [43] [expand_dump_flag]: 2.29001e-06 [switch_simplify]: 2.781e-05 [loop_unroll]: 1.357e-05 [a_1]: 0.00032773 [recompute_prepare]: 8.60007e-06 [updatestate_depend_eliminate]: 7.00995e-06 [updatestate_assign_eliminate]: 5.10993e-06 [updatestate_loads_eliminate]: 5.87001e-06 [parameter_eliminate]: 2.12993e-06 [a_2]: 0.00011528 [accelerated_algorithm]: 9.01998e-06 [shard]: 1.62993e-06 [meta_shard_fg_expand]: 3.12005e-06 [shard_inline]: 8.46002e-06 [auto_parallel]: 1.081e-05 [parallel]: 5.02996e-06 [flash_sp]: 7.90996e-06 [merge_comm]: 7.32997e-06 [allreduce_fusion]: 5.54998e-06 [matmul_add_comm_reduction]: 8.64e-06 [allreduce_slice_to_reducescatter]: 3.29921e-07 [virtual_shard_identity]: 1.013e-05 [virtual_dataset]: 8.37992e-06 [get_grad_eliminate_]: 8.24011e-06 [virtual_output]: 8.11997e-06 [merge_forward]: 5.34998e-06 [cell_reuse_recompute_pass]: 1.62004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.61499e-05 [before_grad]: 1.382e-05 [inplace_validation]: 4.57e-06 [meta_fg_expand]: 5.37001e-06 [inplace_validation_after_expand]: 5.83008e-06 [flash_sp_send_recv_attached]: 3.00002e-06 [receive_attached]: 1.55007e-06 [after_resolve]: 1.146e-05 [a_after_grad]: 1.313e-05 [special_op_eliminate]: 8.28994e-06 [renormalize]: 0.00042281 [add_forward_monad_depend]: 2.78e-06 [auto_monad_grad]: 1.32993e-06 [auto_monad_eliminator]: 2.377e-05 [cse]: 2.665e-05 [a_3]: 5.79e-05 [Cycle 2]: 0.00078088, [43] [expand_dump_flag]: 9.10019e-07 [switch_simplify]: 9.04e-06 [loop_unroll]: 8.37003e-06 [a_1]: 0.00020551 [recompute_prepare]: 7.33999e-06 [updatestate_depend_eliminate]: 5.69003e-06 [updatestate_assign_eliminate]: 5.00993e-06 [updatestate_loads_eliminate]: 5.12996e-06 [parameter_eliminate]: 1.07998e-06 [a_2]: 0.00010592 [accelerated_algorithm]: 8.59005e-06 [shard]: 9.30042e-07 [meta_shard_fg_expand]: 2.78e-06 [shard_inline]: 7.51007e-06 [auto_parallel]: 1.035e-05 [parallel]: 2.95998e-06 [flash_sp]: 2.64996e-06 [merge_comm]: 5.70004e-06 [allreduce_fusion]: 4.99003e-06 [matmul_add_comm_reduction]: 7.37002e-06 [allreduce_slice_to_reducescatter]: 2.69967e-07 [virtual_shard_identity]: 8.54e-06 [virtual_dataset]: 7.22997e-06 [get_grad_eliminate_]: 7.21996e-06 [virtual_output]: 7.22008e-06 [merge_forward]: 4.42006e-06 [cell_reuse_recompute_pass]: 1.73005e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.52499e-05 [before_grad]: 1.26399e-05 [inplace_validation]: 4.31994e-06 [meta_fg_expand]: 5.00004e-06 [inplace_validation_after_expand]: 5.31005e-06 [flash_sp_send_recv_attached]: 7.89994e-07 [receive_attached]: 7.49948e-07 [after_resolve]: 1.02399e-05 [a_after_grad]: 1.171e-05 [special_op_eliminate]: 7.39004e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 8.10018e-07 [auto_monad_grad]: 8.69972e-07 [auto_monad_eliminator]: 1.56e-05 [cse]: 1.857e-05 [a_3]: 4.968e-05 [py_interpret_to_execute_after_opt_a]: 8.76002e-06 [slice_cell_reuse_recomputed_activation]: 1.63005e-06 [rewriter_after_opt_a]: 0.00013053 [convert_after_rewriter]: 8.15e-06 [order_py_execute_after_rewriter]: 5.84999e-06 [opt_b]: 0.00024627, [1] [Cycle 1]: 0.00024125, [7] [b_1]: 0.00016687 [b_2]: 1.007e-05 [updatestate_depend_eliminate]: 5.28002e-06 [updatestate_assign_eliminate]: 4.25009e-06 [updatestate_loads_eliminate]: 5.1501e-06 [renormalize]: 2.5006e-07 [cse]: 1.77e-05 [optimize_parallel_all_gather_comm]: 7.62008e-06 [overlap_param_gather]: 1.32003e-06 [cconv]: 1.593e-05 [loop_unroll]: 0.00051231 [opt_after_cconv]: 0.00013327, [1] [Cycle 1]: 0.00012743, [7] [c_1]: 5.329e-05 [parameter_eliminate]: 1.86998e-06 [updatestate_depend_eliminate]: 7.45e-06 [updatestate_assign_eliminate]: 4.69002e-06 [updatestate_loads_eliminate]: 5.59993e-06 [cse]: 2e-05 [renormalize]: 3.90108e-07 [remove_dup_value]: 1.09399e-05 [tuple_transform]: 7.044e-05, [1] [Cycle 1]: 6.607e-05, [2] [d_1]: 5.591e-05 [renormalize]: 1.80095e-07 [partial_unused_args_eliminate]: 1.54006e-06 [add_cache_embedding]: 1.116e-05 [add_recomputation]: 5.682e-05 [cse_after_recomputation]: 2.73801e-05, [1] [Cycle 1]: 2.261e-05, [1] [cse]: 1.71401e-05 [environ_conv]: 6.71006e-06 [swap_dp_allreduce_reducescatter]: 7.29004e-06 [bias_add_comm_swap]: 1.82993e-06 [label_micro_interleaved_index]: 1.93005e-06 [label_fine_grained_interleaved_index]: 1.01002e-06 [merge_cast_opt]: 6.49947e-07 [slice_recompute_activation]: 1.13994e-06 [micro_interleaved_order_control]: 1.1801e-06 [assign_add_opt]: 2.482e-05 [ForceFp32Comm]: 6.10016e-07 [remove_cast_before_assign_add]: 6.46e-06 [full_micro_interleaved_order_control]: 1.43005e-06 [reorder_send_recv_between_fp_bp]: 1.29e-06 [comm_op_add_attrs]: 2.474e-05 [add_comm_op_reuse_tag]: 1.33005e-06 [interleave_split_concat_branches]: 7.89994e-07 [interleave_parallel_branches]: 4.7998e-07 [overlap_opt_shard_in_pipeline]: 1.10001e-06 [overlap_opt_shard_grad_in_pipeline]: 1.26997e-06 [control_data_broadcast_order]: 6.29923e-07 [grouped_pairwise_exchange_alltoall]: 6.34999e-06 [offloading_packed_experts]: 1.34995e-06 [overlap_recompute_and_grad_model_parallel]: 1.05996e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.10016e-07 [overlap_recompute_allgather_and_fa_grad]: 6.419e-05 [overlap_grad_ring_attention]: 1.17999e-06 [overlap_grad_flash_sp]: 1.311e-05 [begin_end_overlap_inline]: 4.7998e-07 [split_matmul_comm_elemetwise]: 1.1801e-06 [split_layernorm_comm]: 1.22003e-06 [handle_group_info]: 3.32994e-06 [symbol_engine_optimizer]: 9.149e-05, [1] [Cycle 1]: 8.686e-05, [6] [build]: 4.63007e-06 [elim_shapecalc]: 1.341e-05 [elim_not_effective]: 1.70099e-05 [opt_reshape]: 9.39006e-06 [fold_const_symbol]: 1.45499e-05 [renormalize]: 2.5006e-07 [pipeline_parallel_scheduler]: 8.10018e-07 [auto_monad_reorder]: 2.31999e-05 [get_jit_bprop_graph]: 5.19911e-07 [rewriter_after_jit_bprop_graph]: 3.39933e-07 [eliminate_special_op_node]: 0.00051311 [distribtued_split]: 3.42099e-05 [validate]: 2.97499e-05 [task_emit]: 0.0715479 [execute]: 8.35001e-06 Sums bootstrap : 0.000319s : 0.41% type_inference : 0.002494s : 3.18% auto_monad : 0.000107s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000037s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000533s : 0.68% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000221s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000423s : 0.54% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000039s : 0.05% optimize.opt_a.cse : 0.000045s : 0.06% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000131s : 0.17% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000167s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000512s : 0.65% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.01% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000057s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000025s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000025s : 0.03% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000006s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000064s : 0.08% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000023s : 0.03% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000513s : 0.65% distribtued_split : 0.000034s : 0.04% validate : 0.000030s : 0.04% task_emit : 0.071548s : 91.27% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000137 63 5.15% : 0.000007s : 2: substitution.depend_value_elim 2.03% : 0.000003s : 5: substitution.elim_not_effective 2.07% : 0.000003s : 5: substitution.fold_const_symbol 5.45% : 0.000007s : 6: substitution.graph_param_transform 50.56% : 0.000069s : 1: substitution.inline 4.23% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.09% : 0.000004s : 6: substitution.load_eliminater 2.39% : 0.000003s : 2: substitution.reduce_all_const_elim 5.61% : 0.000008s : 10: substitution.remove_not_recompute_node 2.65% : 0.000004s : 2: substitution.replace_old_param 8.52% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.25% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002611 2 88.52% : 0.002311s : 1: type_inference.infer 11.48% : 0.000300s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000235 1420 0.84% : 0.000002s : 13: predicate.accumulaten_eliminater 1.37% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.74% : 0.000002s : 12: predicate.addn_check_dump 0.85% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.23% : 0.000005s : 25: predicate.arithmetic_simplify 0.87% : 0.000002s : 13: predicate.cast_eliminate 0.85% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.20% : 0.000000s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.49% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.80% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.81% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.32% : 0.000001s : 6: predicate.elim_not_effective 0.64% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.09% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 1.99% : 0.000005s : 31: predicate.environ_get_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.22% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.65% : 0.000013s : 63: predicate.inline 0.99% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.10% : 0.000003s : 12: predicate.less_batch_normalization 1.75% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000006s : 38: predicate.load_eliminater 1.37% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.21% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.70% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.78% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.21% : 0.000003s : 14: predicate.partial_defer_inline 1.18% : 0.000003s : 19: predicate.partial_eliminate 0.75% : 0.000002s : 13: predicate.print_const_string_wrapper 0.84% : 0.000002s : 12: predicate.reduce_all_const_elim 1.13% : 0.000003s : 13: predicate.reduce_eliminate 0.64% : 0.000002s : 12: predicate.remove_not_recompute_node 1.10% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.83% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.45% : 0.000001s : 6: predicate.row_tensor_eliminate 1.01% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.95% : 0.000002s : 12: predicate.shard_identity_eliminate 1.57% : 0.000004s : 18: predicate.special_op_eliminate 0.91% : 0.000002s : 12: predicate.specialize_transform 1.01% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.26% : 0.000005s : 38: predicate.stopgrad_eliminater 0.40% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.57% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.40% : 0.000010s : 43: predicate.switch_simplify 0.74% : 0.000002s : 13: predicate.tile_eliminate 0.84% : 0.000002s : 13: predicate.transpose_eliminate 1.82% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.52% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.80% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.47% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.33% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.40% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.47% : 0.000001s : 6: predicate.value_based_eliminate 0.92% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.96% : 0.000002s : 12: predicate.virtual_output_eliminate 0.65% : 0.000002s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000165 4 10.47% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.53% : 0.000148s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091766 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000069s : 1: add_recomputation 0.04% : 0.000034s : 1: assign_add_opt 0.17% : 0.000156s : 1: auto_monad 0.04% : 0.000039s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.37% : 0.000336s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.04% : 0.000034s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000051s : 1: distribtued_split 0.59% : 0.000537s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000009s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000519s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001122s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000050s : 4: opt.transform.symbol_engine_opt 5.99% : 0.005496s : 1: opt_a 0.15% : 0.000140s : 1: opt_after_cconv 0.31% : 0.000284s : 1: opt_b 8.11% : 0.007440s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000020s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000007s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000071s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000022s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000020s : 1: remove_dup_value 0.28% : 0.000254s : 1: renormalize.infer 0.25% : 0.000233s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000159s : 1: rewriter_after_opt_a 0.05% : 0.000042s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000096s : 1: symbol_engine_optimizer 77.38% : 0.071005s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.90% : 0.002661s : 1: type_inference 0.08% : 0.000072s : 1: validate Time group info: ------[substitution.] 0.000131 63 4.98% : 0.000007s : 2: substitution.depend_value_elim 1.95% : 0.000003s : 5: substitution.elim_not_effective 1.97% : 0.000003s : 5: substitution.fold_const_symbol 4.94% : 0.000006s : 6: substitution.graph_param_transform 51.11% : 0.000067s : 1: substitution.inline 4.14% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.05% : 0.000004s : 6: substitution.load_eliminater 2.42% : 0.000003s : 2: substitution.reduce_all_const_elim 6.24% : 0.000008s : 10: substitution.remove_not_recompute_node 2.69% : 0.000004s : 2: substitution.replace_old_param 8.61% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.90% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002492 2 88.68% : 0.002210s : 1: type_inference.infer 11.32% : 0.000282s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000066 1 100.00% : 0.000066s : 1: match.inline ------[predicate.] 0.000235 1420 0.84% : 0.000002s : 13: predicate.accumulaten_eliminater 1.28% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.92% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.22% : 0.000005s : 25: predicate.arithmetic_simplify 0.83% : 0.000002s : 13: predicate.cast_eliminate 0.83% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.51% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.84% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.54% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.19% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000003s : 19: predicate.environ_get_depend_swap 1.96% : 0.000005s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.19% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.13% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.56% : 0.000013s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.42% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.06% : 0.000002s : 12: predicate.less_batch_normalization 1.81% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.57% : 0.000006s : 38: predicate.load_eliminater 1.28% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.86% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.83% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.84% : 0.000002s : 6: predicate.mutable_eliminate 0.52% : 0.000001s : 6: predicate.opt_reshape 0.50% : 0.000001s : 6: predicate.parallel_virtual_node 1.22% : 0.000003s : 14: predicate.partial_defer_inline 1.18% : 0.000003s : 19: predicate.partial_eliminate 0.73% : 0.000002s : 13: predicate.print_const_string_wrapper 0.82% : 0.000002s : 12: predicate.reduce_all_const_elim 1.07% : 0.000003s : 13: predicate.reduce_eliminate 0.56% : 0.000001s : 12: predicate.remove_not_recompute_node 1.12% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.84% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000002s : 12: predicate.shard_identity_eliminate 1.43% : 0.000003s : 18: predicate.special_op_eliminate 0.91% : 0.000002s : 12: predicate.specialize_transform 1.12% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.98% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.93% : 0.000002s : 14: predicate.switch_defer_inline 1.61% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.33% : 0.000010s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.75% : 0.000002s : 13: predicate.transpose_eliminate 1.87% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.60% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.71% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.50% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.57% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.68% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.35% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.46% : 0.000001s : 6: predicate.value_based_eliminate 0.87% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.92% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000161 4 10.27% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.73% : 0.000144s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091918 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000067s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.16% : 0.000147s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.35% : 0.000324s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.03% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000047s : 1: distribtued_split 0.58% : 0.000530s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000520s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001119s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000034s : 3: opt.transform.special_op_eliminate 0.05% : 0.000050s : 4: opt.transform.symbol_engine_opt 5.92% : 0.005445s : 1: opt_a 0.15% : 0.000141s : 1: opt_after_cconv 0.27% : 0.000250s : 1: opt_b 7.98% : 0.007332s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.08% : 0.000077s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.27% : 0.000248s : 1: renormalize.infer 0.25% : 0.000227s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000154s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000095s : 1: symbol_engine_optimizer 77.84% : 0.071545s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.76% : 0.002541s : 1: type_inference 0.08% : 0.000071s : 1: validate Time group info: ------[substitution.] 0.000111 63 4.23% : 0.000005s : 2: substitution.depend_value_elim 2.12% : 0.000002s : 5: substitution.elim_not_effective 1.93% : 0.000002s : 5: substitution.fold_const_symbol 5.53% : 0.000006s : 6: substitution.graph_param_transform 48.88% : 0.000054s : 1: substitution.inline 4.54% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.41% : 0.000004s : 6: substitution.load_eliminater 2.32% : 0.000003s : 2: substitution.reduce_all_const_elim 6.63% : 0.000007s : 10: substitution.remove_not_recompute_node 2.69% : 0.000003s : 2: substitution.replace_old_param 9.37% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.35% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002469 2 90.18% : 0.002227s : 1: type_inference.infer 9.82% : 0.000243s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000053 1 100.00% : 0.000053s : 1: match.inline ------[predicate.] 0.000234 1420 0.97% : 0.000002s : 13: predicate.accumulaten_eliminater 1.20% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.77% : 0.000002s : 12: predicate.addn_check_dump 1.01% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.35% : 0.000006s : 25: predicate.arithmetic_simplify 0.83% : 0.000002s : 13: predicate.cast_eliminate 0.85% : 0.000002s : 12: predicate.check_bprop_eliminate 0.79% : 0.000002s : 12: predicate.compare_switch_simplify 0.25% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.09% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.64% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.26% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_depend_swap 2.06% : 0.000005s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.86% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.20% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.28% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.55% : 0.000013s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.18% : 0.000003s : 12: predicate.less_batch_normalization 1.66% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.30% : 0.000005s : 38: predicate.load_eliminater 1.29% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.29% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.75% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.70% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.55% : 0.000001s : 6: predicate.parallel_virtual_node 1.13% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.15% : 0.000003s : 13: predicate.reduce_eliminate 0.60% : 0.000001s : 12: predicate.remove_not_recompute_node 1.08% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.90% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.06% : 0.000002s : 12: predicate.same_eliminate 0.45% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.97% : 0.000002s : 12: predicate.shard_identity_eliminate 1.40% : 0.000003s : 18: predicate.special_op_eliminate 0.99% : 0.000002s : 12: predicate.specialize_transform 1.02% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.07% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.27% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.93% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.22% : 0.000010s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.77% : 0.000002s : 13: predicate.transpose_eliminate 1.75% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.68% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.44% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.83% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.45% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.49% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.49% : 0.000001s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.57% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000149 4 6.98% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.02% : 0.000139s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091783 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000062s : 1: add_recomputation 0.03% : 0.000029s : 1: assign_add_opt 0.13% : 0.000118s : 1: auto_monad 0.03% : 0.000030s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000342s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.03% : 0.000029s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000042s : 1: distribtued_split 0.57% : 0.000526s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.01% : 0.000010s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000006s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000522s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.19% : 0.001090s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000156s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.02% : 0.005524s : 1: opt_a 0.15% : 0.000138s : 1: opt_after_cconv 0.27% : 0.000250s : 1: opt_b 7.99% : 0.007333s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.08% : 0.000070s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.24% : 0.000224s : 1: renormalize.infer 0.21% : 0.000194s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000136s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000095s : 1: symbol_engine_optimizer 77.98% : 0.071573s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.74% : 0.002511s : 1: type_inference 0.07% : 0.000060s : 1: validate TotalTime = 0.0841137, [21] [bootstrap]: 0.00034345 [type_inference]: 0.00278008 [auto_monad]: 0.00013549 [graph_reusing]: 2.39001e-06 [inline]: 1.9999e-06 [parallel-infer-symbol]: 2.53995e-06 [pre_auto_parallel]: 2.513e-05 [insert-virtual-dataset]: 2.41003e-06 [parallel-infer-symbol-second]: 5.89993e-07 [dataset_repeat_opt]: 1.31992e-06 [pipeline_split]: 1.64006e-06 [optimize]: 0.00766179, [52] [py_interpret_to_execute]: 1.65401e-05 [rewriter_before_opt_a]: 3.63e-05 [opt_a]: 0.00575468, [2] [Cycle 1]: 0.00158522, [43] [expand_dump_flag]: 3.81004e-06 [switch_simplify]: 3.11299e-05 [loop_unroll]: 1.308e-05 [a_1]: 0.00034902 [recompute_prepare]: 8.82999e-06 [updatestate_depend_eliminate]: 9.22999e-06 [updatestate_assign_eliminate]: 6.58003e-06 [updatestate_loads_eliminate]: 7.81007e-06 [parameter_eliminate]: 3.13995e-06 [a_2]: 0.00011793 [accelerated_algorithm]: 8.65001e-06 [shard]: 2.41993e-06 [meta_shard_fg_expand]: 3.88001e-06 [shard_inline]: 8.26002e-06 [auto_parallel]: 1.19901e-05 [parallel]: 7.77002e-06 [flash_sp]: 1.149e-05 [merge_comm]: 8.20006e-06 [allreduce_fusion]: 5.91995e-06 [matmul_add_comm_reduction]: 1.113e-05 [allreduce_slice_to_reducescatter]: 4.30038e-07 [virtual_shard_identity]: 9.30997e-06 [virtual_dataset]: 8.16002e-06 [get_grad_eliminate_]: 7.71997e-06 [virtual_output]: 7.99005e-06 [merge_forward]: 7.1699e-06 [cell_reuse_recompute_pass]: 2.02993e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.731e-05 [before_grad]: 1.422e-05 [inplace_validation]: 5.03007e-06 [meta_fg_expand]: 5.53997e-06 [inplace_validation_after_expand]: 6.32007e-06 [flash_sp_send_recv_attached]: 4.50993e-06 [receive_attached]: 2.65997e-06 [after_resolve]: 1.133e-05 [a_after_grad]: 1.291e-05 [special_op_eliminate]: 7.93999e-06 [renormalize]: 0.00046122 [add_forward_monad_depend]: 4.12005e-06 [auto_monad_grad]: 2.11003e-06 [auto_monad_eliminator]: 3.205e-05 [cse]: 3.57301e-05 [a_3]: 5.88e-05 [Cycle 2]: 0.00080835, [43] [expand_dump_flag]: 1.10001e-06 [switch_simplify]: 9.15991e-06 [loop_unroll]: 8.00006e-06 [a_1]: 0.00020717 [recompute_prepare]: 7.79994e-06 [updatestate_depend_eliminate]: 5.93998e-06 [updatestate_assign_eliminate]: 4.55999e-06 [updatestate_loads_eliminate]: 5.57001e-06 [parameter_eliminate]: 1.49e-06 [a_2]: 0.00010761 [accelerated_algorithm]: 8.42009e-06 [shard]: 1.23004e-06 [meta_shard_fg_expand]: 2.62004e-06 [shard_inline]: 8.20996e-06 [auto_parallel]: 1.10599e-05 [parallel]: 3.76999e-06 [flash_sp]: 3.74008e-06 [merge_comm]: 6.00994e-06 [allreduce_fusion]: 4.93007e-06 [matmul_add_comm_reduction]: 7.9599e-06 [allreduce_slice_to_reducescatter]: 2.39932e-07 [virtual_shard_identity]: 9.00996e-06 [virtual_dataset]: 7.83009e-06 [get_grad_eliminate_]: 7.29004e-06 [virtual_output]: 7.29994e-06 [merge_forward]: 4.43996e-06 [cell_reuse_recompute_pass]: 2.11003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.53501e-05 [before_grad]: 1.26001e-05 [inplace_validation]: 4.73997e-06 [meta_fg_expand]: 4.8501e-06 [inplace_validation_after_expand]: 5.41005e-06 [flash_sp_send_recv_attached]: 9.2003e-07 [receive_attached]: 8.19913e-07 [after_resolve]: 1.00901e-05 [a_after_grad]: 1.20801e-05 [special_op_eliminate]: 7.51996e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 1.03994e-06 [auto_monad_grad]: 1.15996e-06 [auto_monad_eliminator]: 1.806e-05 [cse]: 2.049e-05 [a_3]: 5.783e-05 [py_interpret_to_execute_after_opt_a]: 9.86992e-06 [slice_cell_reuse_recomputed_activation]: 2.65997e-06 [rewriter_after_opt_a]: 0.0001575 [convert_after_rewriter]: 9.39996e-06 [order_py_execute_after_rewriter]: 5.74999e-06 [opt_b]: 0.00024355, [1] [Cycle 1]: 0.00023769, [7] [b_1]: 0.00016242 [b_2]: 9.66992e-06 [updatestate_depend_eliminate]: 5.39003e-06 [updatestate_assign_eliminate]: 4.24997e-06 [updatestate_loads_eliminate]: 5.23007e-06 [renormalize]: 3.29921e-07 [cse]: 1.91299e-05 [optimize_parallel_all_gather_comm]: 8.4599e-06 [overlap_param_gather]: 1.47999e-06 [cconv]: 2.395e-05 [loop_unroll]: 0.00051243 [opt_after_cconv]: 0.00013936, [1] [Cycle 1]: 0.00013257, [7] [c_1]: 5.38899e-05 [parameter_eliminate]: 2.53005e-06 [updatestate_depend_eliminate]: 8.26002e-06 [updatestate_assign_eliminate]: 4.86011e-06 [updatestate_loads_eliminate]: 5.77001e-06 [cse]: 2.311e-05 [renormalize]: 3.89991e-07 [remove_dup_value]: 1.427e-05 [tuple_transform]: 7.086e-05, [1] [Cycle 1]: 6.62101e-05, [2] [d_1]: 5.686e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 2.05997e-06 [add_cache_embedding]: 1.28801e-05 [add_recomputation]: 6.25299e-05 [cse_after_recomputation]: 2.859e-05, [1] [Cycle 1]: 2.37699e-05, [1] [cse]: 1.838e-05 [environ_conv]: 8.06001e-06 [swap_dp_allreduce_reducescatter]: 8.06001e-06 [bias_add_comm_swap]: 2.52994e-06 [label_micro_interleaved_index]: 1.99e-06 [label_fine_grained_interleaved_index]: 2.11003e-06 [merge_cast_opt]: 1.12003e-06 [slice_recompute_activation]: 2.07999e-06 [micro_interleaved_order_control]: 1.50001e-06 [assign_add_opt]: 2.942e-05 [ForceFp32Comm]: 7.89994e-07 [remove_cast_before_assign_add]: 7.23999e-06 [full_micro_interleaved_order_control]: 2.27999e-06 [reorder_send_recv_between_fp_bp]: 2.16009e-06 [comm_op_add_attrs]: 2.833e-05 [add_comm_op_reuse_tag]: 2.37999e-06 [interleave_split_concat_branches]: 9.00007e-07 [interleave_parallel_branches]: 8.49948e-07 [overlap_opt_shard_in_pipeline]: 1.30001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.37999e-06 [control_data_broadcast_order]: 1.34995e-06 [grouped_pairwise_exchange_alltoall]: 9.56992e-06 [offloading_packed_experts]: 2.32004e-06 [overlap_recompute_and_grad_model_parallel]: 2.22004e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.39936e-07 [overlap_recompute_allgather_and_fa_grad]: 7.177e-05 [overlap_grad_ring_attention]: 2.35997e-06 [overlap_grad_flash_sp]: 1.58601e-05 [begin_end_overlap_inline]: 7.79983e-07 [split_matmul_comm_elemetwise]: 2.06009e-06 [split_layernorm_comm]: 1.69e-06 [handle_group_info]: 4.81994e-06 [symbol_engine_optimizer]: 8.964e-05, [1] [Cycle 1]: 8.46999e-05, [6] [build]: 5.00004e-06 [elim_shapecalc]: 1.278e-05 [elim_not_effective]: 1.64399e-05 [opt_reshape]: 8.72998e-06 [fold_const_symbol]: 1.38901e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 1.53005e-06 [auto_monad_reorder]: 3.25299e-05 [get_jit_bprop_graph]: 5.00004e-07 [rewriter_after_jit_bprop_graph]: 4.29922e-07 [eliminate_special_op_node]: 0.00052926 [distribtued_split]: 4.285e-05 [validate]: 3.47899e-05 [task_emit]: 0.0722546 [execute]: 1.11599e-05 Sums bootstrap : 0.000343s : 0.43% type_inference : 0.002780s : 3.49% auto_monad : 0.000135s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000556s : 0.70% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000226s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000461s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000056s : 0.07% optimize.opt_a.a_3 : 0.000117s : 0.15% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000157s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.20% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000512s : 0.64% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000072s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000033s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000529s : 0.66% distribtued_split : 0.000043s : 0.05% validate : 0.000035s : 0.04% task_emit : 0.072255s : 90.63% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000138 63 5.08% : 0.000007s : 2: substitution.depend_value_elim 2.03% : 0.000003s : 5: substitution.elim_not_effective 1.74% : 0.000002s : 5: substitution.fold_const_symbol 5.81% : 0.000008s : 6: substitution.graph_param_transform 50.60% : 0.000070s : 1: substitution.inline 3.99% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.41% : 0.000005s : 6: substitution.load_eliminater 2.88% : 0.000004s : 2: substitution.reduce_all_const_elim 5.74% : 0.000008s : 10: substitution.remove_not_recompute_node 2.71% : 0.000004s : 2: substitution.replace_old_param 8.61% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.41% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002750 2 89.49% : 0.002461s : 1: type_inference.infer 10.51% : 0.000289s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000069 1 100.00% : 0.000069s : 1: match.inline ------[predicate.] 0.000233 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.33% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.74% : 0.000002s : 12: predicate.addn_check_dump 0.87% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.38% : 0.000006s : 25: predicate.arithmetic_simplify 0.91% : 0.000002s : 13: predicate.cast_eliminate 0.75% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.52% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.53% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.55% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.25% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_depend_swap 2.00% : 0.000005s : 31: predicate.environ_get_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.25% : 0.000003s : 14: predicate.float_depend_g_call 0.69% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.88% : 0.000002s : 12: predicate.get_grad_eliminate 0.37% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.58% : 0.000013s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.16% : 0.000003s : 12: predicate.less_batch_normalization 1.77% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.32% : 0.000005s : 38: predicate.load_eliminater 1.43% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.75% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.87% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.68% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.07% : 0.000003s : 14: predicate.partial_defer_inline 1.27% : 0.000003s : 19: predicate.partial_eliminate 0.78% : 0.000002s : 13: predicate.print_const_string_wrapper 0.88% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000003s : 13: predicate.reduce_eliminate 0.59% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.78% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.10% : 0.000003s : 12: predicate.same_eliminate 0.43% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000002s : 12: predicate.shard_identity_eliminate 1.47% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 0.98% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.00% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.41% : 0.000006s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000002s : 14: predicate.switch_defer_inline 1.63% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.39% : 0.000010s : 43: predicate.switch_simplify 0.76% : 0.000002s : 13: predicate.tile_eliminate 0.88% : 0.000002s : 13: predicate.transpose_eliminate 1.78% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.77% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.51% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.69% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.53% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.43% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.24% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.28% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.87% : 0.000002s : 12: predicate.virtual_output_eliminate 0.63% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000164 4 10.04% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.96% : 0.000147s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.093643 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000067s : 1: add_recomputation 0.04% : 0.000034s : 1: assign_add_opt 0.16% : 0.000148s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.39% : 0.000367s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.03% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000032s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000051s : 1: distribtued_split 0.58% : 0.000543s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000522s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.20% : 0.001127s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.16% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 6.15% : 0.005759s : 1: opt_a 0.15% : 0.000144s : 1: opt_after_cconv 0.26% : 0.000247s : 1: opt_b 8.19% : 0.007670s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000020s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000007s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.08% : 0.000078s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.02% : 0.000015s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.27% : 0.000250s : 1: renormalize.infer 0.22% : 0.000205s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000164s : 1: rewriter_after_opt_a 0.04% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000092s : 1: symbol_engine_optimizer 77.19% : 0.072285s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.99% : 0.002799s : 1: type_inference 0.08% : 0.000071s : 1: validate [WARNING] PARALLEL(169251,ffff805f5c10,python3.7):2025-02-07-15:54:29.796.390 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169260,ffff90a16c10,python3.7):2025-02-07-15:54:29.796.857 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169242,ffff96d80c10,python3.7):2025-02-07-15:54:29.796.885 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169309,ffff88eb9c10,python3.7):2025-02-07-15:54:29.796.989 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169273,ffff95a23c10,python3.7):2025-02-07-15:54:29.797.103 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169297,ffff9ffe2c10,python3.7):2025-02-07-15:54:29.797.263 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169323,ffff82a65c10,python3.7):2025-02-07-15:54:29.797.445 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169285,ffff97501c10,python3.7):2025-02-07-15:54:29.797.729 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 0.0797754, [21] [bootstrap]: 0.00029144 [type_inference]: 0.00227921 [auto_monad]: 0.00010509 [graph_reusing]: 1.99e-06 [inline]: 1.25007e-06 [parallel-infer-symbol]: 1.25996e-06 [pre_auto_parallel]: 2.167e-05 [insert-virtual-dataset]: 2.52004e-06 [parallel-infer-symbol-second]: 3.80096e-07 [dataset_repeat_opt]: 9.39937e-07 [pipeline_split]: 1.11992e-06 [optimize]: 0.00701283, [52] [py_interpret_to_execute]: 1.352e-05 [rewriter_before_opt_a]: 3.169e-05 [opt_a]: 0.00525079, [2] [Cycle 1]: 0.00152823, [43] [expand_dump_flag]: 2.76999e-06 [switch_simplify]: 2.58699e-05 [loop_unroll]: 1.31801e-05 [a_1]: 0.00038159 [recompute_prepare]: 8.61997e-06 [updatestate_depend_eliminate]: 8.10006e-06 [updatestate_assign_eliminate]: 5.63997e-06 [updatestate_loads_eliminate]: 6.79004e-06 [parameter_eliminate]: 2.64007e-06 [a_2]: 0.00011525 [accelerated_algorithm]: 8.71008e-06 [shard]: 1.66008e-06 [meta_shard_fg_expand]: 3.46999e-06 [shard_inline]: 8.54e-06 [auto_parallel]: 1.211e-05 [parallel]: 6.06e-06 [flash_sp]: 8.65001e-06 [merge_comm]: 7.47002e-06 [allreduce_fusion]: 5.12996e-06 [matmul_add_comm_reduction]: 9.70997e-06 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 1.034e-05 [virtual_dataset]: 8.27992e-06 [get_grad_eliminate_]: 8.05e-06 [virtual_output]: 7.68993e-06 [merge_forward]: 5.74999e-06 [cell_reuse_recompute_pass]: 1.51992e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.702e-05 [before_grad]: 1.372e-05 [inplace_validation]: 5.03997e-06 [meta_fg_expand]: 5.38002e-06 [inplace_validation_after_expand]: 5.84999e-06 [flash_sp_send_recv_attached]: 3.76999e-06 [receive_attached]: 1.56998e-06 [after_resolve]: 1.154e-05 [a_after_grad]: 1.219e-05 [special_op_eliminate]: 7.93999e-06 [renormalize]: 0.00041907 [add_forward_monad_depend]: 2.74007e-06 [auto_monad_grad]: 1.44006e-06 [auto_monad_eliminator]: 2.484e-05 [cse]: 2.62e-05 [a_3]: 5.90601e-05 [Cycle 2]: 0.00078747, [43] [expand_dump_flag]: 1.03004e-06 [switch_simplify]: 8.7599e-06 [loop_unroll]: 7.78993e-06 [a_1]: 0.00020231 [recompute_prepare]: 7.42998e-06 [updatestate_depend_eliminate]: 6.03998e-06 [updatestate_assign_eliminate]: 4.71005e-06 [updatestate_loads_eliminate]: 5.62996e-06 [parameter_eliminate]: 1.17999e-06 [a_2]: 0.00010403 [accelerated_algorithm]: 8.18993e-06 [shard]: 1.05996e-06 [meta_shard_fg_expand]: 2.68e-06 [shard_inline]: 8.18004e-06 [auto_parallel]: 1.03901e-05 [parallel]: 3.31004e-06 [flash_sp]: 2.95008e-06 [merge_comm]: 5.96e-06 [allreduce_fusion]: 5.15999e-06 [matmul_add_comm_reduction]: 7.32997e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 9.97004e-06 [virtual_dataset]: 7.93999e-06 [get_grad_eliminate_]: 7.45e-06 [virtual_output]: 7.76001e-06 [merge_forward]: 4.49002e-06 [cell_reuse_recompute_pass]: 1.90001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.65401e-05 [before_grad]: 1.311e-05 [inplace_validation]: 4.53996e-06 [meta_fg_expand]: 5.20004e-06 [inplace_validation_after_expand]: 5.3799e-06 [flash_sp_send_recv_attached]: 7.3004e-07 [receive_attached]: 6.50063e-07 [after_resolve]: 9.92999e-06 [a_after_grad]: 1.19001e-05 [special_op_eliminate]: 7.67992e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 9.80101e-07 [auto_monad_grad]: 1.00001e-06 [auto_monad_eliminator]: 1.62401e-05 [cse]: 1.841e-05 [a_3]: 4.949e-05 [py_interpret_to_execute_after_opt_a]: 7.98004e-06 [slice_cell_reuse_recomputed_activation]: 2.07999e-06 [rewriter_after_opt_a]: 0.00013408 [convert_after_rewriter]: 8.01007e-06 [order_py_execute_after_rewriter]: 6.08002e-06 [opt_b]: 0.00024099, [1] [Cycle 1]: 0.00023571, [7] [b_1]: 0.00016175 [b_2]: 9.78005e-06 [updatestate_depend_eliminate]: 5.52996e-06 [updatestate_assign_eliminate]: 4.40003e-06 [updatestate_loads_eliminate]: 4.97e-06 [renormalize]: 3.00002e-07 [cse]: 1.706e-05 [optimize_parallel_all_gather_comm]: 8.33999e-06 [overlap_param_gather]: 6.79982e-07 [cconv]: 1.646e-05 [loop_unroll]: 0.00048515 [opt_after_cconv]: 0.00013182, [1] [Cycle 1]: 0.00012578, [7] [c_1]: 5.332e-05 [parameter_eliminate]: 2.06998e-06 [updatestate_depend_eliminate]: 7.72998e-06 [updatestate_assign_eliminate]: 4.62006e-06 [updatestate_loads_eliminate]: 5.69993e-06 [cse]: 1.949e-05 [renormalize]: 3.20026e-07 [remove_dup_value]: 9.47993e-06 [tuple_transform]: 6.976e-05, [1] [Cycle 1]: 6.546e-05, [2] [d_1]: 5.556e-05 [renormalize]: 1.79978e-07 [partial_unused_args_eliminate]: 1.60001e-06 [add_cache_embedding]: 1.183e-05 [add_recomputation]: 5.532e-05 [cse_after_recomputation]: 2.591e-05, [1] [Cycle 1]: 2.17101e-05, [1] [cse]: 1.652e-05 [environ_conv]: 6.37001e-06 [swap_dp_allreduce_reducescatter]: 7.67002e-06 [bias_add_comm_swap]: 1.80991e-06 [label_micro_interleaved_index]: 1.44995e-06 [label_fine_grained_interleaved_index]: 1.20001e-06 [merge_cast_opt]: 7.3004e-07 [slice_recompute_activation]: 1.14006e-06 [micro_interleaved_order_control]: 1.34995e-06 [assign_add_opt]: 2.574e-05 [ForceFp32Comm]: 6.89994e-07 [remove_cast_before_assign_add]: 5.97001e-06 [full_micro_interleaved_order_control]: 1.53005e-06 [reorder_send_recv_between_fp_bp]: 1.23004e-06 [comm_op_add_attrs]: 2.38001e-05 [add_comm_op_reuse_tag]: 1.70001e-06 [interleave_split_concat_branches]: 9.39937e-07 [interleave_parallel_branches]: 5.49946e-07 [overlap_opt_shard_in_pipeline]: 6.3004e-07 [overlap_opt_shard_grad_in_pipeline]: 1.44995e-06 [control_data_broadcast_order]: 6.59958e-07 [grouped_pairwise_exchange_alltoall]: 7.07002e-06 [offloading_packed_experts]: 1.23004e-06 [overlap_recompute_and_grad_model_parallel]: 1.85007e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.50062e-07 [overlap_recompute_allgather_and_fa_grad]: 6.556e-05 [overlap_grad_ring_attention]: 1.69e-06 [overlap_grad_flash_sp]: 1.32699e-05 [begin_end_overlap_inline]: 5.10016e-07 [split_matmul_comm_elemetwise]: 1.53005e-06 [split_layernorm_comm]: 1.35996e-06 [handle_group_info]: 3.34007e-06 [symbol_engine_optimizer]: 9.011e-05, [1] [Cycle 1]: 8.582e-05, [6] [build]: 4.74998e-06 [elim_shapecalc]: 1.28699e-05 [elim_not_effective]: 1.63501e-05 [opt_reshape]: 9.29006e-06 [fold_const_symbol]: 1.427e-05 [renormalize]: 2.89991e-07 [pipeline_parallel_scheduler]: 1.05007e-06 [auto_monad_reorder]: 2.49801e-05 [get_jit_bprop_graph]: 3.69968e-07 [rewriter_after_jit_bprop_graph]: 3.29921e-07 [eliminate_special_op_node]: 0.0004978 [distribtued_split]: 3.43301e-05 [validate]: 3.058e-05 [task_emit]: 0.0692059 [execute]: 8.97993e-06 Sums bootstrap : 0.000291s : 0.38% type_inference : 0.002279s : 3.01% auto_monad : 0.000105s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000022s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000032s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000584s : 0.77% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000219s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000020s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000034s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000419s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000041s : 0.05% optimize.opt_a.cse : 0.000045s : 0.06% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000134s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000017s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000485s : 0.64% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000019s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000055s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000026s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000024s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000007s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000066s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000025s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000498s : 0.66% distribtued_split : 0.000034s : 0.05% validate : 0.000031s : 0.04% task_emit : 0.069206s : 91.27% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000117 63 4.55% : 0.000005s : 2: substitution.depend_value_elim 2.13% : 0.000002s : 5: substitution.elim_not_effective 2.05% : 0.000002s : 5: substitution.fold_const_symbol 5.55% : 0.000007s : 6: substitution.graph_param_transform 49.32% : 0.000058s : 1: substitution.inline 4.74% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.71% : 0.000004s : 6: substitution.load_eliminater 2.35% : 0.000003s : 2: substitution.reduce_all_const_elim 6.48% : 0.000008s : 10: substitution.remove_not_recompute_node 2.54% : 0.000003s : 2: substitution.replace_old_param 8.95% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.62% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002255 2 89.63% : 0.002021s : 1: type_inference.infer 10.37% : 0.000234s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000057 1 100.00% : 0.000057s : 1: match.inline ------[predicate.] 0.000276 1420 0.65% : 0.000002s : 13: predicate.accumulaten_eliminater 1.06% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.63% : 0.000002s : 12: predicate.addn_check_dump 0.66% : 0.000002s : 13: predicate.addn_zero_filter 0.61% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 1.86% : 0.000005s : 25: predicate.arithmetic_simplify 0.71% : 0.000002s : 13: predicate.cast_eliminate 0.67% : 0.000002s : 12: predicate.check_bprop_eliminate 0.63% : 0.000002s : 12: predicate.compare_switch_simplify 0.17% : 0.000000s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.01% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.63% : 0.000002s : 12: predicate.depend_value_elim 0.69% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.75% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.74% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.22% : 0.000001s : 6: predicate.elim_not_effective 0.48% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 0.92% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.00% : 0.000003s : 19: predicate.environ_get_add_eliminate 0.87% : 0.000002s : 19: predicate.environ_get_depend_swap 1.67% : 0.000005s : 31: predicate.environ_get_eliminate 1.00% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.73% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.05% : 0.000003s : 14: predicate.float_depend_g_call 0.66% : 0.000002s : 12: predicate.float_environ_get_switch 0.93% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.18% : 0.000001s : 6: predicate.fold_const_symbol 0.70% : 0.000002s : 12: predicate.get_grad_eliminate 0.28% : 0.000001s : 6: predicate.graph_param_transform 0.65% : 0.000002s : 12: predicate.incorporate_call 0.59% : 0.000002s : 12: predicate.incorporate_call_switch 4.61% : 0.000013s : 63: predicate.inline 0.90% : 0.000002s : 12: predicate.inline_without_move 0.33% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.86% : 0.000002s : 12: predicate.less_batch_normalization 1.45% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.00% : 0.000006s : 38: predicate.load_eliminater 1.03% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.03% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.54% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.63% : 0.000002s : 12: predicate.merge_addn 0.61% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.68% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.61% : 0.000002s : 13: predicate.minmaximum_grad 0.69% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.42% : 0.000001s : 6: predicate.parallel_virtual_node 1.02% : 0.000003s : 14: predicate.partial_defer_inline 1.03% : 0.000003s : 19: predicate.partial_eliminate 0.68% : 0.000002s : 13: predicate.print_const_string_wrapper 0.74% : 0.000002s : 12: predicate.reduce_all_const_elim 0.91% : 0.000003s : 13: predicate.reduce_eliminate 0.46% : 0.000001s : 12: predicate.remove_not_recompute_node 0.95% : 0.000003s : 25: predicate.replace_applicator 0.36% : 0.000001s : 12: predicate.replace_old_param 0.22% : 0.000001s : 6: predicate.reset_defer_inline 0.71% : 0.000002s : 13: predicate.reshape_eliminate 0.68% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.40% : 0.000001s : 6: predicate.row_tensor_eliminate 0.93% : 0.000003s : 12: predicate.same_eliminate 0.39% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.77% : 0.000002s : 12: predicate.shard_identity_eliminate 1.19% : 0.000003s : 18: predicate.special_op_eliminate 0.82% : 0.000002s : 12: predicate.specialize_transform 0.82% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.79% : 0.000002s : 12: predicate.stack_unstack_eliminate 1.91% : 0.000005s : 38: predicate.stopgrad_eliminater 0.37% : 0.000001s : 6: predicate.switch_call_monad_eliminater 17.59% : 0.000049s : 14: predicate.switch_defer_inline 1.35% : 0.000004s : 26: predicate.switch_layer_defer_inline 3.58% : 0.000010s : 43: predicate.switch_simplify 0.73% : 0.000002s : 13: predicate.tile_eliminate 0.71% : 0.000002s : 13: predicate.transpose_eliminate 1.57% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.40% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.30% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.23% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.32% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.09% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.35% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 1.97% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 2.80% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.44% : 0.000001s : 6: predicate.value_based_eliminate 0.68% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.64% : 0.000002s : 12: predicate.virtual_output_eliminate 0.43% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000137 4 8.06% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.94% : 0.000126s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088625 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000060s : 1: add_recomputation 0.03% : 0.000030s : 1: assign_add_opt 0.13% : 0.000117s : 1: auto_monad 0.05% : 0.000048s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000316s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.03% : 0.000028s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000042s : 1: distribtued_split 0.58% : 0.000511s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000011s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.56% : 0.000494s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.28% : 0.001138s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 5.93% : 0.005255s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.28% : 0.000245s : 1: opt_b 7.92% : 0.007022s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000071s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000005s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000028s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000009s : 1: remove_cast_before_assign_add 0.01% : 0.000013s : 1: remove_dup_value 0.25% : 0.000222s : 1: renormalize.infer 0.22% : 0.000191s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000140s : 1: rewriter_after_opt_a 0.04% : 0.000036s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000093s : 1: symbol_engine_optimizer 78.12% : 0.069232s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.59% : 0.002297s : 1: type_inference 0.07% : 0.000063s : 1: validate TotalTime = 0.0802382, [21] [bootstrap]: 0.00030543 [type_inference]: 0.00251298 [auto_monad]: 0.00012831 [graph_reusing]: 2.36998e-06 [inline]: 1.29e-06 [parallel-infer-symbol]: 1.85997e-06 [pre_auto_parallel]: 2.64201e-05 [insert-virtual-dataset]: 2.30002e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 1.62993e-06 [pipeline_split]: 1.56998e-06 [optimize]: 0.00734689, [52] [py_interpret_to_execute]: 1.657e-05 [rewriter_before_opt_a]: 3.385e-05 [opt_a]: 0.00545708, [2] [Cycle 1]: 0.00158001, [43] [expand_dump_flag]: 3.70992e-06 [switch_simplify]: 3.15401e-05 [loop_unroll]: 1.38599e-05 [a_1]: 0.0003517 [recompute_prepare]: 8.84e-06 [updatestate_depend_eliminate]: 8.52998e-06 [updatestate_assign_eliminate]: 5.87001e-06 [updatestate_loads_eliminate]: 8.46002e-06 [parameter_eliminate]: 3.54007e-06 [a_2]: 0.00011926 [accelerated_algorithm]: 8.72998e-06 [shard]: 2.31003e-06 [meta_shard_fg_expand]: 3.52995e-06 [shard_inline]: 8.59995e-06 [auto_parallel]: 1.219e-05 [parallel]: 7.51996e-06 [flash_sp]: 1.107e-05 [merge_comm]: 8.42998e-06 [allreduce_fusion]: 5.51995e-06 [matmul_add_comm_reduction]: 1.06801e-05 [allreduce_slice_to_reducescatter]: 5.10016e-07 [virtual_shard_identity]: 9.75002e-06 [virtual_dataset]: 7.92998e-06 [get_grad_eliminate_]: 7.60006e-06 [virtual_output]: 7.83999e-06 [merge_forward]: 6.23998e-06 [cell_reuse_recompute_pass]: 1.75997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.725e-05 [before_grad]: 1.374e-05 [inplace_validation]: 5.67001e-06 [meta_fg_expand]: 5.47001e-06 [inplace_validation_after_expand]: 7.00005e-06 [flash_sp_send_recv_attached]: 5.02996e-06 [receive_attached]: 2.58e-06 [after_resolve]: 1.115e-05 [a_after_grad]: 1.30701e-05 [special_op_eliminate]: 8.12009e-06 [renormalize]: 0.00045552 [add_forward_monad_depend]: 3.98001e-06 [auto_monad_grad]: 2.09e-06 [auto_monad_eliminator]: 3.30401e-05 [cse]: 3.628e-05 [a_3]: 6.117e-05 [Cycle 2]: 0.00079002, [43] [expand_dump_flag]: 1.16997e-06 [switch_simplify]: 9.42999e-06 [loop_unroll]: 7.75e-06 [a_1]: 0.00020931 [recompute_prepare]: 7.46001e-06 [updatestate_depend_eliminate]: 6.23998e-06 [updatestate_assign_eliminate]: 4.78001e-06 [updatestate_loads_eliminate]: 5.41995e-06 [parameter_eliminate]: 1.25996e-06 [a_2]: 0.00010567 [accelerated_algorithm]: 8.54e-06 [shard]: 1.09e-06 [meta_shard_fg_expand]: 2.74996e-06 [shard_inline]: 7.86001e-06 [auto_parallel]: 1.07799e-05 [parallel]: 3.60992e-06 [flash_sp]: 3.99991e-06 [merge_comm]: 6.21006e-06 [allreduce_fusion]: 5.30994e-06 [matmul_add_comm_reduction]: 8.03999e-06 [allreduce_slice_to_reducescatter]: 3.20026e-07 [virtual_shard_identity]: 8.80996e-06 [virtual_dataset]: 7.61007e-06 [get_grad_eliminate_]: 7.53999e-06 [virtual_output]: 7.23009e-06 [merge_forward]: 4.63007e-06 [cell_reuse_recompute_pass]: 1.79e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.509e-05 [before_grad]: 1.22901e-05 [inplace_validation]: 4.61994e-06 [meta_fg_expand]: 4.92996e-06 [inplace_validation_after_expand]: 5.59003e-06 [flash_sp_send_recv_attached]: 8.89995e-07 [receive_attached]: 6.50063e-07 [after_resolve]: 1.029e-05 [a_after_grad]: 1.239e-05 [special_op_eliminate]: 7.29994e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 9.2003e-07 [auto_monad_grad]: 1.20001e-06 [auto_monad_eliminator]: 1.779e-05 [cse]: 2.014e-05 [a_3]: 4.907e-05 [py_interpret_to_execute_after_opt_a]: 8.92999e-06 [slice_cell_reuse_recomputed_activation]: 1.97999e-06 [rewriter_after_opt_a]: 0.00015527 [convert_after_rewriter]: 9.89996e-06 [order_py_execute_after_rewriter]: 6.36e-06 [opt_b]: 0.0002452, [1] [Cycle 1]: 0.00023982, [7] [b_1]: 0.00016512 [b_2]: 9.99996e-06 [updatestate_depend_eliminate]: 5.69993e-06 [updatestate_assign_eliminate]: 4.27e-06 [updatestate_loads_eliminate]: 5.34998e-06 [renormalize]: 4.7998e-07 [cse]: 1.886e-05 [optimize_parallel_all_gather_comm]: 8.42998e-06 [overlap_param_gather]: 1.09e-06 [cconv]: 2.416e-05 [loop_unroll]: 0.00049206 [opt_after_cconv]: 0.00013628, [1] [Cycle 1]: 0.0001299, [7] [c_1]: 5.566e-05 [parameter_eliminate]: 2.30002e-06 [updatestate_depend_eliminate]: 8.45001e-06 [updatestate_assign_eliminate]: 4.82996e-06 [updatestate_loads_eliminate]: 5.37001e-06 [cse]: 2.197e-05 [renormalize]: 4.10015e-07 [remove_dup_value]: 1.388e-05 [tuple_transform]: 7.07499e-05, [1] [Cycle 1]: 6.59199e-05, [2] [d_1]: 5.654e-05 [renormalize]: 2.40048e-07 [partial_unused_args_eliminate]: 2.09e-06 [add_cache_embedding]: 1.349e-05 [add_recomputation]: 6.40999e-05 [cse_after_recomputation]: 2.742e-05, [1] [Cycle 1]: 2.25999e-05, [1] [cse]: 1.777e-05 [environ_conv]: 7.79994e-06 [swap_dp_allreduce_reducescatter]: 7.40006e-06 [bias_add_comm_swap]: 2.39001e-06 [label_micro_interleaved_index]: 2.35997e-06 [label_fine_grained_interleaved_index]: 1.87999e-06 [merge_cast_opt]: 9.39937e-07 [slice_recompute_activation]: 1.92004e-06 [micro_interleaved_order_control]: 1.73005e-06 [assign_add_opt]: 2.858e-05 [ForceFp32Comm]: 1.01002e-06 [remove_cast_before_assign_add]: 7.06001e-06 [full_micro_interleaved_order_control]: 2.10002e-06 [reorder_send_recv_between_fp_bp]: 2.06998e-06 [comm_op_add_attrs]: 2.91801e-05 [add_comm_op_reuse_tag]: 2.02993e-06 [interleave_split_concat_branches]: 7.00005e-07 [interleave_parallel_branches]: 8.40053e-07 [overlap_opt_shard_in_pipeline]: 9.49949e-07 [overlap_opt_shard_grad_in_pipeline]: 1.96998e-06 [control_data_broadcast_order]: 1.2099e-06 [grouped_pairwise_exchange_alltoall]: 9.84001e-06 [offloading_packed_experts]: 2.58e-06 [overlap_recompute_and_grad_model_parallel]: 1.65997e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.10018e-07 [overlap_recompute_allgather_and_fa_grad]: 7.79601e-05 [overlap_grad_ring_attention]: 1.71002e-06 [overlap_grad_flash_sp]: 1.37601e-05 [begin_end_overlap_inline]: 7.89994e-07 [split_matmul_comm_elemetwise]: 1.90001e-06 [split_layernorm_comm]: 2.04996e-06 [handle_group_info]: 5.03007e-06 [symbol_engine_optimizer]: 9.139e-05, [1] [Cycle 1]: 8.62899e-05, [6] [build]: 5.04998e-06 [elim_shapecalc]: 1.345e-05 [elim_not_effective]: 1.668e-05 [opt_reshape]: 8.97993e-06 [fold_const_symbol]: 1.444e-05 [renormalize]: 3.00002e-07 [pipeline_parallel_scheduler]: 1.73994e-06 [auto_monad_reorder]: 3.25501e-05 [get_jit_bprop_graph]: 4.39934e-07 [rewriter_after_jit_bprop_graph]: 4.10015e-07 [eliminate_special_op_node]: 0.00051489 [distribtued_split]: 4.11599e-05 [validate]: 3.648e-05 [task_emit]: 0.0690157 [execute]: 1.151e-05 Sums bootstrap : 0.000305s : 0.40% type_inference : 0.002513s : 3.30% auto_monad : 0.000128s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000041s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000561s : 0.74% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000225s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000456s : 0.60% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000056s : 0.07% optimize.opt_a.a_3 : 0.000110s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000155s : 0.20% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000492s : 0.65% optimize.opt_after_cconv.c_1 : 0.000056s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000029s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000078s : 0.10% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000033s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000515s : 0.68% distribtued_split : 0.000041s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.069016s : 90.65% execute : 0.000012s : 0.02% Time group info: ------[substitution.] 0.000134 63 5.25% : 0.000007s : 2: substitution.depend_value_elim 2.10% : 0.000003s : 5: substitution.elim_not_effective 2.19% : 0.000003s : 5: substitution.fold_const_symbol 5.29% : 0.000007s : 6: substitution.graph_param_transform 50.20% : 0.000067s : 1: substitution.inline 3.80% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.24% : 0.000004s : 6: substitution.load_eliminater 2.62% : 0.000004s : 2: substitution.reduce_all_const_elim 5.98% : 0.000008s : 10: substitution.remove_not_recompute_node 2.84% : 0.000004s : 2: substitution.replace_old_param 8.87% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.63% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002483 2 88.48% : 0.002197s : 1: type_inference.infer 11.52% : 0.000286s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000066 1 100.00% : 0.000066s : 1: match.inline ------[predicate.] 0.000236 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.16% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.92% : 0.000002s : 13: predicate.addn_zero_filter 0.70% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.31% : 0.000005s : 25: predicate.arithmetic_simplify 0.77% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.70% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.53% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.91% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.21% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.04% : 0.000002s : 19: predicate.environ_get_depend_swap 1.89% : 0.000004s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.33% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.02% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.20% : 0.000000s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.74% : 0.000014s : 63: predicate.inline 1.12% : 0.000003s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.11% : 0.000003s : 12: predicate.less_batch_normalization 1.57% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.61% : 0.000006s : 38: predicate.load_eliminater 1.42% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.92% : 0.000005s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicat TotalTime = 0.0806047, [21] [bootstrap]: 0.00030617 [type_inference]: 0.0025125 [auto_monad]: 0.00013106 [graph_reusing]: 1.29e-06 [inline]: 1.22003e-06 [parallel-infer-symbol]: 2.14006e-06 [pre_auto_parallel]: 2.621e-05 [insert-virtual-dataset]: 3.03006e-06 [parallel-infer-symbol-second]: 4.1991e-07 [dataset_repeat_opt]: 1.71002e-06 [pipeline_split]: 1.33005e-06 [optimize]: 0.00747085, [52] [py_interpret_to_execute]: 1.626e-05 [rewriter_before_opt_a]: 3.41199e-05 [opt_a]: 0.00573192, [2] [Cycle 1]: 0.001446, [43] [expand_dump_flag]: 2.09e-06 [switch_simplify]: 2.551e-05 [loop_unroll]: 1.29901e-05 [a_1]: 0.00032476 [recompute_prepare]: 9.12999e-06 [updatestate_depend_eliminate]: 7.75e-06 [updatestate_assign_eliminate]: 5.74999e-06 [updatestate_loads_eliminate]: 5.31995e-06 [parameter_eliminate]: 2.79001e-06 [a_2]: 0.00011354 [accelerated_algorithm]: 8.92999e-06 [shard]: 1.3801e-06 [meta_shard_fg_expand]: 3.19001e-06 [shard_inline]: 8.3301e-06 [auto_parallel]: 1.169e-05 [parallel]: 4.45999e-06 [flash_sp]: 6.58003e-06 [merge_comm]: 6.30005e-06 [allreduce_fusion]: 5.00004e-06 [matmul_add_comm_reduction]: 8.27992e-06 [allreduce_slice_to_reducescatter]: 3.89991e-07 [virtual_shard_identity]: 9.62999e-06 [virtual_dataset]: 8.23999e-06 [get_grad_eliminate_]: 8.03999e-06 [virtual_output]: 7.97003e-06 [merge_forward]: 4.97e-06 [cell_reuse_recompute_pass]: 1.47009e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.688e-05 [before_grad]: 1.339e-05 [inplace_validation]: 4.35999e-06 [meta_fg_expand]: 4.97e-06 [inplace_validation_after_expand]: 5.18002e-06 [flash_sp_send_recv_attached]: 2.36009e-06 [receive_attached]: 1.80991e-06 [after_resolve]: 1.10499e-05 [a_after_grad]: 1.30699e-05 [special_op_eliminate]: 7.98004e-06 [renormalize]: 0.00042525 [add_forward_monad_depend]: 3.29001e-06 [auto_monad_grad]: 1.56998e-06 [auto_monad_eliminator]: 2.245e-05 [cse]: 2.273e-05 [a_3]: 5.98601e-05 [Cycle 2]: 0.00078705, [43] [expand_dump_flag]: 1.12003e-06 [switch_simplify]: 8.74e-06 [loop_unroll]: 7.76001e-06 [a_1]: 0.00020185 [recompute_prepare]: 7.21996e-06 [updatestate_depend_eliminate]: 5.78002e-06 [updatestate_assign_eliminate]: 4.51005e-06 [updatestate_loads_eliminate]: 5.62007e-06 [parameter_eliminate]: 1.40001e-06 [a_2]: 0.00010619 [accelerated_algorithm]: 8.12009e-06 [shard]: 1.21002e-06 [meta_shard_fg_expand]: 2.66999e-06 [shard_inline]: 7.77992e-06 [auto_parallel]: 1.06e-05 [parallel]: 3.29001e-06 [flash_sp]: 2.39001e-06 [merge_comm]: 5.92996e-06 [allreduce_fusion]: 4.90993e-06 [matmul_add_comm_reduction]: 7.81997e-06 [allreduce_slice_to_reducescatter]: 4.59957e-07 [virtual_shard_identity]: 8.99006e-06 [virtual_dataset]: 7.79994e-06 [get_grad_eliminate_]: 7.27002e-06 [virtual_output]: 7.28003e-06 [merge_forward]: 4.50993e-06 [cell_reuse_recompute_pass]: 1.76998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.559e-05 [before_grad]: 1.25e-05 [inplace_validation]: 4.40003e-06 [meta_fg_expand]: 4.77e-06 [inplace_validation_after_expand]: 4.94998e-06 [flash_sp_send_recv_attached]: 8.00006e-07 [receive_attached]: 7.00005e-07 [after_resolve]: 9.41998e-06 [a_after_grad]: 1.199e-05 e.mini_step_allgather_replace 0.79% : 0.000002s : 13: predicate.minmaximum_grad 0.82% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.52% : 0.000001s : 6: predicate.parallel_virtual_node 1.14% : 0.000003s : 14: predicate.partial_defer_inline 1.34% : 0.000003s : 19: predicate.partial_eliminate 0.76% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.25% : 0.000003s : 13: predicate.reduce_eliminate 0.60% : 0.000001s : 12: predicate.remove_not_recompute_node 1.09% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.22% : 0.000001s : 6: predicate.reset_defer_inline 0.91% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 1.05% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.24% : 0.000003s : 18: predicate.special_op_eliminate 0.85% : 0.000002s : 12: predicate.specialize_transform 0.97% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.94% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.22% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.60% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.27% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.91% : 0.000002s : 13: predicate.transpose_eliminate 1.68% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.80% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.62% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.75% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.67% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.40% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.44% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.45% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.76% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000172 4 9.93% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.07% : 0.000155s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089454 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000068s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.16% : 0.000141s : 1: auto_monad 0.04% : 0.000039s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.37% : 0.000334s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.04% : 0.000033s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.00004 [special_op_eliminate]: 7.81997e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 1.00001e-06 [auto_monad_grad]: 1.11992e-06 [auto_monad_eliminator]: 2.001e-05 [cse]: 1.956e-05 [a_3]: 4.989e-05 [py_interpret_to_execute_after_opt_a]: 9.44e-06 [slice_cell_reuse_recomputed_activation]: 1.32003e-06 [rewriter_after_opt_a]: 0.00012866 [convert_after_rewriter]: 8.59995e-06 [order_py_execute_after_rewriter]: 5.21005e-06 [opt_b]: 0.00024414, [1] [Cycle 1]: 0.00023873, [7] [b_1]: 0.00016488 [b_2]: 9.77004e-06 [updatestate_depend_eliminate]: 5.39003e-06 [updatestate_assign_eliminate]: 4.40003e-06 [updatestate_loads_eliminate]: 5.41005e-06 [renormalize]: 2.19909e-07 [cse]: 1.817e-05 [optimize_parallel_all_gather_comm]: 8.12998e-06 [overlap_param_gather]: 6.10016e-07 [cconv]: 1.439e-05 [loop_unroll]: 0.00050504 [opt_after_cconv]: 0.00013295, [1] [Cycle 1]: 0.00012697, [7] [c_1]: 5.16101e-05 [parameter_eliminate]: 2.30991e-06 [updatestate_depend_eliminate]: 8.56002e-06 [updatestate_assign_eliminate]: 4.85999e-06 [updatestate_loads_eliminate]: 6.12997e-06 [cse]: 2.10101e-05 [renormalize]: 4.49945e-07 [remove_dup_value]: 8.62998e-06 [tuple_transform]: 6.76e-05, [1] [Cycle 1]: 6.29e-05, [2] [d_1]: 5.40901e-05 [renormalize]: 1.8999e-07 [partial_unused_args_eliminate]: 1.39e-06 [add_cache_embedding]: 1.054e-05 [add_recomputation]: 5.11199e-05 [cse_after_recomputation]: 2.59901e-05, [1] [Cycle 1]: 2.143e-05, [1] [cse]: 1.67e-05 [environ_conv]: 6.01995e-06 [swap_dp_allreduce_reducescatter]: 6.83998e-06 [bias_add_comm_swap]: 1.23994e-06 [label_micro_interleaved_index]: 1.15996e-06 [label_fine_grained_interleaved_index]: 9.79984e-07 [merge_cast_opt]: 5.29923e-07 [slice_recompute_activation]: 8.89995e-07 [micro_interleaved_order_control]: 7.60076e-07 [assign_add_opt]: 2.535e-05 [ForceFp32Comm]: 6.49947e-07 [remove_cast_before_assign_add]: 6.16e-06 [full_micro_interleaved_order_control]: 9.00007e-07 [reorder_send_recv_between_fp_bp]: 8.10018e-07 [comm_op_add_attrs]: 2.25899e-05 [add_comm_op_reuse_tag]: 1.31002e-06 [interleave_split_concat_branches]: 5.59958e-07 [interleave_parallel_branches]: 4.59957e-07 [overlap_opt_shard_in_pipeline]: 6.99889e-07 [overlap_opt_shard_grad_in_pipeline]: 9.40054e-07 [control_data_broadcast_order]: 5.59958e-07 [grouped_pairwise_exchange_alltoall]: 5.91995e-06 [offloading_packed_experts]: 9.69972e-07 [overlap_recompute_and_grad_model_parallel]: 9.00007e-07 [overlap_grad_matmul_and_grad_allreduce]: 4.20026e-07 [overlap_recompute_allgather_and_fa_grad]: 4.67801e-05 [overlap_grad_ring_attention]: 9.89996e-07 [overlap_grad_flash_sp]: 1.17701e-05 [begin_end_overlap_inline]: 4.4005e-07 [split_matmul_comm_elemetwise]: 9.70089e-07 [split_layernorm_comm]: 7.3004e-07 [handle_group_info]: 2.68e-06 [symbol_engine_optimizer]: 8.68699e-05, [1] [Cycle 1]: 8.225e-05, [6] [build]: 3.93996e-06 [elim_shapecalc]: 1.251e-05 [elim_not_effective]: 1.66e-05 [opt_reshape]: 8.86002e-06 [fold_const_symbol]: 1.33599e-05 [renormalize]: 3.09898e-07 [pipeline_parallel_scheduler]: 7.69971e-07 [auto_monad_reorder]: 2.312e-05 [get_jit_bprop_graph]: 2.89991e-07 [rewriter_after_jit_bprop_graph]: 2.19909e-07 [eliminate_special_op_node]: 0.00049438 [distribtued_split]: 3.142e-05 [validate]: 2.908e-05 [task_emit]: 0.0693314 [execute]: 6.83998e-06 Sums bootstrap : 0.000306s : 0.40% type_inference : 0.002513s : 3.30% auto_monad : 0.000131s : 0.17% graph_reusing9s : 1: distribtued_split 0.59% : 0.000529s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000502s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.26% : 0.001126s : 80: opt.transform.opt_a 0.06% : 0.000054s : 1: opt.transform.opt_after_cconv 0.17% : 0.000156s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.06% : 0.000049s : 4: opt.transform.symbol_engine_opt 6.10% : 0.005461s : 1: opt_a 0.18% : 0.000158s : 1: opt_after_cconv 0.28% : 0.000249s : 1: opt_b 8.22% : 0.007355s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.09% : 0.000083s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.27% : 0.000242s : 1: renormalize.infer 0.23% : 0.000207s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000161s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000094s : 1: symbol_engine_optimizer 77.19% : 0.069049s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.83% : 0.002532s : 1: type_inference 0.08% : 0.000071s : 1: validate : 0.000001s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000034s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000527s : 0.69% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000009s : 0.01% optimize.opt_a.merge_comm : 0.000012s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000425s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000042s : 0.06% optimize.opt_a.cse : 0.000042s : 0.06% optimize.opt_a.a_3 : 0.000110s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000129s : 0.17% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000014s : 0.02% optimize.loop_unroll : 0.000505s : 0.66% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000051s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000025s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000023s : 0.03% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000006s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000047s : 0.06% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000023s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000494s : 0.65% distribtued_split : 0.000031s : 0.04% validate : 0.000029s : 0.04% task_emit : 0.069331s : 91.07% execute : 0.000007s : 0.01% TotalTime = 0.0809571, [21] [bootstrap]: 0.00029494 [type_inference]: 0.00245777 [auto_monad]: 0.00012939 [graph_reusing]: 2.42994e-06 [inline]: 1.37999e-06 [parallel-infer-symbol]: 2.04996e-06 [pre_auto_parallel]: 2.652e-05 [insert-virtual-dataset]: 2.26998e-06 [parallel-infer-symbol-second]: 4.1991e-07 [dataset_repeat_opt]: 1.26008e-06 [pipeline_split]: 1.54006e-06 [optimize]: 0.00727404, [52] [py_interpret_to_execute]: 1.502e-05 [rewriter_before_opt_a]: 3.378e-05 [opt_a]: 0.00539733, [2] [Cycle 1]: 0.00155463, [43] [expand_dump_flag]: 2.98989e-06 [switch_simplify]: 3.01399e-05 [loop_unroll]: 1.29e-05 [a_1]: 0.00034963 [recompute_prepare]: 8.98994e-06 [updatestate_depend_eliminate]: 8.95001e-06 [updatestate_assign_eliminate]: 5.84999e-06 [updatestate_loads_eliminate]: 7.19004e-06 [parameter_eliminate]: 3.32005e-06 [a_2]: 0.00011973 [accelerated_algorithm]: 8.49995e-06 [shard]: 2.34996e-06 [meta_shard_fg_expand]: 3.39001e-06 [shard_inline]: 8.47003e-06 [auto_parallel]: 1.19701e-05 [parallel]: 7.37002e-06 [flash_sp]: 1.09499e-05 [merge_comm]: 7.62008e-06 [allreduce_fusion]: 5.4501e-06 [matmul_add_comm_reduction]: 1.04599e-05 [allreduce_slice_to_reducescatter]: 4.20026e-07 [virtual_shard_identity]: 9.60997e-06 [virtual_dataset]: 7.86001e-06 [get_grad_eliminate_]: 8.06991e-06 [virtual_output]: 7.62998e-06 [merge_forward]: 5.99003e-06 [cell_reuse_recompute_pass]: 1.81003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.65401e-05 [before_grad]: 1.42501e-05 [inplace_validation]: 5.24998e-06 [meta_fg_expand]: 5.46e-06 [inplace_validation_after_expand]: 5.96989e-06 [flash_sp_send_recv_attached]: 4.74998e-06 [receive_attached]: 2.42994e-06 [after_resolve]: 1.159e-05 [a_after_grad]: 1.29601e-05 [special_op_eliminate]: 8.2301e-06 [renormalize]: 0.00044525 [add_forward_monad_depend]: 3.59002e-06 [auto_monad_grad]: 1.96998e-06 [auto_monad_eliminator]: 3.215e-05 [cse]: 3.25e-05 [a_3]: 6.09601e-05 [Cycle 2]: 0.00079455, [43] [expand_dump_flag]: 1.11002e-06 [switch_simplify]: 9.14e-06 [loop_unroll]: 8.02998e-06 [a_1]: 0.00021046 [recompute_prepare]: 7.53999e-06 [updatestate_depend_eliminate]: 6.13998e-06 [updatestate_assign_eliminate]: 4.55999e-06 [updatestate_loads_eliminate]: 5.42006e-06 [parameter_eliminate]: 1.26997e-06 [a_2]: 0.00010854 [accelerated_algorithm]: 8.57993e-06 [shard]: 1.17999e-06 [meta_shard_fg_expand]: 2.52004e-06 [shard_inline]: 8.05e-06 [auto_parallel]: 1.10201e-05 [parallel]: 3.58e-06 [flash_sp]: 3.73006e-06 [merge_comm]: 5.96e-06 [allreduce_fusion]: 5.08991e-06 [matmul_add_comm_reduction]: 8.10006e-06 [allreduce_slice_to_reducescatter]: 2.79979e-07 [virtual_shard_identity]: 8.98005e-06 [virtual_dataset]: 7.93999e-06 [get_grad_eliminate_]: 7.66991e-06 [virtual_output]: 7.06001e-06 [merge_forward]: 4.5601e-06 [cell_reuse_recompute_pass]: 1.97999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.51501e-05 [before_grad]: 1.251e-05 [inplace_validation]: 4.43996e-06 [meta_fg_expand]: 4.81005e-06 [inplace_validation_after_expand]: 5.13997e-06 [flash_sp_send_recv_attached]: 7.69971e-07 [receive_attached]: 8.30041e-07 [after_resolve]: 9.86992e-06 [a_after_grad]: 1.17e-05 [special_op_eliminate]: 7.52998e-06 [renormalize]: 7.0082e-08 [add_forward_monad_depend]: 9.30042e-07 [auto_monad_grad]: 1.26008e-06 [auto_monad_eliminator]: 1.83301e-05 [cse]: 1.97301e-05 [a_3]: 4.927e-05 [py_interpret_to_execute_after_opt_a]: 9.53989e-06 [slice_cell_reuse_recomputed_activation]: 2.1701e-06 [rewriter_after_opt_a]: 0.00014556 [convert_after_rewriter]: 8.71997e-06 [order_py_execute_after_rewriter]: 5.92996e-06 [opt_b]: 0.00024584, [1] [Cycle 1]: 0.00024074, [7] [b_1]: 0.00016607 [b_2]: 9.81998e-06 [updatestate_depend_eliminate]: 5.38002e-06 [updatestate_assign_eliminate]: 4.50003e-06 [updatestate_loads_eliminate]: 5.06011e-06 [renormalize]: 2.2992e-07 [cse]: 1.92e-05 [optimize_parallel_all_gather_comm]: 8.61997e-06 [overlap_param_gather]: 1.06997e-06 [cconv]: 2.375e-05 [loop_unroll]: 0.00049623 [opt_after_cconv]: 0.00013613, [1] [Cycle 1]: 0.00013007, [7] [c_1]: 5.445e-05 [parameter_eliminate]: 2.45008e-06 [updatestate_depend_eliminate]: 8.18004e-06 [updatestate_assign_eliminate]: 4.90993e-06 [updatestate_loads_eliminate]: 5.91006e-06 [cse]: 2.22999e-05 [renormalize]: 4.20026e-07 [remove_dup_value]: 1.365e-05 [tuple_transform]: 7.018e-05, [1] [Cycle 1]: 6.59201e-05, [2] [d_1]: 5.664e-05 [renormalize]: 1.59955e-07 [partial_unused_args_eliminate]: 1.89e-06 [add_cache_embedding]: 1.32701e-05 [add_recomputation]: 6.192e-05 [cse_after_recomputation]: 2.619e-05, [1] [Cycle 1]: 2.194e-05, [1] [cse]: 1.67801e-05 [environ_conv]: 3.72699e-05 [swap_dp_allreduce_reducescatter]: 7.68004e-06 [bias_add_comm_swap]: 2.27999e-06 [label_micro_interleaved_index]: 2.3999e-06 [label_fine_grained_interleaved_index]: 1.93994e-06 [merge_cast_opt]: 1.03994e-06 [slice_recompute_activation]: 2.01003e-06 [micro_interleaved_order_control]: 1.64006e-06 [assign_add_opt]: 2.83601e-05 [ForceFp32Comm]: 1.16997e-06 [remove_cast_before_assign_add]: 7.26001e-06 [full_micro_interleaved_order_control]: 2.29001e-06 [reorder_send_recv_between_fp_bp]: 1.89e-06 [comm_op_add_attrs]: 2.57799e-05 [add_comm_op_reuse_tag]: 2.00002e-06 [interleave_split_concat_branches]: 8.30041e-07 [interleave_parallel_branches]: 8.29925e-07 [overlap_opt_shard_in_pipeline]: 9.39937e-07 [overlap_opt_shard_grad_in_pipeline]: 1.63005e-06 [control_data_broadcast_order]: 1.29e-06 [grouped_pairwise_exchange_alltoall]: 9.47004e-06 [offloading_packed_experts]: 1.79e-06 [overlap_recompute_and_grad_model_parallel]: 1.96008e-06 [overlap_grad_matmul_and_grad_allreduce]: 6.3004e-07 [overlap_recompute_allgather_and_fa_grad]: 6.786e-05 [overlap_grad_ring_attention]: 2.12993e-06 [overlap_grad_flash_sp]: 1.46599e-05 [begin_end_overlap_inline]: 9.30042e-07 [split_matmul_comm_elemetwise]: 2.04006e-06 [split_layernorm_comm]: 1.67999e-06 [handle_group_info]: 4.99003e-06 [symbol_engine_optimizer]: 9.052e-05, [1] [Cycle 1]: 8.574e-05, [6] [build]: 4.55999e-06 [elim_shapecalc]: 1.365e-05 [elim_not_effective]: 1.73401e-05 [opt_reshape]: 8.91997e-06 [fold_const_symbol]: 1.381e-05 [renormalize]: 3.7998e-07 [pipeline_parallel_scheduler]: 1.40001e-06 [auto_monad_reorder]: 2.982e-05 [get_jit_bprop_graph]: 4.49945e-07 [rewriter_after_jit_bprop_graph]: 4.30038e-07 [eliminate_special_op_node]: 0.00051778 [distribtued_split]: 4.05901e-05 [validate]: 3.57799e-05 [task_emit]: 0.069888 [execute]: 9.94001e-06 Sums bootstrap : 0.000295s : 0.38% type_inference : 0.002458s : 3.20% auto_monad : 0.000129s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000027s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000560s : 0.73% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000228s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000445s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.07% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000110s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000146s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000166s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000496s : 0.65% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000037s : 0.05% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000028s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000026s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000068s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000518s : 0.67% distribtued_split : 0.000041s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.069888s : 90.86% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000109 63 4.17% : 0.000005s : 2: substitution.depend_value_elim 2.08% : 0.000002s : 5: substitution.elim_not_effective 1.91% : 0.000002s : 5: substitution.fold_const_symbol 5.15% : 0.000006s : 6: substitution.graph_param_transform 48.30% : 0.000053s : 1: substitution.inline 4.50% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.74% : 0.000004s : 6: substitution.load_eliminater 2.28% : 0.000002s : 2: substitution.reduce_all_const_elim 6.99% : 0.000008s : 10: substitution.remove_not_recompute_node 2.31% : 0.000003s : 2: substitution.replace_old_param 9.44% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 9.13% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002484 2 88.44% : 0.002197s : 1: type_inference.infer 11.56% : 0.000287s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000052 1 100.00% : 0.000052s : 1: match.inline ------[predicate.] 0.000225 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.17% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.19% : 0.000005s : 25: predicate.arithmetic_simplify 0.90% : 0.000002s : 13: predicate.cast_eliminate 0.83% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.18% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.89% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.54% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_depend_swap 2.06% : 0.000005s : 31: predicate.environ_get_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.80% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000000s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.82% : 0.000002s : 12: predicate.incorporate_call 0.71% : 0.000002s : 12: predicate.incorporate_call_switch 5.61% : 0.000013s : 63: predicate.inline 1.09% : 0.000002s : 12: predicate.inline_without_move 0.42% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.07% : 0.000002s : 12: predicate.less_batch_normalization 1.74% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.50% : 0.000006s : 38: predicate.load_eliminater 1.29% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.74% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.81% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.68% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.53% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.29% : 0.000003s : 19: predicate.partial_eliminate 0.97% : 0.000002s : 13: predicate.print_const_string_wrapper 0.83% : 0.000002s : 12: predicate.reduce_all_const_elim 1.01% : 0.000002s : 13: predicate.reduce_eliminate 0.53% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.83% : 0.000002s : 13: predicate.reshape_eliminate 0.85% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.55% : 0.000001s : 6: predicate.row_tensor_eliminate 1.11% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.37% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.05% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.00% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.26% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.94% : 0.000002s : 14: predicate.switch_defer_inline 1.79% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.21% : 0.000009s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.93% : 0.000002s : 13: predicate.transpose_eliminate 1.84% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.72% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.64% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.51% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.42% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.36% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000160 4 5.64% : 0.000009s : 1: func_graph_cloner_run.FuncGraphClonerGraph 94.36% : 0.000151s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089861 192 0.00% : 0.000003s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.06% : 0.000056s : 1: add_recomputation 0.03% : 0.000029s : 1: assign_add_opt 0.16% : 0.000144s : 1: auto_monad 0.03% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.37% : 0.000334s : 1: bootstrap 0.02% : 0.000018s : 1: cconv 0.03% : 0.000026s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.04% : 0.000039s : 1: distribtued_split 0.57% : 0.000508s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000014s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.01% : 0.000009s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000006s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.57% : 0.000515s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.20% : 0.001081s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000155s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.03% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.38% : 0.005736s : 1: opt_a 0.15% : 0.000137s : 1: opt_after_cconv 0.28% : 0.000247s : 1: opt_b 8.32% : 0.007479s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.06% : 0.000052s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000009s : 1: remove_cast_before_assign_add 0.01% : 0.000013s : 1: remove_dup_value 0.25% : 0.000221s : 1: renormalize.infer 0.22% : 0.000199s : 1: renormalize.specialize 0.00% : 0.000003s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000134s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000090s : 1: symbol_engine_optimizer 77.18% : 0.069354s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.82% : 0.002531s : 1: type_inference 0.07% : 0.000061s : 1: validate Time group info: ------[substitution.] 0.000132 63 5.14% : 0.000007s : 2: substitution.depend_value_elim 1.91% : 0.000003s : 5: substitution.elim_not_effective 1.81% : 0.000002s : 5: substitution.fold_const_symbol 5.13% : 0.000007s : 6: substitution.graph_param_transform 51.40% : 0.000068s : 1: substitution.inline 4.01% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.16% : 0.000004s : 6: substitution.load_eliminater 2.68% : 0.000004s : 2: substitution.reduce_all_const_elim 5.82% : 0.000008s : 10: substitution.remove_not_recompute_node 2.43% : 0.000003s : 2: substitution.replace_old_param 8.61% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.91% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002429 2 88.44% : 0.002148s : 1: type_inference.infer 11.56% : 0.000281s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000066 1 100.00% : 0.000066s : 1: match.inline ------[predicate.] 0.000231 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.24% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.82% : 0.000002s : 13: predicate.addn_zero_filter 0.71% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.25% : 0.000005s : 25: predicate.arithmetic_simplify 0.80% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.33% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.89% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.95% : 0.000005s : 31: predicate.environ_get_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.39% : 0.000003s : 14: predicate.float_depend_g_call 0.69% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.62% : 0.000013s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.02% : 0.000002s : 12: predicate.less_batch_normalization 1.86% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000005s : 38: predicate.load_eliminater 1.46% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.18% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.85% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.74% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.84% : 0.000002s : 13: predicate.minmaximum_grad 0.84% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.44% : 0.000001s : 6: predicate.parallel_virtual_node 1.10% : 0.000003s : 14: predicate.partial_defer_inline 1.34% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.92% : 0.000002s : 12: predicate.reduce_all_const_elim 1.12% : 0.000003s : 13: predicate.reduce_eliminate 0.65% : 0.000002s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.56% : 0.000001s : 6: predicate.row_tensor_eliminate 1.03% : 0.000002s : 12: predicate.same_eliminate 0.45% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.39% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.08% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.25% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.60% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.41% : 0.000010s : 43: predicate.switch_simplify 0.86% : 0.000002s : 13: predicate.tile_eliminate 0.72% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.46% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.79% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.75% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.54% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.33% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.38% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.55% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.60% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000163 4 10.17% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.83% : 0.000146s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090093 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.04% : 0.000032s : 1: assign_add_opt 0.16% : 0.000142s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.35% : 0.000319s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.03% : 0.000030s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.59% : 0.000532s : 1: eliminate_special_op_node 0.05% : 0.000042s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000506s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001127s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.17% : 0.000156s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.06% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.00% : 0.005401s : 1: opt_a 0.16% : 0.000140s : 1: opt_after_cconv 0.28% : 0.000249s : 1: opt_b 8.08% : 0.007282s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000074s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.26% : 0.000238s : 1: renormalize.infer 0.22% : 0.000201s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000152s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000094s : 1: symbol_engine_optimizer 77.60% : 0.069916s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.75% : 0.002476s : 1: type_inference 0.08% : 0.000071s : 1: validate TotalTime = 0.0813546, [21] [bootstrap]: 0.00029683 [type_inference]: 0.00244415 [auto_monad]: 0.00012991 [graph_reusing]: 2.07999e-06 [inline]: 1.31992e-06 [parallel-infer-symbol]: 2.69001e-06 [pre_auto_parallel]: 2.557e-05 [insert-virtual-dataset]: 2.73995e-06 [parallel-infer-symbol-second]: 4.20026e-07 [dataset_repeat_opt]: 1.40001e-06 [pipeline_split]: 1.54995e-06 [optimize]: 0.00725483, [52] [py_interpret_to_execute]: 1.489e-05 [rewriter_before_opt_a]: 3.529e-05 [opt_a]: 0.00537456, [2] [Cycle 1]: 0.00156181, [43] [expand_dump_flag]: 3.54007e-06 [switch_simplify]: 2.96499e-05 [loop_unroll]: 1.318e-05 [a_1]: 0.0003441 [recompute_prepare]: 8.89995e-06 [updatestate_depend_eliminate]: 9.16002e-06 [updatestate_assign_eliminate]: 6.27991e-06 [updatestate_loads_eliminate]: 7.18993e-06 [parameter_eliminate]: 3.49991e-06 [a_2]: 0.00011951 [accelerated_algorithm]: 8.38994e-06 [shard]: 1.85997e-06 [meta_shard_fg_expand]: 3.51993e-06 [shard_inline]: 8.30996e-06 [auto_parallel]: 1.17901e-05 [parallel]: 7.51996e-06 [flash_sp]: 1.034e-05 [merge_comm]: 7.52998e-06 [allreduce_fusion]: 5.19003e-06 [matmul_add_comm_reduction]: 1.101e-05 [allreduce_slice_to_reducescatter]: 5.19911e-07 [virtual_shard_identity]: 9.62999e-06 [virtual_dataset]: 8.09995e-06 [get_grad_eliminate_]: 8.02998e-06 [virtual_output]: 8.17003e-06 [merge_forward]: 6.06e-06 [cell_reuse_recompute_pass]: 1.85997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.769e-05 [before_grad]: 1.393e-05 [inplace_validation]: 5.07e-06 [meta_fg_expand]: 5.15999e-06 [inplace_validation_after_expand]: 6.10005e-06 [flash_sp_send_recv_attached]: 4.73997e-06 [receive_attached]: 2.76009e-06 [after_resolve]: 1.21399e-05 [a_after_grad]: 1.31e-05 [special_op_eliminate]: 8.16002e-06 [renormalize]: 0.00045383 [add_forward_monad_depend]: 3.67989e-06 [auto_monad_grad]: 1.83994e-06 [auto_monad_eliminator]: 3.201e-05 [cse]: 3.211e-05 [a_3]: 6.089e-05 [Cycle 2]: 0.00079654, [43] [expand_dump_flag]: 1.20001e-06 [switch_simplify]: 9.16002e-06 [loop_unroll]: 8.12009e-06 [a_1]: 0.00020569 [recompute_prepare]: 7.43999e-06 [updatestate_depend_eliminate]: 5.78002e-06 [updatestate_assign_eliminate]: 4.80004e-06 [updatestate_loads_eliminate]: 5.29992e-06 [parameter_eliminate]: 1.30001e-06 [a_2]: 0.00010584 [accelerated_algorithm]: 8.75001e-06 [shard]: 1.26997e-06 [meta_shard_fg_expand]: 2.78e-06 [shard_inline]: 7.91997e-06 [auto_parallel]: 1.161e-05 [parallel]: 3.74008e-06 [flash_sp]: 3.38e-06 [merge_comm]: 6.28002e-06 [allreduce_fusion]: 4.82006e-06 [matmul_add_comm_reduction]: 8.11997e-06 [allreduce_slice_to_reducescatter]: 2.40048e-07 [virtual_shard_identity]: 8.69005e-06 [virtual_dataset]: 7.82998e-06 [get_grad_eliminate_]: 7.53999e-06 [virtual_output]: 7.05e-06 [merge_forward]: 4.8799e-06 [cell_reuse_recompute_pass]: 1.92004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.532e-05 [before_grad]: 1.29701e-05 [inplace_validation]: 4.42995e-06 [meta_fg_expand]: 4.54998e-06 [inplace_validation_after_expand]: 5.50004e-06 [flash_sp_send_recv_attached]: 9.69972e-07 [receive_attached]: 8.19913e-07 [after_resolve]: 1.063e-05 [a_after_grad]: 1.25699e-05 [special_op_eliminate]: 7.62998e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 9.49949e-07 [auto_monad_grad]: 1.22003e-06 [auto_monad_eliminator]: 1.859e-05 [cse]: 1.979e-05 [a_3]: 4.96199e-05 [py_interpret_to_execute_after_opt_a]: 9.68995e-06 [slice_cell_reuse_recomputed_activation]: 2.21992e-06 [rewriter_after_opt_a]: 0.00014502 [convert_after_rewriter]: 8.42998e-06 [order_py_execute_after_rewriter]: 6.37001e-06 [opt_b]: 0.00024946, [1] [Cycle 1]: 0.00024404, [7] [b_1]: 0.00016645 [b_2]: 1.013e-05 [updatestate_depend_eliminate]: 5.60004e-06 [updatestate_assign_eliminate]: 4.50003e-06 [updatestate_loads_eliminate]: 5.37001e-06 [renormalize]: 2.59955e-07 [cse]: 1.916e-05 [optimize_parallel_all_gather_comm]: 8.61008e-06 [overlap_param_gather]: 9.30042e-07 [cconv]: 2.214e-05 [loop_unroll]: 0.00049009 [opt_after_cconv]: 0.00013477, [1] [Cycle 1]: 0.00012898, [7] [c_1]: 5.26899e-05 [parameter_eliminate]: 2.50002e-06 [updatestate_depend_eliminate]: 8.26002e-06 [updatestate_assign_eliminate]: 4.93007e-06 [updatestate_loads_eliminate]: 5.62996e-06 [cse]: 2.22001e-05 [renormalize]: 3.89991e-07 [remove_dup_value]: 1.31699e-05 [tuple_transform]: 6.906e-05, [1] [Cycle 1]: 6.485e-05, [2] [d_1]: 5.60901e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 2.09e-06 [add_cache_embedding]: 1.373e-05 [add_recomputation]: 6.05e-05 [cse_after_recomputation]: 2.66101e-05, [1] [Cycle 1]: 2.21899e-05, [1] [cse]: 1.727e-05 [environ_conv]: 7.32997e-06 [swap_dp_allreduce_reducescatter]: 7.57992e-06 [bias_add_comm_swap]: 2.10002e-06 [label_micro_interleaved_index]: 2.24996e-06 [label_fine_grained_interleaved_index]: 2.14006e-06 [merge_cast_opt]: 1.20001e-06 [slice_recompute_activation]: 1.86008e-06 [micro_interleaved_order_control]: 1.86998e-06 [assign_add_opt]: 2.981e-05 [ForceFp32Comm]: 9.00007e-07 [remove_cast_before_assign_add]: 7.02008e-06 [full_micro_interleaved_order_control]: 1.76998e-06 [reorder_send_recv_between_fp_bp]: 1.79e-06 [comm_op_add_attrs]: 2.78499e-05 [add_comm_op_reuse_tag]: 1.64995e-06 [interleave_split_concat_branches]: 7.59959e-07 [interleave_parallel_branches]: 6.6997e-07 [overlap_opt_shard_in_pipeline]: 8.49948e-07 [overlap_opt_shard_grad_in_pipeline]: 1.90001e-06 [control_data_broadcast_order]: 1.00001e-06 [grouped_pairwise_exchange_alltoall]: 8.84e-06 [offloading_packed_experts]: 1.69e-06 [overlap_recompute_and_grad_model_parallel]: 1.4801e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.90111e-07 [overlap_recompute_allgather_and_fa_grad]: 7.00699e-05 [overlap_grad_ring_attention]: 2.10002e-06 [overlap_grad_flash_sp]: 1.463e-05 [begin_end_overlap_inline]: 5.89993e-07 [split_matmul_comm_elemetwise]: 2.19001e-06 [split_layernorm_comm]: 1.82993e-06 [handle_group_info]: 4.25999e-06 [symbol_engine_optimizer]: 8.963e-05, [1] [Cycle 1]: 8.451e-05, [6] [build]: 4.84008e-06 [elim_shapecalc]: 1.362e-05 [elim_not_effective]: 1.638e-05 [opt_reshape]: 8.96002e-06 [fold_const_symbol]: 1.36601e-05 [renormalize]: 2.80095e-07 [pipeline_parallel_scheduler]: 1.33005e-06 [auto_monad_reorder]: 2.912e-05 [get_jit_bprop_graph]: 8.60076e-07 [rewriter_after_jit_bprop_graph]: 4.10015e-07 [eliminate_special_op_node]: 0.00051238 [distribtued_split]: 3.966e-05 [validate]: 3.516e-05 [task_emit]: 0.070317 [execute]: 1.212e-05 Sums bootstrap : 0.000297s : 0.38% type_inference : 0.002444s : 3.16% auto_monad : 0.000130s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000550s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000225s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000023s : 0.03% optimize.opt_a.a_after_grad : 0.000026s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000454s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000111s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000145s : 0.19% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000166s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000022s : 0.03% optimize.loop_unroll : 0.000490s : 0.63% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000030s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000070s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000004s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000512s : 0.66% distribtued_split : 0.000040s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.070317s : 90.97% execute : 0.000012s : 0.02% Time group info: ------[substitution.] 0.000131 63 5.05% : 0.000007s : 2: substitution.depend_value_elim 1.80% : 0.000002s : 5: substitution.elim_not_effective 1.85% : 0.000002s : 5: substitution.fold_const_symbol 5.52% : 0.000007s : 6: substitution.graph_param_transform 50.65% : 0.000066s : 1: substitution.inline 4.19% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.11% : 0.000004s : 6: substitution.load_eliminater 2.40% : 0.000003s : 2: substitution.reduce_all_const_elim 6.11% : 0.000008s : 10: substitution.remove_not_recompute_node 2.71% : 0.000004s : 2: substitution.replace_old_param 8.67% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.95% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002414 2 88.43% : 0.002135s : 1: type_inference.infer 11.57% : 0.000279s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000230 1420 0.81% : 0.000002s : 13: predicate.accumulaten_eliminater 1.15% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.74% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.15% : 0.000005s : 25: predicate.arithmetic_simplify 0.89% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.52% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.95% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.55% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_depend_swap 2.03% : 0.000005s : 31: predicate.environ_get_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.61% : 0.000013s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.04% : 0.000002s : 12: predicate.less_batch_normalization 1.65% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.47% : 0.000006s : 38: predicate.load_eliminater 1.38% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.18% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.80% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.68% : 0.000002s : 13: predicate.minmaximum_grad 0.76% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.13% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.82% : 0.000002s : 13: predicate.print_const_string_wrapper 0.88% : 0.000002s : 12: predicate.reduce_all_const_elim 1.01% : 0.000002s : 13: predicate.reduce_eliminate 0.63% : 0.000001s : 12: predicate.remove_not_recompute_node 1.08% : 0.000002s : 25: predicate.replace_applicator 0.49% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.84% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.44% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000002s : 12: predicate.same_eliminate 0.45% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.88% : 0.000002s : 12: predicate.shard_identity_eliminate 1.40% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.12% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.26% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.95% : 0.000002s : 14: predicate.switch_defer_inline 1.70% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.46% : 0.000010s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.76% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.77% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.81% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.78% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.45% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.48% : 0.000001s : 6: predicate.value_based_eliminate 0.88% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000164 4 9.48% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.52% : 0.000148s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090463 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000065s : 1: add_recomputation 0.04% : 0.000034s : 1: assign_add_opt 0.16% : 0.000143s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000320s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.04% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.58% : 0.000526s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.05% : 0.000042s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000500s : 1: loop_unroll 0.01% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001115s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000156s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 5.95% : 0.005378s : 1: opt_a 0.15% : 0.000139s : 1: opt_after_cconv 0.28% : 0.000253s : 1: opt_b 8.03% : 0.007263s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000075s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000243s : 1: renormalize.infer 0.23% : 0.000204s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000151s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000093s : 1: symbol_engine_optimizer 77.76% : 0.070345s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.72% : 0.002461s : 1: type_inference 0.08% : 0.000070s : 1: validate TotalTime = 0.0817234, [21] [bootstrap]: 0.00033542 [type_inference]: 0.00261578 [auto_monad]: 0.00013314 [graph_reusing]: 2.20002e-06 [inline]: 1.31002e-06 [parallel-infer-symbol]: 2.05007e-06 [pre_auto_parallel]: 2.532e-05 [insert-virtual-dataset]: 3.14007e-06 [parallel-infer-symbol-second]: 5.79981e-07 [dataset_repeat_opt]: 1.14995e-06 [pipeline_split]: 1.55997e-06 [optimize]: 0.00750979, [52] [py_interpret_to_execute]: 1.561e-05 [rewriter_before_opt_a]: 3.6e-05 [opt_a]: 0.00552299, [2] [Cycle 1]: 0.00160782, [43] [expand_dump_flag]: 4.14008e-06 [switch_simplify]: 3.14199e-05 [loop_unroll]: 1.36701e-05 [a_1]: 0.0003488 [recompute_prepare]: 9.21998e-06 [updatestate_depend_eliminate]: 9.02999e-06 [updatestate_assign_eliminate]: 6.24999e-06 [updatestate_loads_eliminate]: 8.01007e-06 [parameter_eliminate]: 3.50003e-06 [a_2]: 0.00012133 [accelerated_algorithm]: 8.69005e-06 [shard]: 2.15007e-06 [meta_shard_fg_expand]: 3.92995e-06 [shard_inline]: 8.64e-06 [auto_parallel]: 1.219e-05 [parallel]: 7.21996e-06 [flash_sp]: 1.162e-05 [merge_comm]: 8.33999e-06 [allreduce_fusion]: 5.29003e-06 [matmul_add_comm_reduction]: 1.13901e-05 [allreduce_slice_to_reducescatter]: 4.59957e-07 [virtual_shard_identity]: 1.02599e-05 [virtual_dataset]: 8.16002e-06 [get_grad_eliminate_]: 8.05e-06 [virtual_output]: 7.81997e-06 [merge_forward]: 6.67002e-06 [cell_reuse_recompute_pass]: 1.79e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.696e-05 [before_grad]: 1.487e-05 [inplace_validation]: 6.30994e-06 [meta_fg_expand]: 5.56e-06 [inplace_validation_after_expand]: 7.02008e-06 [flash_sp_send_recv_attached]: 4.95999e-06 [receive_attached]: 2.96999e-06 [after_resolve]: 1.204e-05 [a_after_grad]: 1.28601e-05 [special_op_eliminate]: 8.19005e-06 [renormalize]: 0.0004735 [add_forward_monad_depend]: 3.97e-06 [auto_monad_grad]: 2.03995e-06 [auto_monad_eliminator]: 3.46199e-05 [cse]: 3.664e-05 [a_3]: 5.99601e-05 [Cycle 2]: 0.00079586, [43] [expand_dump_flag]: 1.16997e-06 [switch_simplify]: 8.98005e-06 [loop_unroll]: 7.9301e-06 [a_1]: 0.00020506 [recompute_prepare]: 7.73999e-06 [updatestate_depend_eliminate]: 5.96e-06 [updatestate_assign_eliminate]: 4.67e-06 [updatestate_loads_eliminate]: 5.89993e-06 [parameter_eliminate]: 1.56998e-06 [a_2]: 0.00010536 [accelerated_algorithm]: 8.80996e-06 [shard]: 1.24006e-06 [meta_shard_fg_expand]: 2.60992e-06 [shard_inline]: 8.23999e-06 [auto_parallel]: 1.165e-05 [parallel]: 3.86999e-06 [flash_sp]: 3.41993e-06 [merge_comm]: 6.07001e-06 [allreduce_fusion]: 4.78991e-06 [matmul_add_comm_reduction]: 8.01007e-06 [allreduce_slice_to_reducescatter]: 2.59955e-07 [virtual_shard_identity]: 8.88004e-06 [virtual_dataset]: 7.43999e-06 [get_grad_eliminate_]: 7.36001e-06 [virtual_output]: 7.05e-06 [merge_forward]: 4.75999e-06 [cell_reuse_recompute_pass]: 2.03005e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.56199e-05 [before_grad]: 1.275e-05 [inplace_validation]: 4.42006e-06 [meta_fg_expand]: 4.68991e-06 [inplace_validation_after_expand]: 5.46e-06 [flash_sp_send_recv_attached]: 9.50065e-07 [receive_attached]: 6.50063e-07 [after_resolve]: 9.61998e-06 [a_after_grad]: 1.208e-05 [special_op_eliminate]: 7.53999e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 1.14995e-06 [auto_monad_grad]: 1.35007e-06 [auto_monad_eliminator]: 1.847e-05 [cse]: 2.031e-05 [a_3]: 4.859e-05 [py_interpret_to_execute_after_opt_a]: 9.56003e-06 [slice_cell_reuse_recomputed_activation]: 2.33005e-06 [rewriter_after_opt_a]: 0.00014761 [convert_after_rewriter]: 9.9201e-06 [order_py_execute_after_rewriter]: 6.31995e-06 [opt_b]: 0.00024774, [1] [Cycle 1]: 0.0002423, [7] [b_1]: 0.00016489 [b_2]: 9.75991e-06 [updatestate_depend_eliminate]: 5.58002e-06 [updatestate_assign_eliminate]: 4.52995e-06 [updatestate_loads_eliminate]: 5.20993e-06 [renormalize]: 3.80096e-07 [cse]: 1.936e-05 [optimize_parallel_all_gather_comm]: 8.36002e-06 [overlap_param_gather]: 9.50065e-07 [cconv]: 2.434e-05 [loop_unroll]: 0.00059731 [opt_after_cconv]: 0.000138, [1] [Cycle 1]: 0.00013164, [7] [c_1]: 5.35899e-05 [parameter_eliminate]: 2.48e-06 [updatestate_depend_eliminate]: 9.01008e-06 [updatestate_assign_eliminate]: 4.84998e-06 [updatestate_loads_eliminate]: 5.82007e-06 [cse]: 2.22301e-05 [renormalize]: 4.20026e-07 [remove_dup_value]: 1.341e-05 [tuple_transform]: 7.16201e-05, [1] [Cycle 1]: 6.719e-05, [2] [d_1]: 5.761e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 2.12993e-06 [add_cache_embedding]: 1.487e-05 [add_recomputation]: 6.29501e-05 [cse_after_recomputation]: 2.808e-05, [1] [Cycle 1]: 2.35201e-05, [1] [cse]: 1.824e-05 [environ_conv]: 7.60006e-06 [swap_dp_allreduce_reducescatter]: 8.15e-06 [bias_add_comm_swap]: 2.64007e-06 [label_micro_interleaved_index]: 1.89e-06 [label_fine_grained_interleaved_index]: 2.30002e-06 [merge_cast_opt]: 1.21002e-06 [slice_recompute_activation]: 2.24996e-06 [micro_interleaved_order_control]: 1.94006e-06 [assign_add_opt]: 2.979e-05 [ForceFp32Comm]: 9.59961e-07 [remove_cast_before_assign_add]: 7.78993e-06 [full_micro_interleaved_order_control]: 2.15007e-06 [reorder_send_recv_between_fp_bp]: 2.63005e-06 [comm_op_add_attrs]: 2.88599e-05 [add_comm_op_reuse_tag]: 1.99e-06 [interleave_split_concat_branches]: 8.50065e-07 [interleave_parallel_branches]: 9.30042e-07 [overlap_opt_shard_in_pipeline]: 1.04995e-06 [overlap_opt_shard_grad_in_pipeline]: 2.73006e-06 [control_data_broadcast_order]: 1.11002e-06 [grouped_pairwise_exchange_alltoall]: 1.00801e-05 [offloading_packed_experts]: 2.24996e-06 [overlap_recompute_and_grad_model_parallel]: 2.11003e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.89994e-07 [overlap_recompute_allgather_and_fa_grad]: 6.261e-05 [overlap_grad_ring_attention]: 2.37999e-06 [overlap_grad_flash_sp]: 1.649e-05 [begin_end_overlap_inline]: 7.29924e-07 [split_matmul_comm_elemetwise]: 1.99e-06 [split_layernorm_comm]: 2.13995e-06 [handle_group_info]: 4.54008e-06 [symbol_engine_optimizer]: 9.342e-05, [1] [Cycle 1]: 8.84801e-05, [6] [build]: 5.03997e-06 [elim_shapecalc]: 1.34801e-05 [elim_not_effective]: 1.778e-05 [opt_reshape]: 8.78994e-06 [fold_const_symbol]: 1.442e-05 [renormalize]: 3.39933e-07 [pipeline_parallel_scheduler]: 1.57999e-06 [auto_monad_reorder]: 3.491e-05 [get_jit_bprop_graph]: 4.59957e-07 [rewriter_after_jit_bprop_graph]: 4.30038e-07 [eliminate_special_op_node]: 0.0005222 [distribtued_split]: 4.328e-05 [validate]: 3.835e-05 [task_emit]: 0.0701859 [execute]: 1.211e-05 Sums bootstrap : 0.000335s : 0.43% type_inference : 0.002616s : 3.37% auto_monad : 0.000133s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000554s : 0.71% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000227s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000028s : 0.04% optimize.opt_a.inplace_validation : 0.000011s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000474s : 0.61% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000053s : 0.07% optimize.opt_a.cse : 0.000057s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000148s : 0.19% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000597s : 0.77% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000058s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000015s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000030s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000008s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000029s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000063s : 0.08% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000035s : 0.05% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000522s : 0.67% distribtued_split : 0.000043s : 0.06% validate : 0.000038s : 0.05% task_emit : 0.070186s : 90.48% execute : 0.000012s : 0.02% Time group info: ------[substitution.] 0.000139 63 5.03% : 0.000007s : 2: substitution.depend_value_elim 2.25% : 0.000003s : 5: substitution.elim_not_effective 1.93% : 0.000003s : 5: substitution.fold_const_symbol 5.33% : 0.000007s : 6: substitution.graph_param_transform 51.15% : 0.000071s : 1: substitution.inline 4.29% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.20% : 0.000004s : 6: substitution.load_eliminater 2.72% : 0.000004s : 2: substitution.reduce_all_const_elim 5.72% : 0.000008s : 10: substitution.remove_not_recompute_node 2.47% : 0.000003s : 2: substitution.replace_old_param 8.42% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.47% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002586 2 88.70% : 0.002294s : 1: type_inference.infer 11.30% : 0.000292s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000070 1 100.00% : 0.000070s : 1: match.inline ------[predicate.] 0.000233 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.23% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.16% : 0.000005s : 25: predicate.arithmetic_simplify 0.83% : 0.000002s : 13: predicate.cast_eliminate 0.85% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.40% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.63% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_depend_swap 1.91% : 0.000004s : 31: predicate.environ_get_eliminate 1.07% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.79% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.41% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.75% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.66% : 0.000013s : 63: predicate.inline 1.08% : 0.000003s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.06% : 0.000002s : 12: predicate.less_batch_normalization 1.68% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.34% : 0.000005s : 38: predicate.load_eliminater 1.52% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.23% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.91% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.82% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.70% : 0.000002s : 13: predicate.minmaximum_grad 0.83% : 0.000002s : 6: predicate.mutable_eliminate 0.52% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.29% : 0.000003s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.82% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.25% : 0.000003s : 13: predicate.reduce_eliminate 0.52% : 0.000001s : 12: predicate.remove_not_recompute_node 1.05% : 0.000002s : 25: predicate.replace_applicator 0.43% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.43% : 0.000003s : 18: predicate.special_op_eliminate 0.91% : 0.000002s : 12: predicate.specialize_transform 1.01% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.02% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.19% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.96% : 0.000002s : 14: predicate.switch_defer_inline 1.60% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.76% : 0.000011s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.82% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.72% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.44% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.68% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.52% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.62% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.31% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.39% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.46% : 0.000001s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.57% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000176 4 9.86% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.14% : 0.000159s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091108 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.02% : 0.000019s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000068s : 1: add_recomputation 0.04% : 0.000034s : 1: assign_add_opt 0.16% : 0.000147s : 1: auto_monad 0.05% : 0.000042s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.40% : 0.000362s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.04% : 0.000033s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000051s : 1: distribtued_split 0.59% : 0.000537s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.67% : 0.000608s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001120s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.04% : 0.000034s : 3: opt.transform.special_op_eliminate 0.06% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.07% : 0.005527s : 1: opt_a 0.16% : 0.000143s : 1: opt_after_cconv 0.28% : 0.000251s : 1: opt_b 8.25% : 0.007518s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.07% : 0.000068s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.28% : 0.000254s : 1: renormalize.infer 0.23% : 0.000214s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000154s : 1: rewriter_after_opt_a 0.04% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000006s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000097s : 1: symbol_engine_optimizer 77.07% : 0.070216s : 1: task_emit 0.08% : 0.000075s : 1: tuple_transform 2.89% : 0.002635s : 1: type_inference 0.08% : 0.000073s : 1: validate TotalTime = 0.0826622, [21] [bootstrap]: 0.00031971 [type_inference]: 0.00244875 [auto_monad]: 0.00010583 [graph_reusing]: 1.61992e-06 [inline]: 1.05007e-06 [parallel-infer-symbol]: 1.35996e-06 [pre_auto_parallel]: 2.166e-05 [insert-virtual-dataset]: 1.92004e-06 [parallel-infer-symbol-second]: 3.89991e-07 [dataset_repeat_opt]: 7.00005e-07 [pipeline_split]: 1.01002e-06 [optimize]: 0.00732599, [52] [py_interpret_to_execute]: 1.438e-05 [rewriter_before_opt_a]: 3.119e-05 [opt_a]: 0.00550382, [2] [Cycle 1]: 0.00145977, [43] [expand_dump_flag]: 3.05998e-06 [switch_simplify]: 2.704e-05 [loop_unroll]: 1.28499e-05 [a_1]: 0.00033059 [recompute_prepare]: 9.00007e-06 [updatestate_depend_eliminate]: 7.58003e-06 [updatestate_assign_eliminate]: 5.99993e-06 [updatestate_loads_eliminate]: 6.30005e-06 [parameter_eliminate]: 2.19001e-06 [a_2]: 0.00011453 [accelerated_algorithm]: 8.6599e-06 [shard]: 1.47009e-06 [meta_shard_fg_expand]: 3.23006e-06 [shard_inline]: 8.96992e-06 [auto_parallel]: 1.093e-05 [parallel]: 5.76e-06 [flash_sp]: 7.79994e-06 [merge_comm]: 6.47991e-06 [allreduce_fusion]: 5.10993e-06 [matmul_add_comm_reduction]: 8.69995e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 9.59996e-06 [virtual_dataset]: 8.26002e-06 [get_grad_eliminate_]: 8.15e-06 [virtual_output]: 7.88004e-06 [merge_forward]: 5.49003e-06 [cell_reuse_recompute_pass]: 1.30001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.618e-05 [before_grad]: 1.44701e-05 [inplace_validation]: 4.51994e-06 [meta_fg_expand]: 5.44998e-06 [inplace_validation_after_expand]: 5.44009e-06 [flash_sp_send_recv_attached]: 2.49001e-06 [receive_attached]: 1.87999e-06 [after_resolve]: 1.029e-05 [a_after_grad]: 1.29601e-05 [special_op_eliminate]: 8.00006e-06 [renormalize]: 0.00041506 [add_forward_monad_depend]: 2.5999e-06 [auto_monad_grad]: 1.57999e-06 [auto_monad_eliminator]: 2.375e-05 [cse]: 2.545e-05 [a_3]: 5.931e-05 [Cycle 2]: 0.00078247, [43] [expand_dump_flag]: 9.39937e-07 [switch_simplify]: 9.47993e-06 [loop_unroll]: 8.10006e-06 [a_1]: 0.00020449 [recompute_prepare]: 7.61996e-06 [updatestate_depend_eliminate]: 5.48002e-06 [updatestate_assign_eliminate]: 4.71994e-06 [updatestate_loads_eliminate]: 5.29992e-06 [parameter_eliminate]: 9.69972e-07 [a_2]: 0.00010544 [accelerated_algorithm]: 8.37003e-06 [shard]: 9.30042e-07 [meta_shard_fg_expand]: 2.68e-06 [shard_inline]: 7.65e-06 [auto_parallel]: 1.015e-05 [parallel]: 3.02994e-06 [flash_sp]: 2.59001e-06 [merge_comm]: 5.88002e-06 [allreduce_fusion]: 5.01005e-06 [matmul_add_comm_reduction]: 7.50995e-06 [allreduce_slice_to_reducescatter]: 2.49944e-07 [virtual_shard_identity]: 8.82999e-06 [virtual_dataset]: 7.70995e-06 [get_grad_eliminate_]: 7.55e-06 [virtual_output]: 7.29004e-06 [merge_forward]: 4.25999e-06 [cell_reuse_recompute_pass]: 1.67999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.50299e-05 [before_grad]: 1.26101e-05 [inplace_validation]: 4.28001e-06 [meta_fg_expand]: 4.87e-06 [inplace_validation_after_expand]: 5.31995e-06 [flash_sp_send_recv_attached]: 1.0099e-06 [receive_attached]: 7.69971e-07 [after_resolve]: 9.46003e-06 [a_after_grad]: 1.19e-05 [special_op_eliminate]: 7.29004e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 8.30041e-07 [auto_monad_grad]: 1.04995e-06 [auto_monad_eliminator]: 1.67e-05 [cse]: 1.886e-05 [a_3]: 4.886e-05 [py_interpret_to_execute_after_opt_a]: 8.95001e-06 [slice_cell_reuse_recomputed_activation]: 1.75007e-06 [rewriter_after_opt_a]: 0.00013397 [convert_after_rewriter]: 8.1301e-06 [order_py_execute_after_rewriter]: 5.88002e-06 [opt_b]: 0.00024183, [1] [Cycle 1]: 0.00023676, [7] [b_1]: 0.00016383 [b_2]: 1.03101e-05 [updatestate_depend_eliminate]: 4.9501e-06 [updatestate_assign_eliminate]: 4.08001e-06 [updatestate_loads_eliminate]: 4.8799e-06 [renormalize]: 2.79979e-07 [cse]: 1.78999e-05 [optimize_parallel_all_gather_comm]: 7.60006e-06 [overlap_param_gather]: 1.16997e-06 [cconv]: 1.532e-05 [loop_unroll]: 0.00048957 [opt_after_cconv]: 0.00013219, [1] [Cycle 1]: 0.00012656, [7] [c_1]: 5.382e-05 [parameter_eliminate]: 1.70001e-06 [updatestate_depend_eliminate]: 7.48993e-06 [updatestate_assign_eliminate]: 4.80004e-06 [updatestate_loads_eliminate]: 5.11995e-06 [cse]: 2.112e-05 [renormalize]: 3.30037e-07 [remove_dup_value]: 1.02399e-05 [tuple_transform]: 6.945e-05, [1] [Cycle 1]: 6.526e-05, [2] [d_1]: 5.56e-05 [renormalize]: 2.00002e-07 [partial_unused_args_eliminate]: 1.68011e-06 [add_cache_embedding]: 5.91499e-05 [add_recomputation]: 5.60201e-05 [cse_after_recomputation]: 2.774e-05, [1] [Cycle 1]: 2.246e-05, [1] [cse]: 1.729e-05 [environ_conv]: 7.17002e-06 [swap_dp_allreduce_reducescatter]: 7.10005e-06 [bias_add_comm_swap]: 1.97999e-06 [label_micro_interleaved_index]: 1.24006e-06 [label_fine_grained_interleaved_index]: 1.04995e-06 [merge_cast_opt]: 8.10018e-07 [slice_recompute_activation]: 1.03994e-06 [micro_interleaved_order_control]: 1.36008e-06 [assign_add_opt]: 2.41101e-05 [ForceFp32Comm]: 5.60074e-07 [remove_cast_before_assign_add]: 6.37001e-06 [full_micro_interleaved_order_control]: 1.07009e-06 [reorder_send_recv_between_fp_bp]: 1.06008e-06 [comm_op_add_attrs]: 2.23301e-05 [add_comm_op_reuse_tag]: 1.51002e-06 [interleave_split_concat_branches]: 7.69971e-07 [interleave_parallel_branches]: 7.00005e-07 [overlap_opt_shard_in_pipeline]: 1.13994e-06 [overlap_opt_shard_grad_in_pipeline]: 1.35996e-06 [control_data_broadcast_order]: 6.79982e-07 [grouped_pairwise_exchange_alltoall]: 6.27001e-06 [offloading_packed_experts]: 1.13994e-06 [overlap_recompute_and_grad_model_parallel]: 1.11002e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.69969e-07 [overlap_recompute_allgather_and_fa_grad]: 6.71099e-05 [overlap_grad_ring_attention]: 1.22993e-06 [overlap_grad_flash_sp]: 1.183e-05 [begin_end_overlap_inline]: 4.49945e-07 [split_matmul_comm_elemetwise]: 1.32993e-06 [split_layernorm_comm]: 1.03004e-06 [handle_group_info]: 3.46999e-06 [symbol_engine_optimizer]: 8.91801e-05, [1] [Cycle 1]: 8.441e-05, [6] [build]: 4.08001e-06 [elim_shapecalc]: 1.31e-05 [elim_not_effective]: 1.689e-05 [opt_reshape]: 9.00996e-06 [fold_const_symbol]: 1.422e-05 [renormalize]: 2.20025e-07 [pipeline_parallel_scheduler]: 9.10019e-07 [auto_monad_reorder]: 2.39901e-05 [get_jit_bprop_graph]: 3.39933e-07 [rewriter_after_jit_bprop_graph]: 3.29921e-07 [eliminate_special_op_node]: 0.00051998 [distribtued_split]: 3.337e-05 [validate]: 3.054e-05 [task_emit]: 0.0715777 [execute]: 9.54e-06 Sums bootstrap : 0.000320s : 0.41% type_inference : 0.002449s : 3.12% auto_monad : 0.000106s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000022s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000014s : 0.02% optimize.rewriter_before_opt_a : 0.000031s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000037s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000535s : 0.68% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000220s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000002s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000012s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000003s : 0.00% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000415s : 0.53% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000134s : 0.17% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000164s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000490s : 0.62% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000059s : 0.08% optimize.add_recomputation : 0.000056s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000024s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000022s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000006s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000067s : 0.09% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000520s : 0.66% distribtued_split : 0.000033s : 0.04% validate : 0.000031s : 0.04% task_emit : 0.071578s : 91.30% execute : 0.000010s : 0.01% TotalTime = 0.0829098, [21] [bootstrap]: 0.00032878 [type_inference]: 0.00265792 [auto_monad]: 0.00013575 [graph_reusing]: 3.04997e-06 [inline]: 1.46998e-06 [parallel-infer-symbol]: 2.51993e-06 [pre_auto_parallel]: 2.67e-05 [insert-virtual-dataset]: 2.96999e-06 [parallel-infer-symbol-second]: 6.89994e-07 [dataset_repeat_opt]: 1.33994e-06 [pipeline_split]: 1.8701e-06 [optimize]: 0.00780129, [52] [py_interpret_to_execute]: 1.645e-05 [rewriter_before_opt_a]: 3.67999e-05 [opt_a]: 0.00589113, [2] [Cycle 1]: 0.00156107, [43] [expand_dump_flag]: 3.79002e-06 [switch_simplify]: 3.022e-05 [loop_unroll]: 1.30499e-05 [a_1]: 0.00034701 [recompute_prepare]: 8.71008e-06 [updatestate_depend_eliminate]: 8.74e-06 [updatestate_assign_eliminate]: 6.48992e-06 [updatestate_loads_eliminate]: 7.38993e-06 [parameter_eliminate]: 3.01003e-06 [a_2]: 0.00011782 [accelerated_algorithm]: 8.45001e-06 [shard]: 2.20002e-06 [meta_shard_fg_expand]: 4.01994e-06 [shard_inline]: 8.31007e-06 [auto_parallel]: 1.21e-05 [parallel]: 7.51007e-06 [flash_sp]: 1.12101e-05 [merge_comm]: 7.79005e-06 [allreduce_fusion]: 5.74999e-06 [matmul_add_comm_reduction]: 1.129e-05 [allreduce_slice_to_reducescatter]: 9.69972e-07 [virtual_shard_identity]: 9.37004e-06 [virtual_dataset]: 8.18004e-06 [get_grad_eliminate_]: 8.12009e-06 [virtual_output]: 8.37992e-06 [merge_forward]: 6.55011e-06 [cell_reuse_recompute_pass]: 1.71002e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.713e-05 [before_grad]: 1.41701e-05 [inplace_validation]: 5.15999e-06 [meta_fg_expand]: 5.51005e-06 [inplace_validation_after_expand]: 6.12007e-06 [flash_sp_send_recv_attached]: 5.4501e-06 [receive_attached]: 2.85008e-06 [after_resolve]: 1.273e-05 [a_after_grad]: 1.258e-05 [special_op_eliminate]: 7.91997e-06 [renormalize]: 0.00044991 [add_forward_monad_depend]: 3.88001e-06 [auto_monad_grad]: 2.14996e-06 [auto_monad_eliminator]: 3.101e-05 [cse]: 3.406e-05 [a_3]: 5.85e-05 [Cycle 2]: 0.00079106, [43] [expand_dump_flag]: 1.00001e-06 [switch_simplify]: 8.88994e-06 [loop_unroll]: 7.63999e-06 [a_1]: 0.00020359 [recompute_prepare]: 7.52008e-06 [updatestate_depend_eliminate]: 5.98002e-06 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 5.59993e-06 [parameter_eliminate]: 1.27999e-06 [a_2]: 0.00010525 [accelerated_algorithm]: 8.13999e-06 [shard]: 1.2099e-06 [meta_shard_fg_expand]: 2.34996e-06 [shard_inline]: 8.22009e-06 [auto_parallel]: 1.11599e-05 [parallel]: 3.4601e-06 [flash_sp]: 3.32005e-06 [merge_comm]: 6.14009e-06 [allreduce_fusion]: 4.95999e-06 [matmul_add_comm_reduction]: 8.06001e-06 [allreduce_slice_to_reducescatter]: 2.59955e-07 [virtual_shard_identity]: 9.30997e-06 [virtual_dataset]: 8.02008e-06 [get_grad_eliminate_]: 7.68993e-06 [virtual_output]: 7.18993e-06 [merge_forward]: 4.75009e-06 [cell_reuse_recompute_pass]: 1.97999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.573e-05 [before_grad]: 1.233e-05 [inplace_validation]: 4.43996e-06 [meta_fg_expand]: 5.03997e-06 [inplace_validation_after_expand]: 5.48002e-06 [flash_sp_send_recv_attached]: 8.79983e-07 [receive_attached]: 8.10018e-07 [after_resolve]: 9.64e-06 [a_after_grad]: 1.189e-05 [special_op_eliminate]: 7.50995e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 1.07998e-06 [auto_monad_grad]: 1.23004e-06 [auto_monad_eliminator]: 1.859e-05 [cse]: 2.03201e-05 [a_3]: 4.91301e-05 [py_interpret_to_execute_after_opt_a]: 9.20997e-06 [slice_cell_reuse_recomputed_activation]: 2.35008e-06 [rewriter_after_opt_a]: 0.00015 [convert_after_rewriter]: 1.033e-05 [order_py_execute_after_rewriter]: 5.70004e-06 [opt_b]: 0.00024477, [1] [Cycle 1]: 0.00023906, [7] [b_1]: 0.00016336 [b_2]: 1.029e-05 [updatestate_depend_eliminate]: 5.32006e-06 [updatestate_assign_eliminate]: 4.35999e-06 [updatestate_loads_eliminate]: 5.18002e-06 [renormalize]: 2.00002e-07 [cse]: 1.928e-05 [optimize_parallel_all_gather_comm]: 8.3301e-06 [overlap_param_gather]: 1.52003e-06 [cconv]: 2.404e-05 [loop_unroll]: 0.00052709 [opt_after_cconv]: 0.00013687, [1] [Cycle 1]: 0.00013073, [7] [c_1]: 5.364e-05 [parameter_eliminate]: 2.32994e-06 [updatestate_depend_eliminate]: 8.16991e-06 [updatestate_assign_eliminate]: 4.77e-06 [updatestate_loads_eliminate]: 5.37001e-06 [cse]: 2.31001e-05 [renormalize]: 4.00003e-07 [remove_dup_value]: 1.394e-05 [tuple_transform]: 6.944e-05, [1] [Cycle 1]: 6.497e-05, [2] [d_1]: 5.55699e-05 [renormalize]: 2.10013e-07 [partial_unused_args_eliminate]: 2.25997e-06 [add_cache_embedding]: 1.35399e-05 [add_recomputation]: 6.20199e-05 [cse_after_recomputation]: 2.804e-05, [1] [Cycle 1]: 2.278e-05, [1] [cse]: 1.778e-05 [environ_conv]: 7.56991e-06 [swap_dp_allreduce_reducescatter]: 7.41996e-06 [bias_add_comm_swap]: 2.31003e-06 [label_micro_interleaved_index]: 1.86998e-06 [label_fine_grained_interleaved_index]: 2.02004e-06 [merge_cast_opt]: 1.32993e-06 [slice_recompute_activation]: 1.82004e-06 [micro_interleaved_order_control]: 2.17999e-06 [assign_add_opt]: 2.943e-05 [ForceFp32Comm]: 8.29925e-07 [remove_cast_before_assign_add]: 6.88992e-06 [full_micro_interleaved_order_control]: 2.09e-06 [reorder_send_recv_between_fp_bp]: 2.07999e-06 [comm_op_add_attrs]: 2.84601e-05 [add_comm_op_reuse_tag]: 2.11003e-06 [interleave_split_concat_branches]: 1.55007e-06 [interleave_parallel_branches]: 9.2003e-07 [overlap_opt_shard_in_pipeline]: 1.60001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.54996e-06 [control_data_broadcast_order]: 1.22003e-06 [grouped_pairwise_exchange_alltoall]: 9.09006e-06 [offloading_packed_experts]: 2.55008e-06 [overlap_recompute_and_grad_model_parallel]: 2.01992e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.13004e-06 [overlap_recompute_allgather_and_fa_grad]: 7.343e-05 [overlap_grad_ring_attention]: 2.05997e-06 [overlap_grad_flash_sp]: 1.63499e-05 [begin_end_overlap_inline]: 8.89995e-07 [split_matmul_comm_elemetwise]: 2.00002e-06 [split_layernorm_comm]: 1.92993e-06 [handle_group_info]: 5.20004e-06 [symbol_engine_optimizer]: 9.11801e-05, [1] [Cycle 1]: 8.61801e-05, [6] [build]: 4.92996e-06 [elim_shapecalc]: 1.318e-05 [elim_not_effective]: 1.71399e-05 [opt_reshape]: 8.78004e-06 [fold_const_symbol]: 1.40499e-05 [renormalize]: 3.39933e-07 [pipeline_parallel_scheduler]: 1.56998e-06 [auto_monad_reorder]: 3.138e-05 [get_jit_bprop_graph]: 4.80097e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00052776 [distribtued_split]: 4.17299e-05 [validate]: 3.55199e-05 [task_emit]: 0.0710564 [execute]: 1.092e-05 Sums bootstrap : 0.000329s : 0.42% type_inference : 0.002658s : 3.39% auto_monad : 0.000136s : 0.17% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000027s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000551s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000223s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000450s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000150s : 0.19% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000527s : 0.67% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000002s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000073s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000528s : 0.67% distribtued_split : 0.000042s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.071056s : 90.68% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000111 63 4.11% : 0.000005s : 2: substitution.depend_value_elim 2.15% : 0.000002s : 5: substitution.elim_not_effective 2.31% : 0.000003s : 5: substitution.fold_const_symbol 5.83% : 0.000006s : 6: substitution.graph_param_transform 48.88% : 0.000054s : 1: substitution.inline 4.72% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.57% : 0.000004s : 6: substitution.load_eliminater 2.45% : 0.000003s : 2: substitution.reduce_all_const_elim 6.41% : 0.000007s : 10: substitution.remove_not_recompute_node 2.12% : 0.000002s : 2: substitution.replace_old_param 9.23% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.22% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002424 2 90.13% : 0.002185s : 1: type_inference.infer 9.87% : 0.000239s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000053 1 100.00% : 0.000053s : 1: match.inline ------[predicate.] 0.000232 1420 0.88% : 0.000002s : 13: predicate.accumulaten_eliminater 1.18% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.93% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.45% : 0.000006s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.49% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.23% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.86% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.94% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.63% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.20% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.04% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_depend_swap 1.95% : 0.000005s : 31: predicate.environ_get_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.88% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.21% : 0.000003s : 14: predicate.float_depend_g_call 0.68% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.89% : 0.000002s : 12: predicate.get_grad_eliminate 0.30% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.39% : 0.000013s : 63: predicate.inline 1.12% : 0.000003s : 12: predicate.inline_without_move 0.37% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.14% : 0.000003s : 12: predicate.less_batch_normalization 1.83% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.41% : 0.000006s : 38: predicate.load_eliminater 1.37% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.70% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.83% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.70% : 0.000002s : 6: predicate.mutable_eliminate 0.49% : 0.000001s : 6: predicate.opt_reshape 0.55% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.27% : 0.000003s : 19: predicate.partial_eliminate 0.81% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.12% : 0.000003s : 13: predicate.reduce_eliminate 0.52% : 0.000001s : 12: predicate.remove_not_recompute_node 1.05% : 0.000002s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.22% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.57% : 0.000001s : 6: predicate.row_tensor_eliminate 0.99% : 0.000002s : 12: predicate.same_eliminate 0.44% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.38% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 1.02% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.89% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.41% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.89% : 0.000002s : 14: predicate.switch_defer_inline 1.61% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.48% : 0.000010s : 43: predicate.switch_simplify 0.85% : 0.000002s : 13: predicate.tile_eliminate 0.86% : 0.000002s : 13: predicate.transpose_eliminate 1.71% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.78% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.77% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.58% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.68% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.37% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.44% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.59% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.47% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000146 4 7.20% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.80% : 0.000135s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091767 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.07% : 0.000064s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000061s : 1: add_recomputation 0.03% : 0.000028s : 1: assign_add_opt 0.13% : 0.000117s : 1: auto_monad 0.03% : 0.000030s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000341s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.03% : 0.000026s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.04% : 0.000041s : 1: distribtued_split 0.58% : 0.000533s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000010s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000006s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.54% : 0.000498s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.19% : 0.001089s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 6.00% : 0.005508s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.27% : 0.000245s : 1: opt_b 7.99% : 0.007334s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000072s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.24% : 0.000221s : 1: renormalize.infer 0.21% : 0.000189s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000140s : 1: rewriter_after_opt_a 0.04% : 0.000036s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000092s : 1: symbol_engine_optimizer 78.03% : 0.071607s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.69% : 0.002466s : 1: type_inference 0.07% : 0.000064s : 1: validate Time group info: ------[substitution.] 0.000134 63 4.61% : 0.000006s : 2: substitution.depend_value_elim 2.03% : 0.000003s : 5: substitution.elim_not_effective 1.90% : 0.000003s : 5: substitution.fold_const_symbol 5.40% : 0.000007s : 6: substitution.graph_param_transform 51.29% : 0.000069s : 1: substitution.inline 3.82% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.04% : 0.000004s : 6: substitution.load_eliminater 2.92% : 0.000004s : 2: substitution.reduce_all_const_elim 5.75% : 0.000008s : 10: substitution.remove_not_recompute_node 2.95% : 0.000004s : 2: substitution.replace_old_param 8.46% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.82% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002628 2 89.29% : 0.002347s : 1: type_inference.infer 10.71% : 0.000282s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000229 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.22% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.87% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.30% : 0.000005s : 25: predicate.arithmetic_simplify 0.94% : 0.000002s : 13: predicate.cast_eliminate 0.77% : 0.000002s : 12: predicate.check_bprop_eliminate 0.76% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.40% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.77% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.37% : 0.000001s : 6: predicate.elim_not_effective 0.51% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.26% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.04% : 0.000002s : 19: predicate.environ_get_depend_swap 2.06% : 0.000005s : 31: predicate.environ_get_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.10% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.92% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.81% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.66% : 0.000013s : 63: predicate.inline 1.07% : 0.000002s : 12: predicate.inline_without_move 0.37% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.09% : 0.000002s : 12: predicate.less_batch_normalization 1.74% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.49% : 0.000006s : 38: predicate.load_eliminater 1.29% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.17% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.83% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.85% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.94% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.71% : 0.000002s : 13: predicate.minmaximum_grad 0.68% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.44% : 0.000001s : 6: predicate.parallel_virtual_node 1.10% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.78% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.12% : 0.000003s : 13: predicate.reduce_eliminate 0.56% : 0.000001s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.78% : 0.000002s : 13: predicate.reshape_eliminate 0.77% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 1.05% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.41% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.09% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.28% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.34% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.89% : 0.000002s : 13: predicate.transpose_eliminate 1.77% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.71% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.52% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.68% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.51% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.31% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.41% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.54% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.83% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000172 4 9.87% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.13% : 0.000155s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092555 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000067s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.16% : 0.000148s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000351s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.04% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000005s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.59% : 0.000542s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.01% : 0.000005s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.58% : 0.000537s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.20% : 0.001112s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 6.37% : 0.005895s : 1: opt_a 0.15% : 0.000142s : 1: opt_after_cconv 0.27% : 0.000248s : 1: opt_b 8.44% : 0.007811s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000020s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.09% : 0.000079s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.26% : 0.000240s : 1: renormalize.infer 0.22% : 0.000203s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000156s : 1: rewriter_after_opt_a 0.04% : 0.000042s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000094s : 1: symbol_engine_optimizer 76.80% : 0.071086s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.89% : 0.002676s : 1: type_inference 0.08% : 0.000070s : 1: validate [WARNING] PARALLEL(169251,ffff805f5c10,python3.7):2025-02-07-15:54:29.908.176 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169260,ffff90a16c10,python3.7):2025-02-07-15:54:29.908.547 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169242,ffff96d80c10,python3.7):2025-02-07-15:54:29.908.700 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169297,ffff9ffe2c10,python3.7):2025-02-07-15:54:29.908.900 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169273,ffff95a23c10,python3.7):2025-02-07-15:54:29.908.900 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169309,ffff88eb9c10,python3.7):2025-02-07-15:54:29.908.966 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169323,ffff82a65c10,python3.7):2025-02-07-15:54:29.909.088 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169285,ffff97501c10,python3.7):2025-02-07-15:54:29.909.367 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 0.0803143, [21] [bootstrap]: 0.00028861 [type_inference]: 0.00231162 [auto_monad]: 0.00010393 [graph_reusing]: 1.85997e-06 [inline]: 1.15996e-06 [parallel-infer-symbol]: 1.76008e-06 [pre_auto_parallel]: 2.166e-05 [insert-virtual-dataset]: 2.22004e-06 [parallel-infer-symbol-second]: 3.7998e-07 [dataset_repeat_opt]: 8.49948e-07 [pipeline_split]: 9.89996e-07 [optimize]: 0.00694787, [52] [py_interpret_to_execute]: 1.309e-05 [rewriter_before_opt_a]: 3.23399e-05 [opt_a]: 0.00518889, [2] [Cycle 1]: 0.00150766, [43] [expand_dump_flag]: 2.76999e-06 [switch_simplify]: 2.563e-05 [loop_unroll]: 1.29801e-05 [a_1]: 0.00037128 [recompute_prepare]: 8.87003e-06 [updatestate_depend_eliminate]: 8.02998e-06 [updatestate_assign_eliminate]: 5.82996e-06 [updatestate_loads_eliminate]: 6.38003e-06 [parameter_eliminate]: 2.46998e-06 [a_2]: 0.00011486 [accelerated_algorithm]: 8.31997e-06 [shard]: 1.52993e-06 [meta_shard_fg_expand]: 3.19991e-06 [shard_inline]: 8.27003e-06 [auto_parallel]: 1.106e-05 [parallel]: 5.30994e-06 [flash_sp]: 8.03999e-06 [merge_comm]: 6.81006e-06 [allreduce_fusion]: 5.51995e-06 [matmul_add_comm_reduction]: 9.31998e-06 [allreduce_slice_to_reducescatter]: 3.59956e-07 [virtual_shard_identity]: 9.56003e-06 [virtual_dataset]: 8.42998e-06 [get_grad_eliminate_]: 7.72998e-06 [virtual_output]: 7.61007e-06 [merge_forward]: 5.25999e-06 [cell_reuse_recompute_pass]: 1.76998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.691e-05 [before_grad]: 1.415e-05 [inplace_validation]: 4.59992e-06 [meta_fg_expand]: 5.46e-06 [inplace_validation_after_expand]: 6.18002e-06 [flash_sp_send_recv_attached]: 3.45008e-06 [receive_attached]: 1.87999e-06 [after_resolve]: 1.095e-05 [a_after_grad]: 1.266e-05 [special_op_eliminate]: 7.87003e-06 [renormalize]: 0.00042102 [add_forward_monad_depend]: 2.82004e-06 [auto_monad_grad]: 1.53994e-06 [auto_monad_eliminator]: 2.507e-05 [cse]: 2.61001e-05 [a_3]: 5.811e-05 [Cycle 2]: 0.00078757, [43] [expand_dump_flag]: 9.50065e-07 [switch_simplify]: 9.16002e-06 [loop_unroll]: 8.23999e-06 [a_1]: 0.00020234 [recompute_prepare]: 7.80006e-06 [updatestate_depend_eliminate]: 6.16e-06 [updatestate_assign_eliminate]: 4.67e-06 [updatestate_loads_eliminate]: 5.43997e-06 [parameter_eliminate]: 1.12003e-06 [a_2]: 0.00010502 [accelerated_algorithm]: 8.64e-06 [shard]: 1.27009e-06 [meta_shard_fg_expand]: 2.82004e-06 [shard_inline]: 8.19995e-06 [auto_parallel]: 9.91998e-06 [parallel]: 3.19001e-06 [flash_sp]: 2.75997e-06 [merge_comm]: 6.39004e-06 [allreduce_fusion]: 5.24998e-06 [matmul_add_comm_reduction]: 7.43999e-06 [allreduce_slice_to_reducescatter]: 2.59955e-07 [virtual_shard_identity]: 9.12999e-06 [virtual_dataset]: 7.50995e-06 [get_grad_eliminate_]: 7.25e-06 [virtual_output]: 7.36001e-06 [merge_forward]: 4.37989e-06 [cell_reuse_recompute_pass]: 1.78011e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.558e-05 [before_grad]: 1.233e-05 [inplace_validation]: 4.27e-06 [meta_fg_expand]: 4.81005e-06 [inplace_validation_after_expand]: 5.23997e-06 [flash_sp_send_recv_attached]: 7.89994e-07 [receive_attached]: 7.40052e-07 [after_resolve]: 9.37004e-06 [a_after_grad]: 1.163e-05 [special_op_eliminate]: 7.38003e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 8.89995e-07 [auto_monad_grad]: 9.50065e-07 [auto_monad_eliminator]: 1.558e-05 [cse]: 1.838e-05 [a_3]: 4.955e-05 [py_interpret_to_execute_after_opt_a]: 8.76002e-06 [slice_cell_reuse_recomputed_activation]: 1.50001e-06 [rewriter_after_opt_a]: 0.00013626 [convert_after_rewriter]: 8.40996e-06 [order_py_execute_after_rewriter]: 5.94999e-06 [opt_b]: 0.00024332, [1] [Cycle 1]: 0.00023787, [7] [b_1]: 0.00016247 [b_2]: 9.95002e-06 [updatestate_depend_eliminate]: 5.07e-06 [updatestate_assign_eliminate]: 4.31004e-06 [updatestate_loads_eliminate]: 5.27001e-06 [renormalize]: 2.5006e-07 [cse]: 1.791e-05 [optimize_parallel_all_gather_comm]: 8.38004e-06 [overlap_param_gather]: 8.10018e-07 [cconv]: 1.731e-05 [loop_unroll]: 0.00048019 [opt_after_cconv]: 0.00013053, [1] [Cycle 1]: 0.00012461, [7] [c_1]: 5.114e-05 [parameter_eliminate]: 1.69e-06 [updatestate_depend_eliminate]: 7.75e-06 [updatestate_assign_eliminate]: 5.19003e-06 [updatestate_loads_eliminate]: 5.22006e-06 [cse]: 2.02301e-05 [renormalize]: 3.49944e-07 [remove_dup_value]: 1.082e-05 [tuple_transform]: 7.00201e-05, [1] [Cycle 1]: 6.55301e-05, [2] [d_1]: 5.60899e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.91003e-06 [add_cache_embedding]: 1.156e-05 [add_recomputation]: 5.324e-05 [cse_after_recomputation]: 2.713e-05, [1] [Cycle 1]: 2.294e-05, [1] [cse]: 1.76399e-05 [environ_conv]: 5.88992e-06 [swap_dp_allreduce_reducescatter]: 6.81006e-06 [bias_add_comm_swap]: 1.74006e-06 [label_micro_interleaved_index]: 1.1801e-06 [label_fine_grained_interleaved_index]: 1.15007e-06 [merge_cast_opt]: 7.10017e-07 [slice_recompute_activation]: 1.36998e-06 [micro_interleaved_order_control]: 1.36008e-06 [assign_add_opt]: 2.607e-05 [ForceFp32Comm]: 6.10016e-07 [remove_cast_before_assign_add]: 6.32997e-06 [full_micro_interleaved_order_control]: 1.41002e-06 [reorder_send_recv_between_fp_bp]: 1.11002e-06 [comm_op_add_attrs]: 2.167e-05 [add_comm_op_reuse_tag]: 1.45007e-06 [interleave_split_concat_branches]: 5.89993e-07 [interleave_parallel_branches]: 5.79981e-07 [overlap_opt_shard_in_pipeline]: 1.27999e-06 [overlap_opt_shard_grad_in_pipeline]: 1.44995e-06 [control_data_broadcast_order]: 6.6997e-07 [grouped_pairwise_exchange_alltoall]: 7.05e-06 [offloading_packed_experts]: 1.26997e-06 [overlap_recompute_and_grad_model_parallel]: 1.23994e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.59958e-07 [overlap_recompute_allgather_and_fa_grad]: 6.63e-05 [overlap_grad_ring_attention]: 1.73994e-06 [overlap_grad_flash_sp]: 1.282e-05 [begin_end_overlap_inline]: 4.89992e-07 [split_matmul_comm_elemetwise]: 1.24995e-06 [split_layernorm_comm]: 1.13994e-06 [handle_group_info]: 3.30992e-06 [symbol_engine_optimizer]: 8.86701e-05, [1] [Cycle 1]: 8.43799e-05, [6] [build]: 4.89992e-06 [elim_shapecalc]: 1.273e-05 [elim_not_effective]: 1.63399e-05 [opt_reshape]: 8.69005e-06 [fold_const_symbol]: 1.361e-05 [renormalize]: 3.39933e-07 [pipeline_parallel_scheduler]: 9.60077e-07 [auto_monad_reorder]: 2.388e-05 [get_jit_bprop_graph]: 3.30037e-07 [rewriter_after_jit_bprop_graph]: 3.00002e-07 [eliminate_special_op_node]: 0.00051112 [distribtued_split]: 3.386e-05 [validate]: 3.03801e-05 [task_emit]: 0.069789 [execute]: 9.59996e-06 Sums bootstrap : 0.000289s : 0.38% type_inference : 0.002312s : 3.02% auto_monad : 0.000104s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000022s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000032s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000574s : 0.75% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000220s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000421s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000041s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000136s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000017s : 0.02% optimize.loop_unroll : 0.000480s : 0.63% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.01% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000053s : 0.07% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000026s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000022s : 0.03% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000007s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000066s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000511s : 0.67% distribtued_split : 0.000034s : 0.04% validate : 0.000030s : 0.04% task_emit : 0.069789s : 91.32% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000112 63 4.17% : 0.000005s : 2: substitution.depend_value_elim 2.15% : 0.000002s : 5: substitution.elim_not_effective 2.08% : 0.000002s : 5: substitution.fold_const_symbol 5.51% : 0.000006s : 6: substitution.graph_param_transform 49.07% : 0.000055s : 1: substitution.inline 4.59% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.53% : 0.000004s : 6: substitution.load_eliminater 2.57% : 0.000003s : 2: substitution.reduce_all_const_elim 6.76% : 0.000008s : 10: substitution.remove_not_recompute_node 2.56% : 0.000003s : 2: substitution.replace_old_param 9.19% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 7.81% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002288 2 89.94% : 0.002058s : 1: type_inference.infer 10.06% : 0.000230s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000054 1 100.00% : 0.000054s : 1: match.inline ------[predicate.] 0.000229 1420 0.85% : 0.000002s : 13: predicate.accumulaten_eliminater 1.24% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.74% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.18% : 0.000005s : 25: predicate.arithmetic_simplify 0.81% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000000s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.26% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.85% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 1.00% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.20% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_depend_swap 1.90% : 0.000004s : 31: predicate.environ_get_eliminate 1.19% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.23% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.78% : 0.000002s : 12: predicate.get_grad_eliminate 0.32% : 0.000001s : 6: predicate.graph_param_transform 0.81% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.56% : 0.000013s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.02% : 0.000002s : 12: predicate.less_batch_normalization 1.73% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.56% : 0.000006s : 38: predicate.load_eliminater 1.21% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.19% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.78% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.76% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.25% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.85% : 0.000002s : 13: predicate.print_const_string_wrapper 0.78% : 0.000002s : 12: predicate.reduce_all_const_elim 1.02% : 0.000002s : 13: predicate.reduce_eliminate 0.53% : 0.000001s : 12: predicate.remove_not_recompute_node 1.11% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.83% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.06% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.94% : 0.000002s : 12: predicate.shard_identity_eliminate 1.39% : 0.000003s : 18: predicate.special_op_eliminate 0.89% : 0.000002s : 12: predicate.specialize_transform 1.01% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.73% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.23% : 0.000010s : 43: predicate.switch_simplify 0.82% : 0.000002s : 13: predicate.tile_eliminate 0.85% : 0.000002s : 13: predicate.transpose_eliminate 1.83% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.71% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.65% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.73% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.43% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.37% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.61% : 0.000001s : 6: predicate.value_based_eliminate 0.85% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000133 4 9.12% : 0.000012s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.88% : 0.000121s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089085 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000058s : 1: add_recomputation 0.03% : 0.000030s : 1: assign_add_opt 0.13% : 0.000116s : 1: auto_monad 0.03% : 0.000031s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.35% : 0.000314s : 1: bootstrap 0.02% : 0.000021s : 1: cconv 0.03% : 0.000026s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000042s : 1: distribtued_split 0.59% : 0.000524s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000010s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000006s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.55% : 0.000490s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.26% : 0.001123s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 5.83% : 0.005193s : 1: opt_a 0.15% : 0.000135s : 1: opt_after_cconv 0.28% : 0.000246s : 1: opt_b 7.81% : 0.006957s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000072s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000028s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.25% : 0.000223s : 1: renormalize.infer 0.22% : 0.000193s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000142s : 1: rewriter_after_opt_a 0.04% : 0.000037s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000092s : 1: symbol_engine_optimizer 78.37% : 0.069815s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.61% : 0.002328s : 1: type_inference 0.07% : 0.000063s : 1: validate TotalTime = 0.0813522, [21] [bootstrap]: 0.00031758 [type_inference]: 0.00254381 [auto_monad]: 0.00013103 [graph_reusing]: 1.42003e-06 [inline]: 9.89996e-07 [parallel-infer-symbol]: 1.07998e-06 [pre_auto_parallel]: 2.253e-05 [insert-virtual-dataset]: 2.50002e-06 [parallel-infer-symbol-second]: 3.70084e-07 [dataset_repeat_opt]: 1.23994e-06 [pipeline_split]: 1.05996e-06 [optimize]: 0.00725428, [52] [py_interpret_to_execute]: 1.47499e-05 [rewriter_before_opt_a]: 3.279e-05 [opt_a]: 0.0053831, [2] [Cycle 1]: 0.00154727, [43] [expand_dump_flag]: 3.60992e-06 [switch_simplify]: 2.993e-05 [loop_unroll]: 1.32499e-05 [a_1]: 0.00035023 [recompute_prepare]: 9.47993e-06 [updatestate_depend_eliminate]: 8.62998e-06 [updatestate_assign_eliminate]: 5.79993e-06 [updatestate_loads_eliminate]: 7.09994e-06 [parameter_eliminate]: 3.38e-06 [a_2]: 0.00011733 [accelerated_algorithm]: 8.74e-06 [shard]: 1.82004e-06 [meta_shard_fg_expand]: 3.92995e-06 [shard_inline]: 8.58994e-06 [auto_parallel]: 1.248e-05 [parallel]: 7.72998e-06 [flash_sp]: 1.103e-05 [merge_comm]: 8.07003e-06 [allreduce_fusion]: 5.58002e-06 [matmul_add_comm_reduction]: 1.042e-05 [allreduce_slice_to_reducescatter]: 3.40049e-07 [virtual_shard_identity]: 1.007e-05 [virtual_dataset]: 8.20006e-06 [get_grad_eliminate_]: 7.87003e-06 [virtual_output]: 7.69994e-06 [merge_forward]: 6.08002e-06 [cell_reuse_recompute_pass]: 1.62004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.711e-05 [before_grad]: 1.39499e-05 [inplace_validation]: 5.22996e-06 [meta_fg_expand]: 5.46e-06 [inplace_validation_after_expand]: 5.99003e-06 [flash_sp_send_recv_attached]: 5.24998e-06 [receive_attached]: 2.39001e-06 [after_resolve]: 1.22901e-05 [a_after_grad]: 1.272e-05 [special_op_eliminate]: 7.98993e-06 [renormalize]: 0.00043972 [add_forward_monad_depend]: 3.80992e-06 [auto_monad_grad]: 1.85997e-06 [auto_monad_eliminator]: 3.199e-05 [cse]: 3.388e-05 [a_3]: 5.904e-05 [Cycle 2]: 0.00077918, [43] [expand_dump_flag]: 1.47999e-06 [switch_simplify]: 8.8599e-06 [loop_unroll]: 7.59005e-06 [a_1]: 0.00020262 [recompute_prepare]: 7.76001e-06 [updatestate_depend_eliminate]: 6.19993e-06 [updatestate_assign_eliminate]: 4.7799e-06 [updatestate_loads_eliminate]: 5.38002e-06 [parameter_eliminate]: 1.72993e-06 [a_2]: 0.00010546 [accelerated_algorithm]: 8.15e-06 [shard]: 1.17999e-06 [meta_shard_fg_expand]: 2.36998e-06 [shard_inline]: 7.82998e-06 [auto_parallel]: 1.086e-05 [parallel]: 3.52005e-06 [flash_sp]: 3.30003e-06 [merge_comm]: 5.99003e-06 [allreduce_fusion]: 4.84008e-06 [matmul_add_comm_reduction]: 8.25e-06 [allreduce_slice_to_reducescatter]: 2.39932e-07 [virtual_shard_identity]: 8.87993e-06 [virtual_dataset]: 7.69994e-06 [get_grad_eliminate_]: 7.43009e-06 [virtual_output]: 7.27002e-06 [merge_forward]: 4.72006e-06 [cell_reuse_recompute_pass]: 1.74006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.521e-05 [before_grad]: 1.25699e-05 [inplace_validation]: 4.30003e-06 [meta_fg_expand]: 4.84998e-06 [inplace_validation_after_expand]: 5.29003e-06 [flash_sp_send_recv_attached]: 9.69972e-07 [receive_attached]: 6.89994e-07 [after_resolve]: 9.92999e-06 [a_after_grad]: 1.173e TotalTime = 0.0813521, [21] [bootstrap]: 0.00031698 [type_inference]: 0.00254388 [auto_monad]: 0.00012807 [graph_reusing]: 2.13005e-06 [inline]: 1.12003e-06 [parallel-infer-symbol]: 1.82004e-06 [pre_auto_parallel]: 2.333e-05 [insert-virtual-dataset]: 2.11003e-06 [parallel-infer-symbol-second]: 4.10015e-07 [dataset_repeat_opt]: 6.80098e-07 [pipeline_split]: 1.36998e-06 [optimize]: 0.00725457, [52] [py_interpret_to_execute]: 1.47e-05 [rewriter_before_opt_a]: 3.335e-05 [opt_a]: 0.00538461, [2] [Cycle 1]: 0.00154356, [43] [expand_dump_flag]: 2.10002e-06 [switch_simplify]: 2.957e-05 [loop_unroll]: 1.312e-05 [a_1]: 0.00035005 [recompute_prepare]: 9.25001e-06 [updatestate_depend_eliminate]: 8.15e-06 [updatestate_assign_eliminate]: 5.44998e-06 [updatestate_loads_eliminate]: 7.53009e-06 [parameter_eliminate]: 3.40992e-06 [a_2]: 0.00011744 [accelerated_algorithm]: 8.65001e-06 [shard]: 2.00002e-06 [meta_shard_fg_expand]: 3.84997e-06 [shard_inline]: 8.81008e-06 [auto_parallel]: 1.21499e-05 [parallel]: 7.88004e-06 [flash_sp]: 1.093e-05 [merge_comm]: 8.00996e-06 [allreduce_fusion]: 5.22006e-06 [matmul_add_comm_reduction]: 1.12399e-05 [allreduce_slice_to_reducescatter]: 5.20027e-07 [virtual_shard_identity]: 9.60997e-06 [virtual_dataset]: 7.86001e-06 [get_grad_eliminate_]: 8.25e-06 [virtual_output]: 7.83999e-06 [merge_forward]: 5.87001e-06 [cell_reuse_recompute_pass]: 1.69e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.688e-05 [before_grad]: 1.408e-05 [inplace_validation]: 5.49993e-06 [meta_fg_expand]: 5.19003e-06 [inplace_validation_after_expand]: 6.54999e-06 [flash_sp_send_recv_attached]: 4.97e-06 [receive_attached]: 2.43005e-06 [after_resolve]: 1.22e-05 [a_after_grad]: 1.309e-05 [special_op_eliminate]: 8.33999e-06 [renormalize]: 0.00044884 [add_forward_monad_depend]: 3.29001e-06 [auto_monad_grad]: 1.50001e-06 [auto_monad_eliminator]: 2.35001e-05 [cse]: 3.35601e-05 [a_3]: 6.04e-05 [Cycle 2]: 0.00077869, [43] [expand_dump_flag]: 1.02993e-06 [switch_simplify]: 9.46003e-06 [loop_unroll]: 7.55e-06 [a_1]: 0.00020531 [recompute_prepare]: 7.47002e-06 [updatestate_depend_eliminate]: 5.78002e-06 [updatestate_assign_eliminate]: 4.68001e-06 [updatestate_loads_eliminate]: 5.27001e-06 [parameter_eliminate]: 1.24995e-06 [a_2]: 0.00010674 [accelerated_algorithm]: 8.52998e-06 [shard]: 1.23004e-06 [meta_shard_fg_expand]: 2.56998e-06 [shard_inline]: 8.10006e-06 [auto_parallel]: 1.081e-05 [parallel]: 3.59991e-06 [flash_sp]: 2.96999e-06 [merge_comm]: 5.67001e-06 [allreduce_fusion]: 4.78991e-06 [matmul_add_comm_reduction]: 7.70006e-06 [allreduce_slice_to_reducescatter]: 2.79979e-07 [virtual_shard_identity]: 8.32998e-06 [virtual_dataset]: 7.43999e-06 [get_grad_eliminate_]: 7.33999e-06 [virtual_output]: 7.08993e-06 [merge_forward]: 4.60004e-06 [cell_reuse_recompute_pass]: 1.74006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.507e-05 [before_grad]: 1.232e-05 [inplace_validation]: 4.15999e-06 [meta_fg_expand]: 4.77e-06 [inplace_validation_after_expand]: 5.28002e-06 [flash_sp_send_recv_attached]: 8.49948e-07 [receive_attached]: 8.10018e-07 [after_resolve]: 9.66003e-06 [a_after_grad]: 1.204e-05-05 [special_op_eliminate]: 7.58003e-06 [renormalize]: 1.00001e-07 [add_forward_monad_depend]: 1.09e-06 [auto_monad_grad]: 1.16008e-06 [auto_monad_eliminator]: 1.78399e-05 [cse]: 1.988e-05 [a_3]: 4.777e-05 [py_interpret_to_execute_after_opt_a]: 9.11008e-06 [slice_cell_reuse_recomputed_activation]: 2.61003e-06 [rewriter_after_opt_a]: 0.00015825 [convert_after_rewriter]: 8.57003e-06 [order_py_execute_after_rewriter]: 5.91006e-06 [opt_b]: 0.00024087, [1] [Cycle 1]: 0.00023573, [7] [b_1]: 0.00016193 [b_2]: 9.67004e-06 [updatestate_depend_eliminate]: 5.44998e-06 [updatestate_assign_eliminate]: 4.30003e-06 [updatestate_loads_eliminate]: 5.23007e-06 [renormalize]: 2.79979e-07 [cse]: 1.818e-05 [optimize_parallel_all_gather_comm]: 8.52009e-06 [overlap_param_gather]: 1.11002e-06 [cconv]: 2.29001e-05 [loop_unroll]: 0.00048482 [opt_after_cconv]: 0.0001318, [1] [Cycle 1]: 0.00012616, [7] [c_1]: 5.238e-05 [parameter_eliminate]: 2.44996e-06 [updatestate_depend_eliminate]: 8.11997e-06 [updatestate_assign_eliminate]: 4.83007e-06 [updatestate_loads_eliminate]: 5.28002e-06 [cse]: 2.16101e-05 [renormalize]: 4.7998e-07 [remove_dup_value]: 1.24399e-05 [tuple_transform]: 6.93901e-05, [1] [Cycle 1]: 6.507e-05, [2] [d_1]: 5.583e-05 [renormalize]: 2.10013e-07 [partial_unused_args_eliminate]: 2.05997e-06 [add_cache_embedding]: 1.399e-05 [add_recomputation]: 6.39e-05 [cse_after_recomputation]: 3.824e-05, [1] [Cycle 1]: 3.366e-05, [1] [cse]: 2.79701e-05 [environ_conv]: 7.61996e-06 [swap_dp_allreduce_reducescatter]: 8.31997e-06 [bias_add_comm_swap]: 2.35008e-06 [label_micro_interleaved_index]: 1.99e-06 [label_fine_grained_interleaved_index]: 1.96998e-06 [merge_cast_opt]: 1.43005e-06 [slice_recompute_activation]: 1.79e-06 [micro_interleaved_order_control]: 1.63005e-06 [assign_add_opt]: 2.901e-05 [ForceFp32Comm]: 8.70088e-07 [remove_cast_before_assign_add]: 7.31996e-06 [full_micro_interleaved_order_control]: 2.21992e-06 [reorder_send_recv_between_fp_bp]: 2.17999e-06 [comm_op_add_attrs]: 2.778e-05 [add_comm_op_reuse_tag]: 1.95997e-06 [interleave_split_concat_branches]: 1.07009e-06 [interleave_parallel_branches]: 6.70087e-07 [overlap_opt_shard_in_pipeline]: 1.43005e-06 [overlap_opt_shard_grad_in_pipeline]: 1.81003e-06 [control_data_broadcast_order]: 1.13004e-06 [grouped_pairwise_exchange_alltoall]: 9.46003e-06 [offloading_packed_experts]: 2.23995e-06 [overlap_recompute_and_grad_model_parallel]: 2.15997e-06 [overlap_grad_matmul_and_grad_allreduce]: 6.3004e-07 [overlap_recompute_allgather_and_fa_grad]: 8.832e-05 [overlap_grad_ring_attention]: 2.20002e-06 [overlap_grad_flash_sp]: 1.381e-05 [begin_end_overlap_inline]: 6.89994e-07 [split_matmul_comm_elemetwise]: 1.89e-06 [split_layernorm_comm]: 1.94996e-06 [handle_group_info]: 5.42006e-06 [symbol_engine_optimizer]: 9.042e-05, [1] [Cycle 1]: 8.583e-05, [6] [build]: 4.60004e-06 [elim_shapecalc]: 1.421e-05 [elim_not_effective]: 1.696e-05 [opt_reshape]: 9.00007e-06 [fold_const_symbol]: 1.31599e-05 [renormalize]: 3.7998e-07 [pipeline_parallel_scheduler]: 1.41992e-06 [auto_monad_reorder]: 3.148e-05 [get_jit_bprop_graph]: 5.60074e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00049957 [distribtued_split]: 4.169e-05 [validate]: 3.419e-05 [task_emit]: 0.0702013 [execute]: 1.159e-05 Sums bootstrap : 0.000318s : 0.41% type_inference : 0.002544s : 3.29% auto_monad : 0.000131s : 0.17% graph_reusing [special_op_eliminate]: 7.23999e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 8.29925e-07 [auto_monad_grad]: 1.30001e-06 [auto_monad_eliminator]: 1.71e-05 [cse]: 2.004e-05 [a_3]: 4.978e-05 [py_interpret_to_execute_after_opt_a]: 9.02999e-06 [slice_cell_reuse_recomputed_activation]: 1.74996e-06 [rewriter_after_opt_a]: 0.00015811 [convert_after_rewriter]: 8.54e-06 [order_py_execute_after_rewriter]: 6.04009e-06 [opt_b]: 0.00024982, [1] [Cycle 1]: 0.00024429, [7] [b_1]: 0.00016808 [b_2]: 1.003e-05 [updatestate_depend_eliminate]: 5.07e-06 [updatestate_assign_eliminate]: 4.63997e-06 [updatestate_loads_eliminate]: 5.49003e-06 [renormalize]: 2.79979e-07 [cse]: 1.909e-05 [optimize_parallel_all_gather_comm]: 7.47002e-06 [overlap_param_gather]: 7.00005e-07 [cconv]: 1.49499e-05 [loop_unroll]: 0.00049306 [opt_after_cconv]: 0.00013718, [1] [Cycle 1]: 0.00013043, [7] [c_1]: 5.37899e-05 [parameter_eliminate]: 2.60002e-06 [updatestate_depend_eliminate]: 8.52998e-06 [updatestate_assign_eliminate]: 4.64008e-06 [updatestate_loads_eliminate]: 5.47001e-06 [cse]: 2.291e-05 [renormalize]: 5.20027e-07 [remove_dup_value]: 9.02999e-06 [tuple_transform]: 6.735e-05, [1] [Cycle 1]: 6.30399e-05, [2] [d_1]: 5.336e-05 [renormalize]: 1.90106e-07 [partial_unused_args_eliminate]: 1.25996e-06 [add_cache_embedding]: 1.11801e-05 [add_recomputation]: 5.114e-05 [cse_after_recomputation]: 3.98e-05, [1] [Cycle 1]: 3.484e-05, [1] [cse]: 2.925e-05 [environ_conv]: 6.16e-06 [swap_dp_allreduce_reducescatter]: 7.55e-06 [bias_add_comm_swap]: 2.4999e-06 [label_micro_interleaved_index]: 2.31003e-06 [label_fine_grained_interleaved_index]: 1.41002e-06 [merge_cast_opt]: 1.45007e-06 [slice_recompute_activation]: 1.62004e-06 [micro_interleaved_order_control]: 1.66008e-06 [assign_add_opt]: 2.89801e-05 [ForceFp32Comm]: 6.3004e-07 [remove_cast_before_assign_add]: 7.47002e-06 [full_micro_interleaved_order_control]: 2.02004e-06 [reorder_send_recv_between_fp_bp]: 2.06998e-06 [comm_op_add_attrs]: 2.742e-05 [add_comm_op_reuse_tag]: 1.99e-06 [interleave_split_concat_branches]: 7.3004e-07 [interleave_parallel_branches]: 8.49948e-07 [overlap_opt_shard_in_pipeline]: 1.06997e-06 [overlap_opt_shard_grad_in_pipeline]: 2.41993e-06 [control_data_broadcast_order]: 1.1099e-06 [grouped_pairwise_exchange_alltoall]: 9.27993e-06 [offloading_packed_experts]: 2.32004e-06 [overlap_recompute_and_grad_model_parallel]: 2.00991e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.30038e-07 [overlap_recompute_allgather_and_fa_grad]: 8.85101e-05 [overlap_grad_ring_attention]: 1.77999e-06 [overlap_grad_flash_sp]: 1.353e-05 [begin_end_overlap_inline]: 9.49949e-07 [split_matmul_comm_elemetwise]: 1.56998e-06 [split_layernorm_comm]: 1.55997e-06 [handle_group_info]: 5.21005e-06 [symbol_engine_optimizer]: 9.053e-05, [1] [Cycle 1]: 8.57e-05, [6] [build]: 4.62995e-06 [elim_shapecalc]: 1.399e-05 [elim_not_effective]: 1.73701e-05 [opt_reshape]: 9.44e-06 [fold_const_symbol]: 1.39e-05 [renormalize]: 3.40049e-07 [pipeline_parallel_scheduler]: 1.05007e-06 [auto_monad_reorder]: 3.129e-05 [get_jit_bprop_graph]: 8.60076e-07 [rewriter_after_jit_bprop_graph]: 4.30038e-07 [eliminate_special_op_node]: 0.00051726 [distribtued_split]: 3.251e-05 [validate]: 3.17199e-05 [task_emit]: 0.0702003 [execute]: 1.152e-05 Sums bootstrap : 0.000317s : 0.41% type_inference : 0.002544s : 3.29% auto_monad : 0.000128s : 0.17% graph_reusing : 0.000001s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000023s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000033s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000553s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000223s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000440s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000158s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000023s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000033s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000555s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000224s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000449s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000041s : 0.05% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000110s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000158s : 0.20% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000168s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000485s : 0.63% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000028s : 0.04% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000088s : 0.11% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engi : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000007s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000493s : 0.64% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000009s : 0.01% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000051s : 0.07% optimize.cse_after_recomputation.cse : 0.000029s : 0.04% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000089s : 0.11% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizne_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000500s : 0.65% distribtued_split : 0.000042s : 0.05% validate : 0.000034s : 0.04% task_emit : 0.070201s : 90.82% execute : 0.000012s : 0.01% er.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000517s : 0.67% distribtued_split : 0.000033s : 0.04% validate : 0.000032s : 0.04% task_emit : 0.070200s : 90.83% execute : 0.000012s : 0.01% Time group info: ------[substitution.] 0.000134 63 5.11% : 0.000007s : 2: substitution.depend_value_elim 2.19% : 0.000003s : 5: substitution.elim_not_effective 1.86% : 0.000002s : 5: substitution.fold_const_symbol 4.12% : 0.000006s : 6: substitution.graph_param_transform 50.91% : 0.000068s : 1: substitution.inline 4.13% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.49% : 0.000005s : 6: substitution.load_eliminater 2.46% : 0.000003s : 2: substitution.reduce_all_const_elim 5.68% : 0.000008s : 10: substitution.remove_not_recompute_node 3.07% : 0.000004s : 2: substitution.replace_old_param 8.96% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.02% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002514 2 88.73% : 0.002231s : 1: type_inference.infer 11.27% : 0.000283s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000067 1 100.00% : 0.000067s : 1: match.inline ------[predicate.] 0.000231 1420 0.91% : 0.000002s : 13: predicate.accumulaten_eliminater 1.18% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.68% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.50% : 0.000006s : 25: predicate.arithmetic_simplify 0.79% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.70% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.26% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.89% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.89% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.29% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.91% : 0.000004s : 31: predicate.environ_get_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.78% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.33% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.94% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.72% : 0.000013s : 63: predicate.inline 1.02% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.03% : 0.000002s : 12: predicate.less_batch_normalization 1.65% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.46% : 0.000006s : 38: predicate.load_eliminater 1.28% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.86% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.90% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.70% : 0.000002s : 13: predicate.minmaximum_grad 0.80% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.45% : 0.000001s : 6: predicate.parallel_virtual_node 1.11% : 0.000003s : 14: predicate.partial_defer_inline 1.33% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.82% : 0.000002s : 12: predicate.reduce_all_const_elim 1.13% : 0.000003s : 13: predicate.reduce_eliminate 0.53% : 0.000001s : 12: predicate.remove_not_recompute_node 1.12% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.51% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.90% : 0.000002s : 12: predicate.shard_identity_eliminate 1.37% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 0.97% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.09% : 0.000003s : 12: predicate.stack_unstack_eliminate 2.25% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.93% : 0.000002s : 14: predicate.switch_defer_inline 1.63% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.40% : 0.000010s : 43: predicate.switch_simplify 0.73% : 0.000002s : 13: predicate.tile_eliminate 0.81% : 0.000002s : 13: predicate.transpose_eliminate 1.84% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.83% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.68% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.57% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.31% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.44% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.81% : 0.000002s : 12: predicate.virtual_output_eliminate 0.68% : 0.000002s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000151 4 6.09% : 0.000009s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.91% : 0.000142s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090464 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.06% : 0.000056s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.16% : 0.000141s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000346s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.03% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.05% : 0.000043s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.04% : 0.000040s : 1: distribtued_split 0.59% : 0.000532s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000009s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000503s : 1: loop_unroll 0.01% : 0.000005s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001119s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.18% : 0.000158s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.04% : 0.000034s : 3: opt.transform.special_op_eliminate 0.06% : 0.000050s : 4: opt.transform.symbol_engine_opt 5.96% : 0.005389s : 1: opt_a 0.16% : 0.000142s : 1: opt_after_cconv 0.28% : 0.000253s : 1: opt_b 8.03% : 0.007263s : 1: optimize 0.02% : 0.000018s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.10% : 0.000094s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000029s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.01% : 0.000013s : 1: remove_dup_value 0.27% : 0.000248s : 1: renormalize.infer 0.22% : 0.000196s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000164s : 1: rewriter_after_opt_a 0.04% : 0.000037s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000093s : 1: symbol_engine_optimizer 77.64% : 0.070233s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.83% : 0.002564s : 1: type_inference 0.07% : 0.000063s : 1: validate Time group info: ------[substitution.] 0.000134 63 3.61% : 0.000005s : 2: substitution.depend_value_elim 2.09% : 0.000003s : 5: substitution.elim_not_effective 1.69% : 0.000002s : 5: substitution.fold_const_symbol 5.67% : 0.000008s : 6: substitution.graph_param_transform 52.36% : 0.000070s : 1: substitution.inline 3.97% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.33% : 0.000004s : 6: substitution.load_eliminater 2.20% : 0.000003s : 2: substitution.reduce_all_const_elim 6.05% : 0.000008s : 10: substitution.remove_not_recompute_node 2.75% : 0.000004s : 2: substitution.replace_old_param 8.35% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.93% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002515 2 88.70% : 0.002231s : 1: type_inference.infer 11.30% : 0.000284s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000069 1 100.00% : 0.000069s : 1: match.inline ------[predicate.] 0.000231 1420 0.77% : 0.000002s : 13: predicate.accumulaten_eliminater 1.19% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.24% : 0.000005s : 25: predicate.arithmetic_simplify 1.02% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.69% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.39% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.78% : 0.000002s : 12: predicate.depend_value_elim 0.87% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.92% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.91% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.32% : 0.000001s : 6: predicate.elim_not_effective 0.59% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.20% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.22% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_depend_swap 1.91% : 0.000004s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.93% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.26% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.24% : 0.000001s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.75% : 0.000013s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.07% : 0.000002s : 12: predicate.less_batch_normalization 1.78% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.41% : 0.000006s : 38: predicate.load_eliminater 1.25% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.18% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.73% : 0.000002s : 6: predicate.mutable_eliminate 0.49% : 0.000001s : 6: predicate.opt_reshape 0.61% : 0.000001s : 6: predicate.parallel_virtual_node 1.12% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.81% : 0.000002s : 13: predicate.print_const_string_wrapper 0.90% : 0.000002s : 12: predicate.reduce_all_const_elim 1.17% : 0.000003s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.83% : 0.000002s : 13: predicate.reshape_eliminate 0.78% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.98% : 0.000002s : 12: predicate.shard_identity_eliminate 1.29% : 0.000003s : 18: predicate.special_op_eliminate 0.98% : 0.000002s : 12: predicate.specialize_transform 0.99% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.37% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.74% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.19% : 0.000010s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.82% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.44% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.72% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.54% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.47% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.31% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.55% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.88% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000158 4 10.25% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.75% : 0.000142s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090433 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000068s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.16% : 0.000144s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000346s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.03% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.05% : 0.000041s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.57% : 0.000514s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000009s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000495s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001108s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000050s : 4: opt.transform.symbol_engine_opt 5.96% : 0.005387s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.27% : 0.000244s : 1: opt_b 8.03% : 0.007263s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.10% : 0.000093s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000028s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000233s : 1: renormalize.infer 0.22% : 0.000201s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000164s : 1: rewriter_after_opt_a 0.04% : 0.000037s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000093s : 1: symbol_engine_optimizer 77.66% : 0.070234s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.83% : 0.002563s : 1: type_inference 0.08% : 0.000071s : 1: validate TotalTime = 0.0822819, [21] [bootstrap]: 0.00028865 [type_inference]: 0.00246458 [auto_monad]: 0.0001261 [graph_reusing]: 2.34006e-06 [inline]: 1.35007e-06 [parallel-infer-symbol]: 2.51003e-06 [pre_auto_parallel]: 2.311e-05 [insert-virtual-dataset]: 2.79001e-06 [parallel-infer-symbol-second]: 3.69968e-07 [dataset_repeat_opt]: 1.41002e-06 [pipeline_split]: 1.54995e-06 [optimize]: 0.00714972, [52] [py_interpret_to_execute]: 1.502e-05 [rewriter_before_opt_a]: 3.528e-05 [opt_a]: 0.0052922, [2] [Cycle 1]: 0.00159316, [43] [expand_dump_flag]: 3.56999e-06 [switch_simplify]: 2.93299e-05 [loop_unroll]: 1.423e-05 [a_1]: 0.00034503 [recompute_prepare]: 8.70007e-06 [updatestate_depend_eliminate]: 8.61997e-06 [updatestate_assign_eliminate]: 5.70004e-06 [updatestate_loads_eliminate]: 7.40995e-06 [parameter_eliminate]: 3.11993e-06 [a_2]: 0.00011666 [accelerated_algorithm]: 8.86002e-06 [shard]: 2.05997e-06 [meta_shard_fg_expand]: 3.93996e-06 [shard_inline]: 8.52998e-06 [auto_parallel]: 1.252e-05 [parallel]: 7.2401e-06 [flash_sp]: 1.017e-05 [merge_comm]: 8.10996e-06 [allreduce_fusion]: 5.22996e-06 [matmul_add_comm_reduction]: 1.124e-05 [allreduce_slice_to_reducescatter]: 4.50062e-07 [virtual_shard_identity]: 9.50007e-06 [virtual_dataset]: 7.80995e-06 [get_grad_eliminate_]: 7.53999e-06 [virtual_output]: 7.77002e-06 [merge_forward]: 6.18992e-06 [cell_reuse_recompute_pass]: 1.77009e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.63e-05 [before_grad]: 1.331e-05 [inplace_validation]: 4.78001e-06 [meta_fg_expand]: 5.80994e-06 [inplace_validation_after_expand]: 5.99003e-06 [flash_sp_send_recv_attached]: 4.34997e-06 [receive_attached]: 2.66999e-06 [after_resolve]: 1.12e-05 [a_after_grad]: 1.299e-05 [special_op_eliminate]: 7.75e-06 [renormalize]: 0.0004452 [add_forward_monad_depend]: 3.79991e-06 [auto_monad_grad]: 1.91003e-06 [auto_monad_eliminator]: 3.141e-05 [cse]: 3.203e-05 [a_3]: 5.919e-05 [Cycle 2]: 0.00079392, [43] [expand_dump_flag]: 1.26008e-06 [switch_simplify]: 9.12999e-06 [loop_unroll]: 7.71997e-06 [a_1]: 0.00020311 [recompute_prepare]: 7.28993e-06 [updatestate_depend_eliminate]: 6.26e-06 [updatestate_assign_eliminate]: 5.13007e-06 [updatestate_loads_eliminate]: 6.19993e-06 [parameter_eliminate]: 1.31002e-06 [a_2]: 0.00010604 [accelerated_algorithm]: 8.48004e-06 [shard]: 1.13994e-06 [meta_shard_fg_expand]: 2.71003e-06 [shard_inline]: 8.55001e-06 [auto_parallel]: 1.142e-05 [parallel]: 3.79002e-06 [flash_sp]: 3.26999e-06 [merge_comm]: 6.16e-06 [allreduce_fusion]: 5.27001e-06 [matmul_add_comm_reduction]: 8.42009e-06 [allreduce_slice_to_reducescatter]: 2.20025e-07 [virtual_shard_identity]: 9.01998e-06 [virtual_dataset]: 7.46991e-06 [get_grad_eliminate_]: 7.16001e-06 [virtual_output]: 7.22997e-06 [merge_forward]: 4.78001e-06 [cell_reuse_recompute_pass]: 2.00002e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.55199e-05 [before_grad]: 1.24499e-05 [inplace_validation]: 4.47e-06 [meta_fg_expand]: 4.94998e-06 [inplace_validation_after_expand]: 5.32006e-06 [flash_sp_send_recv_attached]: 9.29926e-07 [receive_attached]: 9.69972e-07 [after_resolve]: 1.056e-05 [a_after_grad]: 1.22e-05 [special_op_eliminate]: 7.25e-06 [renormalize]: 5.99539e-08 [add_forward_monad_depend]: 9.79984e-07 [auto_monad_grad]: 1.20001e-06 [auto_monad_eliminator]: 1.829e-05 [cse]: 1.967e-05 [a_3]: 4.827e-05 [py_interpret_to_execute_after_opt_a]: 9.68995e-06 [slice_cell_reuse_recomputed_activation]: 2.00991e-06 [rewriter_after_opt_a]: 0.00014796 [convert_after_rewriter]: 8.66002e-06 [order_py_execute_after_rewriter]: 5.92996e-06 [opt_b]: 0.00024424, [1] [Cycle 1]: 0.0002385, [7] [b_1]: 0.00016246 [b_2]: 9.92999e-06 [updatestate_depend_eliminate]: 5.49993e-06 [updatestate_assign_eliminate]: 4.63007e-06 [updatestate_loads_eliminate]: 5.27001e-06 [renormalize]: 4.29922e-07 [cse]: 1.847e-05 [optimize_parallel_all_gather_comm]: 8.48004e-06 [overlap_param_gather]: 9.10019e-07 [cconv]: 2.325e-05 [loop_unroll]: 0.0004835 [opt_after_cconv]: 0.00013563, [1] [Cycle 1]: 0.00012937, [7] [c_1]: 5.25999e-05 [parameter_eliminate]: 2.48e-06 [updatestate_depend_eliminate]: 8.31007e-06 [updatestate_assign_eliminate]: 4.91994e-06 [updatestate_loads_eliminate]: 5.76e-06 [cse]: 2.21401e-05 [renormalize]: 3.50061e-07 [remove_dup_value]: 1.25701e-05 [tuple_transform]: 6.926e-05, [1] [Cycle 1]: 6.511e-05, [2] [d_1]: 5.562e-05 [renormalize]: 1.30036e-07 [partial_unused_args_eliminate]: 2.05007e-06 [add_cache_embedding]: 1.38e-05 [add_recomputation]: 6.27401e-05 [cse_after_recomputation]: 2.623e-05, [1] [Cycle 1]: 2.155e-05, [1] [cse]: 1.661e-05 [environ_conv]: 7.37002e-06 [swap_dp_allreduce_reducescatter]: 7.65e-06 [bias_add_comm_swap]: 2.16009e-06 [label_micro_interleaved_index]: 1.96998e-06 [label_fine_grained_interleaved_index]: 2.06998e-06 [merge_cast_opt]: 1.11992e-06 [slice_recompute_activation]: 1.76008e-06 [micro_interleaved_order_control]: 1.90001e-06 [assign_add_opt]: 3.037e-05 [ForceFp32Comm]: 8.70088e-07 [remove_cast_before_assign_add]: 7.29004e-06 [full_micro_interleaved_order_control]: 2.20002e-06 [reorder_send_recv_between_fp_bp]: 1.72004e-06 [comm_op_add_attrs]: 2.635e-05 [add_comm_op_reuse_tag]: 1.73005e-06 [interleave_split_concat_branches]: 8.60076e-07 [interleave_parallel_branches]: 8.39937e-07 [overlap_opt_shard_in_pipeline]: 1.09e-06 [overlap_opt_shard_grad_in_pipeline]: 1.94996e-06 [control_data_broadcast_order]: 1.03004e-06 [grouped_pairwise_exchange_alltoall]: 9.22999e-06 [offloading_packed_experts]: 1.84006e-06 [overlap_recompute_and_grad_model_parallel]: 1.73994e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.40053e-07 [overlap_recompute_allgather_and_fa_grad]: 7.159e-05 [overlap_grad_ring_attention]: 1.71002e-06 [overlap_grad_flash_sp]: 1.41e-05 [begin_end_overlap_inline]: 8.39937e-07 [split_matmul_comm_elemetwise]: 1.751e-05 [split_layernorm_comm]: 2.03995e-06 [handle_group_info]: 4.38001e-06 [symbol_engine_optimizer]: 9.089e-05, [1] [Cycle 1]: 8.61201e-05, [6] [build]: 5.01005e-06 [elim_shapecalc]: 1.35e-05 [elim_not_effective]: 1.676e-05 [opt_reshape]: 9.04e-06 [fold_const_symbol]: 1.397e-05 [renormalize]: 3.10014e-07 [pipeline_parallel_scheduler]: 1.53005e-06 [auto_monad_reorder]: 3.14299e-05 [get_jit_bprop_graph]: 4.29922e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.0005099 [distribtued_split]: 3.967e-05 [validate]: 3.44999e-05 [task_emit]: 0.0713526 [execute]: 1.054e-05 Sums bootstrap : 0.000289s : 0.37% type_inference : 0.002465s : 3.15% auto_monad : 0.000126s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000023s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000548s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000223s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000445s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000052s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000148s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000484s : 0.62% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000030s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000026s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000072s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000018s : 0.02% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000004s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000510s : 0.65% distribtued_split : 0.000040s : 0.05% validate : 0.000034s : 0.04% task_emit : 0.071353s : 91.10% execute : 0.000011s : 0.01% TotalTime = 0.0823225, [21] [bootstrap]: 0.00031913 [type_inference]: 0.00258875 [auto_monad]: 0.00014291 [graph_reusing]: 2.39001e-06 [inline]: 1.46998e-06 [parallel-infer-symbol]: 2.15997e-06 [pre_auto_parallel]: 2.59201e-05 [insert-virtual-dataset]: 3.22005e-06 [parallel-infer-symbol-second]: 3.7998e-07 [dataset_repeat_opt]: 2.05007e-06 [pipeline_split]: 1.88011e-06 [optimize]: 0.0073612, [52] [py_interpret_to_execute]: 1.768e-05 [rewriter_before_opt_a]: 3.696e-05 [opt_a]: 0.00546658, [2] [Cycle 1]: 0.0015886, [43] [expand_dump_flag]: 3.55009e-06 [switch_simplify]: 3.18701e-05 [loop_unroll]: 1.35e-05 [a_1]: 0.00035019 [recompute_prepare]: 9.09006e-06 [updatestate_depend_eliminate]: 9.36002e-06 [updatestate_assign_eliminate]: 7.07002e-06 [updatestate_loads_eliminate]: 7.76001e-06 [parameter_eliminate]: 3.41004e-06 [a_2]: 0.00012055 [accelerated_algorithm]: 8.88004e-06 [shard]: 2.15007e-06 [meta_shard_fg_expand]: 4.05998e-06 [shard_inline]: 8.70007e-06 [auto_parallel]: 1.201e-05 [parallel]: 8.07003e-06 [flash_sp]: 1.174e-05 [merge_comm]: 8.45001e-06 [allreduce_fusion]: 5.73997e-06 [matmul_add_comm_reduction]: 1.16701e-05 [allreduce_slice_to_reducescatter]: 4.90109e-07 [virtual_shard_identity]: 1.036e-05 [virtual_dataset]: 8.65001e-06 [get_grad_eliminate_]: 8.01997e-06 [virtual_output]: 7.78993e-06 [merge_forward]: 6.52997e-06 [cell_reuse_recompute_pass]: 1.63994e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.71e-05 [before_grad]: 1.47499e-05 [inplace_validation]: 5.30004e-06 [meta_fg_expand]: 5.60004e-06 [inplace_validation_after_expand]: 7.82998e-06 [flash_sp_send_recv_attached]: 5.28991e-06 [receive_attached]: 2.63005e-06 [after_resolve]: 1.12699e-05 [a_after_grad]: 1.293e-05 [special_op_eliminate]: 7.92998e-06 [renormalize]: 0.00046092 [add_forward_monad_depend]: 3.58e-06 [auto_monad_grad]: 1.96998e-06 [auto_monad_eliminator]: 3.087e-05 [cse]: 3.557e-05 [a_3]: 5.92101e-05 [Cycle 2]: 0.00079515, [43] [expand_dump_flag]: 1.16997e-06 [switch_simplify]: 9.02009e-06 [loop_unroll]: 8.00996e-06 [a_1]: 0.0002046 [recompute_prepare]: 7.58993e-06 [updatestate_depend_eliminate]: 6.12997e-06 [updatestate_assign_eliminate]: 4.98001e-06 [updatestate_loads_eliminate]: 5.87001e-06 [parameter_eliminate]: 1.21002e-06 [a_2]: 0.00010704 [accelerated_algorithm]: 8.87003e-06 [shard]: 1.01002e-06 [meta_shard_fg_expand]: 2.61003e-06 [shard_inline]: 8.15e-06 [auto_parallel]: 1.099e-05 [parallel]: 3.40003e-06 [flash_sp]: 3.39001e-06 [merge_comm]: 6.26e-06 [allreduce_fusion]: 5.14009e-06 [matmul_add_comm_reduction]: 8.14011e-06 [allreduce_slice_to_reducescatter]: 3.20026e-07 [virtual_shard_identity]: 8.91997e-06 [virtual_dataset]: 7.57002e-06 [get_grad_eliminate_]: 7.81997e-06 [virtual_output]: 7.33009e-06 [merge_forward]: 4.93997e-06 [cell_reuse_recompute_pass]: 1.80001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.599e-05 [before_grad]: 1.22801e-05 [inplace_validation]: 4.33007e-06 [meta_fg_expand]: 4.68001e-06 [inplace_validation_after_expand]: 5.39992e-06 [flash_sp_send_recv_attached]: 8.30041e-07 [receive_attached]: 6.79982e-07 [after_resolve]: 1.016e-05 [a_after_grad]: 1.2e-05 [special_op_eliminate]: 7.37992e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 9.30042e-07 [auto_monad_grad]: 1.23994e-06 [auto_monad_eliminator]: 1.94201e-05 [cse]: 1.997e-05 [a_3]: 4.901e-05 [py_interpret_to_execute_after_opt_a]: 9.11008e-06 [slice_cell_reuse_recomputed_activation]: 2.13995e-06 [rewriter_after_opt_a]: 0.00014622 [convert_after_rewriter]: 9.38994e-06 [order_py_execute_after_rewriter]: 6.13998e-06 [opt_b]: 0.00025058, [1] [Cycle 1]: 0.0002449, [7] [b_1]: 0.0001674 [b_2]: 1.036e-05 [updatestate_depend_eliminate]: 5.30004e-06 [updatestate_assign_eliminate]: 4.59002e-06 [updatestate_loads_eliminate]: 5.20993e-06 [renormalize]: 3.40049e-07 [cse]: 1.955e-05 [optimize_parallel_all_gather_comm]: 8.45001e-06 [overlap_param_gather]: 1.05007e-06 [cconv]: 2.46101e-05 [loop_unroll]: 0.00049459 [opt_after_cconv]: 0.00013803, [1] [Cycle 1]: 0.00013177, [7] [c_1]: 5.451e-05 [parameter_eliminate]: 2.41003e-06 [updatestate_depend_eliminate]: 8.84011e-06 [updatestate_assign_eliminate]: 4.73997e-06 [updatestate_loads_eliminate]: 5.83997e-06 [cse]: 2.244e-05 [renormalize]: 4.30038e-07 [remove_dup_value]: 1.40599e-05 [tuple_transform]: 7.374e-05, [1] [Cycle 1]: 6.885e-05, [2] [d_1]: 5.853e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 2.21003e-06 [add_cache_embedding]: 1.43599e-05 [add_recomputation]: 6.38601e-05 [cse_after_recomputation]: 2.82901e-05, [1] [Cycle 1]: 2.338e-05, [1] [cse]: 1.83e-05 [environ_conv]: 7.82998e-06 [swap_dp_allreduce_reducescatter]: 8.22998e-06 [bias_add_comm_swap]: 2.46998e-06 [label_micro_interleaved_index]: 2.12004e-06 [label_fine_grained_interleaved_index]: 2.12993e-06 [merge_cast_opt]: 1.23004e-06 [slice_recompute_activation]: 2.05997e-06 [micro_interleaved_order_control]: 2.04996e-06 [assign_add_opt]: 2.908e-05 [ForceFp32Comm]: 9.39937e-07 [remove_cast_before_assign_add]: 7.36001e-06 [full_micro_interleaved_order_control]: 2.09e-06 [reorder_send_recv_between_fp_bp]: 2.1999e-06 [comm_op_add_attrs]: 2.83199e-05 [add_comm_op_reuse_tag]: 2.03005e-06 [interleave_split_concat_branches]: 9.00007e-07 [interleave_parallel_branches]: 1.09e-06 [overlap_opt_shard_in_pipeline]: 1.43994e-06 [overlap_opt_shard_grad_in_pipeline]: 2.98989e-06 [control_data_broadcast_order]: 1.13994e-06 [grouped_pairwise_exchange_alltoall]: 1.044e-05 [offloading_packed_experts]: 2.37999e-06 [overlap_recompute_and_grad_model_parallel]: 2.10002e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.11002e-06 [overlap_recompute_allgather_and_fa_grad]: 6.351e-05 [overlap_grad_ring_attention]: 2.52004e-06 [overlap_grad_flash_sp]: 1.58399e-05 [begin_end_overlap_inline]: 7.89994e-07 [split_matmul_comm_elemetwise]: 1.96008e-06 [split_layernorm_comm]: 1.82993e-06 [handle_group_info]: 5.26989e-06 [symbol_engine_optimizer]: 9.37199e-05, [1] [Cycle 1]: 8.85701e-05, [6] [build]: 5.12006e-06 [elim_shapecalc]: 1.31699e-05 [elim_not_effective]: 1.80299e-05 [opt_reshape]: 9.18994e-06 [fold_const_symbol]: 1.492e-05 [renormalize]: 2.70084e-07 [pipeline_parallel_scheduler]: 1.63005e-06 [auto_monad_reorder]: 3.232e-05 [get_jit_bprop_graph]: 5.70086e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00051315 [distribtued_split]: 4.18799e-05 [validate]: 3.757e-05 [task_emit]: 0.0709805 [execute]: 1.184e-05 Sums bootstrap : 0.000319s : 0.41% type_inference : 0.002589s : 3.31% auto_monad : 0.000143s : 0.18% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000018s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000041s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000555s : 0.71% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.02% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000228s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000013s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000461s : 0.59% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000056s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000146s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000167s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000495s : 0.63% optimize.opt_after_cconv.c_1 : 0.000055s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000059s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000064s : 0.08% optimize.overlap_grad_ring_attention : 0.000003s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000513s : 0.66% distribtued_split : 0.000042s : 0.05% validate : 0.000038s : 0.05% task_emit : 0.070981s : 90.76% execute : 0.000012s : 0.02% TotalTime = 0.0825412, [21] [bootstrap]: 0.00030441 [type_inference]: 0.00252484 [auto_monad]: 0.00012861 [graph_reusing]: 2.99001e-06 [inline]: 1.77999e-06 [parallel-infer-symbol]: 2.03005e-06 [pre_auto_parallel]: 2.484e-05 [insert-virtual-dataset]: 3.06999e-06 [parallel-infer-symbol-second]: 3.49944e-07 [dataset_repeat_opt]: 1.07998e-06 [pipeline_split]: 1.53994e-06 [optimize]: 0.00719617, [52] [py_interpret_to_execute]: 1.522e-05 [rewriter_before_opt_a]: 3.428e-05 [opt_a]: 0.00534742, [2] [Cycle 1]: 0.00153526, [43] [expand_dump_flag]: 2.91993e-06 [switch_simplify]: 2.907e-05 [loop_unroll]: 1.378e-05 [a_1]: 0.00034888 [recompute_prepare]: 8.50996e-06 [updatestate_depend_eliminate]: 8.25e-06 [updatestate_assign_eliminate]: 5.82996e-06 [updatestate_loads_eliminate]: 6.87002e-06 [parameter_eliminate]: 3.05008e-06 [a_2]: 0.0001184 [accelerated_algorithm]: 8.46991e-06 [shard]: 2.74007e-06 [meta_shard_fg_expand]: 3.89002e-06 [shard_inline]: 9.09995e-06 [auto_parallel]: 1.19e-05 [parallel]: 6.88003e-06 [flash_sp]: 1.059e-05 [merge_comm]: 7.89994e-06 [allreduce_fusion]: 5.74999e-06 [matmul_add_comm_reduction]: 1.07201e-05 [allreduce_slice_to_reducescatter]: 5.89993e-07 [virtual_shard_identity]: 1.00001e-05 [virtual_dataset]: 8.37992e-06 [get_grad_eliminate_]: 7.78993e-06 [virtual_output]: 7.69994e-06 [merge_forward]: 6.12997e-06 [cell_reuse_recompute_pass]: 1.62004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.68801e-05 [before_grad]: 1.36e-05 [inplace_validation]: 4.73997e-06 [meta_fg_expand]: 5.66e-06 [inplace_validation_after_expand]: 5.99003e-06 [flash_sp_send_recv_attached]: 4.64998e-06 [receive_attached]: 2.55008e-06 [after_resolve]: 1.10801e-05 [a_after_grad]: 1.25701e-05 [special_op_eliminate]: 7.92998e-06 [renormalize]: 0.00043521 [add_forward_monad_depend]: 3.65998e-06 [auto_monad_grad]: 2.44007e-06 [auto_monad_eliminator]: 3.212e-05 [cse]: 3.269e-05 [a_3]: 6.086e-05 [Cycle 2]: 0.00079017, [43] [expand_dump_flag]: 1.12003e-06 [switch_simplify]: 9.15001e-06 [loop_unroll]: 8.18004e-06 [a_1]: 0.00020953 [recompute_prepare]: 7.48003e-06 [updatestate_depend_eliminate]: 5.93998e-06 [updatestate_assign_eliminate]: 4.62995e-06 [updatestate_loads_eliminate]: 5.61005e-06 [parameter_eliminate]: 1.40001e-06 [a_2]: 0.0001067 [accelerated_algorithm]: 8.41008e-06 [shard]: 1.33005e-06 [meta_shard_fg_expand]: 2.76999e-06 [shard_inline]: 7.83999e-06 [auto_parallel]: 1.062e-05 [parallel]: 4.15999e-06 [flash_sp]: 4.11004e-06 [merge_comm]: 5.71995e-06 [allreduce_fusion]: 4.88991e-06 [matmul_add_comm_reduction]: 8.00996e-06 [allreduce_slice_to_reducescatter]: 2.89991e-07 [virtual_shard_identity]: 8.35001e-06 [virtual_dataset]: 7.50006e-06 [get_grad_eliminate_]: 7.26001e-06 [virtual_output]: 7.2699e-06 [merge_forward]: 4.88001e-06 [cell_reuse_recompute_pass]: 1.67009e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.519e-05 [before_grad]: 1.26701e-05 [inplace_validation]: 4.2899e-06 [meta_fg_expand]: 4.91005e-06 [inplace_validation_after_expand]: 4.99003e-06 [flash_sp_send_recv_attached]: 1.04005e-06 [receive_attached]: 7.69971e-07 [after_resolve]: 9.80997e-06 [a_after_grad]: 1.196e-05 [special_op_eliminate]: 7.15e-06 [renormalize]: 1.00001e-07 [add_forward_monad_depend]: 1.06997e-06 [auto_monad_grad]: 1.13004e-06 [auto_monad_eliminator]: 1.829e-05 [cse]: 2.046e-05 [a_3]: 4.838e-05 [py_interpret_to_execute_after_opt_a]: 9.00007e-06 [slice_cell_reuse_recomputed_activation]: 2.41993e-06 [rewriter_after_opt_a]: 0.00014567 [convert_after_rewriter]: 8.38004e-06 [order_py_execute_after_rewriter]: 6.44999e-06 [opt_b]: 0.00024319, [1] [Cycle 1]: 0.00023803, [7] [b_1]: 0.00016294 [b_2]: 9.68005e-06 [updatestate_depend_eliminate]: 5.38002e-06 [updatestate_assign_eliminate]: 4.40993e-06 [updatestate_loads_eliminate]: 5.52007e-06 [renormalize]: 2.40048e-07 [cse]: 1.93199e-05 [optimize_parallel_all_gather_comm]: 8.51997e-06 [overlap_param_gather]: 1.19989e-06 [cconv]: 2.265e-05 [loop_unroll]: 0.00048916 [opt_after_cconv]: 0.00013595, [1] [Cycle 1]: 0.00012999, [7] [c_1]: 5.44499e-05 [parameter_eliminate]: 2.66009e-06 [updatestate_depend_eliminate]: 8.07992e-06 [updatestate_assign_eliminate]: 4.85999e-06 [updatestate_loads_eliminate]: 5.38002e-06 [cse]: 2.262e-05 [renormalize]: 3.7998e-07 [remove_dup_value]: 1.23599e-05 [tuple_transform]: 7.015e-05, [1] [Cycle 1]: 6.593e-05, [2] [d_1]: 5.70901e-05 [renormalize]: 2.00002e-07 [partial_unused_args_eliminate]: 2.04996e-06 [add_cache_embedding]: 1.27701e-05 [add_recomputation]: 6.297e-05 [cse_after_recomputation]: 2.77699e-05, [1] [Cycle 1]: 2.323e-05, [1] [cse]: 1.846e-05 [environ_conv]: 7.19994e-06 [swap_dp_allreduce_reducescatter]: 7.25e-06 [bias_add_comm_swap]: 2.11003e-06 [label_micro_interleaved_index]: 1.86008e-06 [label_fine_grained_interleaved_index]: 1.86998e-06 [merge_cast_opt]: 1.05007e-06 [slice_recompute_activation]: 1.92004e-06 [micro_interleaved_order_control]: 1.63005e-06 [assign_add_opt]: 2.863e-05 [ForceFp32Comm]: 1.06997e-06 [remove_cast_before_assign_add]: 7.07991e-06 [full_micro_interleaved_order_control]: 2.29001e-06 [reorder_send_recv_between_fp_bp]: 2.20002e-06 [comm_op_add_attrs]: 2.73e-05 [add_comm_op_reuse_tag]: 1.76998e-06 [interleave_split_concat_branches]: 7.59959e-07 [interleave_parallel_branches]: 8.5996e-07 [overlap_opt_shard_in_pipeline]: 1.20001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.03005e-06 [control_data_broadcast_order]: 1.23004e-06 [grouped_pairwise_exchange_alltoall]: 9.18994e-06 [offloading_packed_experts]: 2.20002e-06 [overlap_recompute_and_grad_model_parallel]: 1.62004e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.11002e-06 [overlap_recompute_allgather_and_fa_grad]: 6.845e-05 [overlap_grad_ring_attention]: 2.29001e-06 [overlap_grad_flash_sp]: 1.478e-05 [begin_end_overlap_inline]: 9.89996e-07 [split_matmul_comm_elemetwise]: 2.02004e-06 [split_layernorm_comm]: 1.66998e-06 [handle_group_info]: 4.65999e-06 [symbol_engine_optimizer]: 9.13499e-05, [1] [Cycle 1]: 8.65e-05, [6] [build]: 5.19992e-06 [elim_shapecalc]: 1.391e-05 [elim_not_effective]: 1.759e-05 [opt_reshape]: 8.55001e-06 [fold_const_symbol]: 1.42599e-05 [renormalize]: 3.59956e-07 [pipeline_parallel_scheduler]: 1.47999e-06 [auto_monad_reorder]: 3.08601e-05 [get_jit_bprop_graph]: 4.69969e-07 [rewriter_after_jit_bprop_graph]: 4.30038e-07 [eliminate_special_op_node]: 0.00051245 [distribtued_split]: 4.053e-05 [validate]: 3.478e-05 [task_emit]: 0.0714813 [execute]: 9.77993e-06 Sums bootstrap : 0.000304s : 0.39% type_inference : 0.002525s : 3.22% auto_monad : 0.000129s : 0.16% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000558s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000225s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.01% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000435s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000109s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000146s : 0.19% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000489s : 0.62% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000068s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000512s : 0.65% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.04% task_emit : 0.071481s : 91.03% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000130 63 5.09% : 0.000007s : 2: substitution.depend_value_elim 2.04% : 0.000003s : 5: substitution.elim_not_effective 1.76% : 0.000002s : 5: substitution.fold_const_symbol 5.53% : 0.000007s : 6: substitution.graph_param_transform 50.59% : 0.000066s : 1: substitution.inline 3.95% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.21% : 0.000004s : 6: substitution.load_eliminater 2.71% : 0.000004s : 2: substitution.reduce_all_const_elim 5.57% : 0.000007s : 10: substitution.remove_not_recompute_node 2.63% : 0.000003s : 2: substitution.replace_old_param 8.65% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.27% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002435 2 88.65% : 0.002158s : 1: type_inference.infer 11.35% : 0.000276s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000231 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.23% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.78% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.22% : 0.000005s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.80% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.44% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.79% : 0.000002s : 12: predicate.depend_value_elim 0.88% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.81% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.65% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.24% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_depend_swap 1.95% : 0.000005s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.24% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.75% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.75% : 0.000013s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.10% : 0.000003s : 12: predicate.less_batch_normalization 1.76% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.34% : 0.000005s : 38: predicate.load_eliminater 1.31% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.36% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.75% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.70% : 0.000002s : 13: predicate.minmaximum_grad 0.91% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.18% : 0.000003s : 14: predicate.partial_defer_inline 1.20% : 0.000003s : 19: predicate.partial_eliminate 0.75% : 0.000002s : 13: predicate.print_const_string_wrapper 0.91% : 0.000002s : 12: predicate.reduce_all_const_elim 1.14% : 0.000003s : 13: predicate.reduce_eliminate 0.58% : 0.000001s : 12: predicate.remove_not_recompute_node 1.10% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.88% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.44% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.29% : 0.000003s : 18: predicate.special_op_eliminate 0.91% : 0.000002s : 12: predicate.specialize_transform 1.03% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.20% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.63% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.53% : 0.000010s : 43: predicate.switch_simplify 0.82% : 0.000002s : 13: predicate.tile_eliminate 0.78% : 0.000002s : 13: predicate.transpose_eliminate 1.88% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.64% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.50% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.67% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.69% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.67% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.40% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.61% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000159 4 10.35% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.65% : 0.000142s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091264 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000068s : 1: add_recomputation 0.04% : 0.000034s : 1: assign_add_opt 0.15% : 0.000138s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.34% : 0.000312s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.03% : 0.000031s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.57% : 0.000523s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000493s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001105s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 5.80% : 0.005296s : 1: opt_a 0.15% : 0.000140s : 1: opt_after_cconv 0.27% : 0.000247s : 1: opt_b 7.84% : 0.007158s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000077s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000007s : 1: pipeline_split 0.03% : 0.000029s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000239s : 1: renormalize.infer 0.22% : 0.000199s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000154s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.02% : 0.000021s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000094s : 1: symbol_engine_optimizer 78.21% : 0.071379s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.72% : 0.002482s : 1: type_inference 0.08% : 0.000070s : 1: validate Time group info: ------[substitution.] 0.000139 63 5.12% : 0.000007s : 2: substitution.depend_value_elim 2.42% : 0.000003s : 5: substitution.elim_not_effective 2.14% : 0.000003s : 5: substitution.fold_const_symbol 5.38% : 0.000007s : 6: substitution.graph_param_transform 51.11% : 0.000071s : 1: substitution.inline 3.80% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.13% : 0.000004s : 6: substitution.load_eliminater 2.67% : 0.000004s : 2: substitution.reduce_all_const_elim 5.47% : 0.000008s : 10: substitution.remove_not_recompute_node 2.57% : 0.000004s : 2: substitution.replace_old_param 8.68% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.51% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002558 2 88.64% : 0.002267s : 1: type_inference.infer 11.36% : 0.000291s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000070 1 100.00% : 0.000070s : 1: match.inline ------[predicate.] 0.000234 1420 0.76% : 0.000002s : 13: predicate.accumulaten_eliminater 1.31% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.91% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.34% : 0.000005s : 25: predicate.arithmetic_simplify 0.91% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.78% : 0.000002s : 12: predicate.compare_switch_simplify 0.24% : 0.000001s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.55% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.88% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.88% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.64% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_depend_swap 1.87% : 0.000004s : 31: predicate.environ_get_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.79% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.33% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.61% : 0.000013s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.03% : 0.000002s : 12: predicate.less_batch_normalization 1.78% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.34% : 0.000005s : 38: predicate.load_eliminater 1.46% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.27% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.80% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.74% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.69% : 0.000002s : 13: predicate.minmaximum_grad 0.79% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.50% : 0.000001s : 6: predicate.parallel_virtual_node 1.24% : 0.000003s : 14: predicate.partial_defer_inline 1.28% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.12% : 0.000003s : 13: predicate.reduce_eliminate 0.60% : 0.000001s : 12: predicate.remove_not_recompute_node 1.05% : 0.000002s : 25: predicate.replace_applicator 0.41% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.83% : 0.000002s : 13: predicate.reshape_eliminate 0.77% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.41% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.04% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.93% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.21% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.59% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.45% : 0.000010s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.76% : 0.000002s : 13: predicate.transpose_eliminate 1.78% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.68% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.79% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.61% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.25% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.38% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.75% : 0.000002s : 12: predicate.virtual_output_eliminate 0.68% : 0.000002s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000162 4 10.82% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.18% : 0.000144s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091544 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000068s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.17% : 0.000156s : 1: auto_monad 0.04% : 0.000039s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000345s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.04% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000007s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.58% : 0.000528s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000014s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000505s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001123s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.17% : 0.000157s : 27: opt.transform.opt_b 0.06% : 0.000057s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.06% : 0.000051s : 4: opt.transform.symbol_engine_opt 5.98% : 0.005470s : 1: opt_a 0.15% : 0.000142s : 1: opt_after_cconv 0.28% : 0.000254s : 1: opt_b 8.05% : 0.007370s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000020s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.07% : 0.000069s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000023s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.27% : 0.000250s : 1: renormalize.infer 0.22% : 0.000204s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000152s : 1: rewriter_after_opt_a 0.05% : 0.000042s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000097s : 1: symbol_engine_optimizer 77.57% : 0.071012s : 1: task_emit 0.08% : 0.000077s : 1: tuple_transform 2.85% : 0.002607s : 1: type_inference 0.08% : 0.000074s : 1: validate Time group info: ------[substitution.] 0.000133 63 5.12% : 0.000007s : 2: substitution.depend_value_elim 2.08% : 0.000003s : 5: substitution.elim_not_effective 1.81% : 0.000002s : 5: substitution.fold_const_symbol 5.75% : 0.000008s : 6: substitution.graph_param_transform 50.24% : 0.000067s : 1: substitution.inline 3.84% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.46% : 0.000005s : 6: substitution.load_eliminater 2.66% : 0.000004s : 2: substitution.reduce_all_const_elim 5.57% : 0.000007s : 10: substitution.remove_not_recompute_node 2.69% : 0.000004s : 2: substitution.replace_old_param 8.77% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.01% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002495 2 88.59% : 0.002210s : 1: type_inference.infer 11.41% : 0.000285s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000066 1 100.00% : 0.000066s : 1: match.inline ------[predicate.] 0.000232 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.12% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.83% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.41% : 0.000006s : 25: predicate.arithmetic_simplify 0.90% : 0.000002s : 13: predicate.cast_eliminate 0.74% : 0.000002s : 12: predicate.check_bprop_eliminate 0.70% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.36% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.80% : 0.000002s : 12: predicate.depend_value_elim 0.87% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.88% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.53% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.19% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_depend_swap 1.94% : 0.000005s : 31: predicate.environ_get_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.48% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.15% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.20% : 0.000000s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.63% : 0.000013s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.93% : 0.000002s : 12: predicate.less_batch_normalization 1.64% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.43% : 0.000006s : 38: predicate.load_eliminater 1.35% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.23% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.70% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.82% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.42% : 0.000001s : 6: predicate.opt_reshape 0.50% : 0.000001s : 6: predicate.parallel_virtual_node 1.13% : 0.000003s : 14: predicate.partial_defer_inline 1.37% : 0.000003s : 19: predicate.partial_eliminate 0.81% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000003s : 13: predicate.reduce_eliminate 0.67% : 0.000002s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.90% : 0.000002s : 13: predicate.reshape_eliminate 0.91% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 0.94% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.90% : 0.000002s : 12: predicate.shard_identity_eliminate 1.40% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.06% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.27% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.85% : 0.000002s : 14: predicate.switch_defer_inline 1.62% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.29% : 0.000010s : 43: predicate.switch_simplify 0.81% : 0.000002s : 13: predicate.tile_eliminate 0.86% : 0.000002s : 13: predicate.transpose_eliminate 1.85% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.73% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.53% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.83% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.45% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.38% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.37% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000161 4 9.73% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.27% : 0.000146s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091578 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000068s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.15% : 0.000141s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000330s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.03% : 0.000031s : 1: comm_op_add_attrs 0.01% : 0.000005s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.58% : 0.000527s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000499s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001118s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000050s : 4: opt.transform.symbol_engine_opt 5.84% : 0.005351s : 1: opt_a 0.15% : 0.000140s : 1: opt_after_cconv 0.27% : 0.000246s : 1: opt_b 7.87% : 0.007204s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000011s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.02% : 0.000016s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000074s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.25% : 0.000232s : 1: renormalize.infer 0.22% : 0.000198s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000152s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000094s : 1: symbol_engine_optimizer 78.09% : 0.071509s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.78% : 0.002543s : 1: type_inference 0.07% : 0.000068s : 1: validate TotalTime = 0.0830487, [21] [bootstrap]: 0.00032868 [type_inference]: 0.00250076 [auto_monad]: 0.00010568 [graph_reusing]: 1.97999e-06 [inline]: 1.05996e-06 [parallel-infer-symbol]: 1.30001e-06 [pre_auto_parallel]: 2.117e-05 [insert-virtual-dataset]: 1.79e-06 [parallel-infer-symbol-second]: 4.80097e-07 [dataset_repeat_opt]: 7.69971e-07 [pipeline_split]: 1.16008e-06 [optimize]: 0.00730363, [52] [py_interpret_to_execute]: 1.308e-05 [rewriter_before_opt_a]: 3.239e-05 [opt_a]: 0.00550315, [2] [Cycle 1]: 0.00145222, [43] [expand_dump_flag]: 2.22004e-06 [switch_simplify]: 2.646e-05 [loop_unroll]: 1.32601e-05 [a_1]: 0.00032847 [recompute_prepare]: 8.99995e-06 [updatestate_depend_eliminate]: 7.39994e-06 [updatestate_assign_eliminate]: 5.33008e-06 [updatestate_loads_eliminate]: 6.20994e-06 [parameter_eliminate]: 2.09e-06 [a_2]: 0.00011739 [accelerated_algorithm]: 8.92009e-06 [shard]: 1.70001e-06 [meta_shard_fg_expand]: 3.05998e-06 [shard_inline]: 8.60996e-06 [auto_parallel]: 1.12701e-05 [parallel]: 5.41005e-06 [flash_sp]: 7.65e-06 [merge_comm]: 7.91997e-06 [allreduce_fusion]: 5.41005e-06 [matmul_add_comm_reduction]: 9.07003e-06 [allreduce_slice_to_reducescatter]: 3.20026e-07 [virtual_shard_identity]: 9.74e-06 [virtual_dataset]: 8.08004e-06 [get_grad_eliminate_]: 7.78004e-06 [virtual_output]: 7.73999e-06 [merge_forward]: 5.41995e-06 [cell_reuse_recompute_pass]: 1.45996e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.64401e-05 [before_grad]: 1.364e-05 [inplace_validation]: 4.73007e-06 [meta_fg_expand]: 5.30004e-06 [inplace_validation_after_expand]: 5.76e-06 [flash_sp_send_recv_attached]: 3.46999e-06 [receive_attached]: 1.89e-06 [after_resolve]: 1.129e-05 [a_after_grad]: 1.269e-05 [special_op_eliminate]: 8.06001e-06 [renormalize]: 0.00041163 [add_forward_monad_depend]: 2.6999e-06 [auto_monad_grad]: 1.39989e-06 [auto_monad_eliminator]: 2.398e-05 [cse]: 2.61901e-05 [a_3]: 5.871e-05 [Cycle 2]: 0.00077674, [43] [expand_dump_flag]: 9.09902e-07 [switch_simplify]: 9.14e-06 [loop_unroll]: 7.89994e-06 [a_1]: 0.00020188 [recompute_prepare]: 7.87992e-06 [updatestate_depend_eliminate]: 5.83997e-06 [updatestate_assign_eliminate]: 4.9501e-06 [updatestate_loads_eliminate]: 5.38002e-06 [parameter_eliminate]: 1.13994e-06 [a_2]: 0.00010484 [accelerated_algorithm]: 8.78004e-06 [shard]: 1.15996e-06 [meta_shard_fg_expand]: 2.33005e-06 [shard_inline]: 8.39995e-06 [auto_parallel]: 1.002e-05 [parallel]: 3.20992e-06 [flash_sp]: 2.32004e-06 [merge_comm]: 5.78002e-06 [allreduce_fusion]: 4.91994e-06 [matmul_add_comm_reduction]: 7.29994e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 8.89006e-06 [virtual_dataset]: 7.92008e-06 [get_grad_eliminate_]: 7.36001e-06 [virtual_output]: 7.48003e-06 [merge_forward]: 4.45999e-06 [cell_reuse_recompute_pass]: 1.63994e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.549e-05 [before_grad]: 1.285e-05 [inplace_validation]: 4.14008e-06 [meta_fg_expand]: 4.87e-06 [inplace_validation_after_expand]: 4.94998e-06 [flash_sp_send_recv_attached]: 9.59961e-07 [receive_attached]: 7.00005e-07 [after_resolve]: 9.47004e-06 [a_after_grad]: 1.179e-05 [special_op_eliminate]: 7.39004e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 8.2003e-07 [auto_monad_grad]: 1.00001e-06 [auto_monad_eliminator]: 1.59499e-05 [cse]: 1.82301e-05 [a_3]: 4.836e-05 [py_interpret_to_execute_after_opt_a]: 8.47992e-06 [slice_cell_reuse_recomputed_activation]: 1.75997e-06 [rewriter_after_opt_a]: 0.00013526 [convert_after_rewriter]: 8.52998e-06 [order_py_execute_after_rewriter]: 5.11005e-06 [opt_b]: 0.00024255, [1] [Cycle 1]: 0.00023766, [7] [b_1]: 0.0001627 [b_2]: 1.017e-05 [updatestate_depend_eliminate]: 5.18991e-06 [updatestate_assign_eliminate]: 4.59002e-06 [updatestate_loads_eliminate]: 5.20993e-06 [renormalize]: 3.30037e-07 [cse]: 1.78e-05 [optimize_parallel_all_gather_comm]: 7.76001e-06 [overlap_param_gather]: 1.16997e-06 [cconv]: 1.531e-05 [loop_unroll]: 0.00048922 [opt_after_cconv]: 0.00013114, [1] [Cycle 1]: 0.0001254, [7] [c_1]: 5.115e-05 [parameter_eliminate]: 1.90001e-06 [updatestate_depend_eliminate]: 7.62998e-06 [updatestate_assign_eliminate]: 4.81994e-06 [updatestate_loads_eliminate]: 4.95999e-06 [cse]: 2.073e-05 [renormalize]: 3.29921e-07 [remove_dup_value]: 1.002e-05 [tuple_transform]: 6.958e-05, [1] [Cycle 1]: 6.518e-05, [2] [d_1]: 5.532e-05 [renormalize]: 1.70083e-07 [partial_unused_args_eliminate]: 1.55997e-06 [add_cache_embedding]: 1.09499e-05 [add_recomputation]: 5.882e-05 [cse_after_recomputation]: 2.87499e-05, [1] [Cycle 1]: 2.377e-05, [1] [cse]: 1.827e-05 [environ_conv]: 7.12997e-06 [swap_dp_allreduce_reducescatter]: 8.37003e-06 [bias_add_comm_swap]: 1.61002e-06 [label_micro_interleaved_index]: 1.25007e-06 [label_fine_grained_interleaved_index]: 1.04995e-06 [merge_cast_opt]: 7.3004e-07 [slice_recompute_activation]: 1.02993e-06 [micro_interleaved_order_control]: 1.45996e-06 [assign_add_opt]: 2.546e-05 [ForceFp32Comm]: 9.69972e-07 [remove_cast_before_assign_add]: 6.32007e-06 [full_micro_interleaved_order_control]: 1.52993e-06 [reorder_send_recv_between_fp_bp]: 1.17999e-06 [comm_op_add_attrs]: 2.53799e-05 [add_comm_op_reuse_tag]: 1.43005e-06 [interleave_split_concat_branches]: 7.69971e-07 [interleave_parallel_branches]: 9.10019e-07 [overlap_opt_shard_in_pipeline]: 1.10001e-06 [overlap_opt_shard_grad_in_pipeline]: 1.19e-06 [control_data_broadcast_order]: 6.49947e-07 [grouped_pairwise_exchange_alltoall]: 6.80995e-06 [offloading_packed_experts]: 1.1801e-06 [overlap_recompute_and_grad_model_parallel]: 1.14995e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.49948e-07 [overlap_recompute_allgather_and_fa_grad]: 6.421e-05 [overlap_grad_ring_attention]: 1.25996e-06 [overlap_grad_flash_sp]: 1.211e-05 [begin_end_overlap_inline]: 4.69969e-07 [split_matmul_comm_elemetwise]: 1.69e-06 [split_layernorm_comm]: 1.13994e-06 [handle_group_info]: 3.15008e-06 [symbol_engine_optimizer]: 9.013e-05, [1] [Cycle 1]: 8.535e-05, [6] [build]: 4.54008e-06 [elim_shapecalc]: 1.30599e-05 [elim_not_effective]: 1.788e-05 [opt_reshape]: 9.40997e-06 [fold_const_symbol]: 1.311e-05 [renormalize]: 1.8999e-07 [pipeline_parallel_scheduler]: 8.89995e-07 [auto_monad_reorder]: 2.415e-05 [get_jit_bprop_graph]: 3.19909e-07 [rewriter_after_jit_bprop_graph]: 3.00002e-07 [eliminate_special_op_node]: 0.00050955 [distribtued_split]: 3.35e-05 [validate]: 3.13299e-05 [task_emit]: 0.0719324 [execute]: 8.82999e-06 Sums bootstrap : 0.000329s : 0.42% type_inference : 0.002501s : 3.18% auto_monad : 0.000106s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000032s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000036s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000530s : 0.67% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000222s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000018s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000412s : 0.52% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000135s : 0.17% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000489s : 0.62% optimize.opt_after_cconv.c_1 : 0.000051s : 0.06% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000059s : 0.07% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000025s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000025s : 0.03% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000007s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000064s : 0.08% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000510s : 0.65% distribtued_split : 0.000034s : 0.04% validate : 0.000031s : 0.04% task_emit : 0.071932s : 91.34% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000112 63 4.34% : 0.000005s : 2: substitution.depend_value_elim 2.11% : 0.000002s : 5: substitution.elim_not_effective 1.75% : 0.000002s : 5: substitution.fold_const_symbol 5.54% : 0.000006s : 6: substitution.graph_param_transform 49.83% : 0.000056s : 1: substitution.inline 4.73% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.52% : 0.000004s : 6: substitution.load_eliminater 2.15% : 0.000002s : 2: substitution.reduce_all_const_elim 6.61% : 0.000007s : 10: substitution.remove_not_recompute_node 2.64% : 0.000003s : 2: substitution.replace_old_param 8.86% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 7.91% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002477 2 90.58% : 0.002243s : 1: type_inference.infer 9.42% : 0.000233s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000055 1 100.00% : 0.000055s : 1: match.inline ------[predicate.] 0.000231 1420 0.85% : 0.000002s : 13: predicate.accumulaten_eliminater 1.08% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.84% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.23% : 0.000005s : 25: predicate.arithmetic_simplify 0.94% : 0.000002s : 13: predicate.cast_eliminate 0.74% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.20% : 0.000000s : 6: predicate.const_output_eliminate 0.42% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.20% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.87% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.85% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.64% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.23% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000004s : 31: predicate.environ_get_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.29% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.52% : 0.000013s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.17% : 0.000003s : 12: predicate.less_batch_normalization 1.72% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.37% : 0.000005s : 38: predicate.load_eliminater 1.42% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.18% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.77% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.86% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.88% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.72% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.50% : 0.000001s : 6: predicate.parallel_virtual_node 1.18% : 0.000003s : 14: predicate.partial_defer_inline 1.28% : 0.000003s : 19: predicate.partial_eliminate 0.76% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.15% : 0.000003s : 13: predicate.reduce_eliminate 0.54% : 0.000001s : 12: predicate.remove_not_recompute_node 1.09% : 0.000003s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.21% : 0.000000s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.86% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.05% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.97% : 0.000002s : 12: predicate.shard_identity_eliminate 1.41% : 0.000003s : 18: predicate.special_op_eliminate 1.02% : 0.000002s : 12: predicate.specialize_transform 1.09% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.93% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.40% : 0.000006s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.89% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.33% : 0.000010s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.94% : 0.000002s : 13: predicate.transpose_eliminate 1.68% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.67% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.52% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.74% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.69% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.70% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.28% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.46% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.58% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000142 4 7.14% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.86% : 0.000132s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.092126 192 0.01% : 0.000005s : 1: ForceFp32Comm 0.04% : 0.000037s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.03% : 0.000030s : 1: assign_add_opt 0.13% : 0.000118s : 1: auto_monad 0.03% : 0.000030s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000353s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.03% : 0.000029s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000032s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.04% : 0.000041s : 1: distribtued_split 0.57% : 0.000522s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.01% : 0.000010s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000006s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.54% : 0.000498s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.18% : 0.001087s : 80: opt.transform.opt_a 0.05% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 5.98% : 0.005507s : 1: opt_a 0.15% : 0.000135s : 1: opt_after_cconv 0.27% : 0.000245s : 1: opt_b 7.94% : 0.007311s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000070s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.23% : 0.000216s : 1: renormalize.infer 0.21% : 0.000191s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000141s : 1: rewriter_after_opt_a 0.04% : 0.000037s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000093s : 1: symbol_engine_optimizer 78.11% : 0.071962s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.73% : 0.002517s : 1: type_inference 0.07% : 0.000065s : 1: validate TotalTime = 0.0841458, [21] [bootstrap]: 0.00031787 [type_inference]: 0.00266835 [auto_monad]: 0.00013573 [graph_reusing]: 2.84996e-06 [inline]: 1.47999e-06 [parallel-infer-symbol]: 2.37999e-06 [pre_auto_parallel]: 2.596e-05 [insert-virtual-dataset]: 2.63995e-06 [parallel-infer-symbol-second]: 5.39934e-07 [dataset_repeat_opt]: 1.53994e-06 [pipeline_split]: 1.65007e-06 [optimize]: 0.00757235, [52] [py_interpret_to_execute]: 1.63e-05 [rewriter_before_opt_a]: 3.74001e-05 [opt_a]: 0.00568028, [2] [Cycle 1]: 0.00156556, [43] [expand_dump_flag]: 4.05998e-06 [switch_simplify]: 3.004e-05 [loop_unroll]: 1.349e-05 [a_1]: 0.00035048 [recompute_prepare]: 8.82999e-06 [updatestate_depend_eliminate]: 8.86992e-06 [updatestate_assign_eliminate]: 6.19993e-06 [updatestate_loads_eliminate]: 7.66001e-06 [parameter_eliminate]: 3.06999e-06 [a_2]: 0.00011951 [accelerated_algorithm]: 8.90996e-06 [shard]: 2.40002e-06 [meta_shard_fg_expand]: 3.98001e-06 [shard_inline]: 8.47992e-06 [auto_parallel]: 1.248e-05 [parallel]: 7.98004e-06 [flash_sp]: 1.19399e-05 [merge_comm]: 8.32998e-06 [allreduce_fusion]: 5.52996e-06 [matmul_add_comm_reduction]: 1.10201e-05 [allreduce_slice_to_reducescatter]: 4.69969e-07 [virtual_shard_identity]: 9.55001e-06 [virtual_dataset]: 8.23999e-06 [get_grad_eliminate_]: 7.59994e-06 [virtual_output]: 7.52998e-06 [merge_forward]: 6.19993e-06 [cell_reuse_recompute_pass]: 1.70001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.633e-05 [before_grad]: 1.37599e-05 [inplace_validation]: 5.01995e-06 [meta_fg_expand]: 5.76011e-06 [inplace_validation_after_expand]: 5.96e-06 [flash_sp_send_recv_attached]: 4.92996e-06 [receive_attached]: 2.98e-06 [after_resolve]: 1.176e-05 [a_after_grad]: 1.24701e-05 [special_op_eliminate]: 7.71007e-06 [renormalize]: 0.00045094 [add_forward_monad_depend]: 3.83996e-06 [auto_monad_grad]: 2.01003e-06 [auto_monad_eliminator]: 3.24501e-05 [cse]: 3.32701e-05 [a_3]: 5.75e-05 [Cycle 2]: 0.00078306, [43] [expand_dump_flag]: 9.50065e-07 [switch_simplify]: 8.82999e-06 [loop_unroll]: 7.81997e-06 [a_1]: 0.00020491 [recompute_prepare]: 7.33999e-06 [updatestate_depend_eliminate]: 5.98002e-06 [updatestate_assign_eliminate]: 4.80993e-06 [updatestate_loads_eliminate]: 5.50994e-06 [parameter_eliminate]: 1.25996e-06 [a_2]: 0.00010452 [accelerated_algorithm]: 8.22998e-06 [shard]: 1.34006e-06 [meta_shard_fg_expand]: 2.70992e-06 [shard_inline]: 7.67002e-06 [auto_parallel]: 1.135e-05 [parallel]: 3.62005e-06 [flash_sp]: 3.71004e-06 [merge_comm]: 5.84999e-06 [allreduce_fusion]: 4.78001e-06 [matmul_add_comm_reduction]: 8.10996e-06 [allreduce_slice_to_reducescatter]: 2.69967e-07 [virtual_shard_identity]: 8.28994e-06 [virtual_dataset]: 7.40995e-06 [get_grad_eliminate_]: 7.08993e-06 [virtual_output]: 7.11007e-06 [merge_forward]: 4.73997e-06 [cell_reuse_recompute_pass]: 2.09e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.53299e-05 [before_grad]: 1.261e-05 [inplace_validation]: 4.14008e-06 [meta_fg_expand]: 4.72995e-06 [inplace_validation_after_expand]: 5.10993e-06 [flash_sp_send_recv_attached]: 7.69971e-07 [receive_attached]: 7.89994e-07 [after_resolve]: 1.023e-05 [a_after_grad]: 1.18701e-05 [special_op_eliminate]: 7.40995e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 9.70089e-07 [auto_monad_grad]: 1.17009e-06 [auto_monad_eliminator]: 1.847e-05 [cse]: 1.95999e-05 [a_3]: 4.785e-05 [py_interpret_to_execute_after_opt_a]: 1.015e-05 [slice_cell_reuse_recomputed_activation]: 2.19001e-06 [rewriter_after_opt_a]: 0.00015376 [convert_after_rewriter]: 8.61997e-06 [order_py_execute_after_rewriter]: 5.99003e-06 [opt_b]: 0.00024441, [1] [Cycle 1]: 0.00023902, [7] [b_1]: 0.00016214 [b_2]: 1.007e-05 [updatestate_depend_eliminate]: 5.40004e-06 [updatestate_assign_eliminate]: 4.41005e-06 [updatestate_loads_eliminate]: 5.32996e-06 [renormalize]: 3.69968e-07 [cse]: 1.945e-05 [optimize_parallel_all_gather_comm]: 8.31007e-06 [overlap_param_gather]: 1.33994e-06 [cconv]: 2.428e-05 [loop_unroll]: 0.00051373 [opt_after_cconv]: 0.00013503, [1] [Cycle 1]: 0.00012872, [7] [c_1]: 5.338e-05 [parameter_eliminate]: 2.42994e-06 [updatestate_depend_eliminate]: 8.37003e-06 [updatestate_assign_eliminate]: 4.59002e-06 [updatestate_loads_eliminate]: 5.50994e-06 [cse]: 2.209e-05 [renormalize]: 3.89991e-07 [remove_dup_value]: 1.318e-05 [tuple_transform]: 6.964e-05, [1] [Cycle 1]: 6.53199e-05, [2] [d_1]: 5.60699e-05 [renormalize]: 1.80095e-07 [partial_unused_args_eliminate]: 1.84006e-06 [add_cache_embedding]: 1.412e-05 [add_recomputation]: 6.385e-05 [cse_after_recomputation]: 2.788e-05, [1] [Cycle 1]: 2.35001e-05, [1] [cse]: 1.79299e-05 [environ_conv]: 7.81997e-06 [swap_dp_allreduce_reducescatter]: 7.87003e-06 [bias_add_comm_swap]: 2.4999e-06 [label_micro_interleaved_index]: 1.60001e-06 [label_fine_grained_interleaved_index]: 2.39001e-06 [merge_cast_opt]: 9.79984e-07 [slice_recompute_activation]: 1.92993e-06 [micro_interleaved_order_control]: 1.90001e-06 [assign_add_opt]: 2.911e-05 [ForceFp32Comm]: 8.39937e-07 [remove_cast_before_assign_add]: 7.77002e-06 [full_micro_interleaved_order_control]: 2.04996e-06 [reorder_send_recv_between_fp_bp]: 2.61003e-06 [comm_op_add_attrs]: 2.802e-05 [add_comm_op_reuse_tag]: 1.85007e-06 [interleave_split_concat_branches]: 9.29926e-07 [interleave_parallel_branches]: 6.49947e-07 [overlap_opt_shard_in_pipeline]: 1.12003e-06 [overlap_opt_shard_grad_in_pipeline]: 2.33005e-06 [control_data_broadcast_order]: 1.12993e-06 [grouped_pairwise_exchange_alltoall]: 9.56992e-06 [offloading_packed_experts]: 2.16998e-06 [overlap_recompute_and_grad_model_parallel]: 1.91003e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.10019e-07 [overlap_recompute_allgather_and_fa_grad]: 6.777e-05 [overlap_grad_ring_attention]: 2.36009e-06 [overlap_grad_flash_sp]: 1.57199e-05 [begin_end_overlap_inline]: 8.30041e-07 [split_matmul_comm_elemetwise]: 1.81003e-06 [split_layernorm_comm]: 2.14996e-06 [handle_group_info]: 5.25999e-06 [symbol_engine_optimizer]: 9.15399e-05, [1] [Cycle 1]: 8.576e-05, [6] [build]: 5.12006e-06 [elim_shapecalc]: 1.33801e-05 [elim_not_effective]: 1.759e-05 [opt_reshape]: 9.09995e-06 [fold_const_symbol]: 1.362e-05 [renormalize]: 3.19909e-07 [pipeline_parallel_scheduler]: 1.69e-06 [auto_monad_reorder]: 3.291e-05 [get_jit_bprop_graph]: 5.00004e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.0005232 [distribtued_split]: 4.15e-05 [validate]: 3.48601e-05 [task_emit]: 0.0725217 [execute]: 1.151e-05 Sums bootstrap : 0.000318s : 0.40% type_inference : 0.002668s : 3.34% auto_monad : 0.000136s : 0.17% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000555s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000224s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000451s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.06% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000105s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000154s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.20% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000514s : 0.64% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000008s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000068s : 0.08% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000033s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000523s : 0.66% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.04% task_emit : 0.072522s : 90.87% execute : 0.000012s : 0.01% Time group info: ------[substitution.] 0.000136 63 4.98% : 0.000007s : 2: substitution.depend_value_elim 2.03% : 0.000003s : 5: substitution.elim_not_effective 1.76% : 0.000002s : 5: substitution.fold_const_symbol 5.27% : 0.000007s : 6: substitution.graph_param_transform 50.79% : 0.000069s : 1: substitution.inline 3.91% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.25% : 0.000004s : 6: substitution.load_eliminater 2.74% : 0.000004s : 2: substitution.reduce_all_const_elim 5.43% : 0.000007s : 10: substitution.remove_not_recompute_node 2.65% : 0.000004s : 2: substitution.replace_old_param 9.19% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.01% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002639 2 89.07% : 0.002350s : 1: type_inference.infer 10.93% : 0.000288s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000232 1420 0.73% : 0.000002s : 13: predicate.accumulaten_eliminater 1.23% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.69% : 0.000002s : 12: predicate.addn_check_dump 0.88% : 0.000002s : 13: predicate.addn_zero_filter 0.70% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.31% : 0.000005s : 25: predicate.arithmetic_simplify 0.93% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.69% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.50% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.76% : 0.000002s : 12: predicate.depend_value_elim 0.79% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.83% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.81% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.67% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.21% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 2.05% : 0.000005s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.83% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.34% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.15% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.81% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.85% : 0.000014s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.37% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.13% : 0.000003s : 12: predicate.less_batch_normalization 1.68% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.35% : 0.000005s : 38: predicate.load_eliminater 1.39% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.25% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.67% : 0.000002s : 6: predicate.mutable_eliminate 0.41% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.12% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000003s : 13: predicate.reduce_eliminate 0.52% : 0.000001s : 12: predicate.remove_not_recompute_node 1.11% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.93% : 0.000002s : 13: predicate.reshape_eliminate 0.85% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 0.97% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.95% : 0.000002s : 12: predicate.shard_identity_eliminate 1.43% : 0.000003s : 18: predicate.special_op_eliminate 1.03% : 0.000002s : 12: predicate.specialize_transform 0.97% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.36% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.56% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.42% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.77% : 0.000002s : 13: predicate.transpose_eliminate 1.79% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.69% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.66% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.77% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.52% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.58% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.56% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.27% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.43% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.76% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.76% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000166 4 10.24% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.76% : 0.000149s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.093563 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000069s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.16% : 0.000148s : 1: auto_monad 0.04% : 0.000039s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000342s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.03% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000007s : 1: dataset_repeat_opt 0.05% : 0.000050s : 1: distribtued_split 0.57% : 0.000537s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000009s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000524s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.19% : 0.001110s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.16% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.08% : 0.005684s : 1: opt_a 0.15% : 0.000140s : 1: opt_after_cconv 0.26% : 0.000247s : 1: opt_b 8.10% : 0.007581s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000074s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000243s : 1: renormalize.infer 0.22% : 0.000202s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000160s : 1: rewriter_after_opt_a 0.04% : 0.000042s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000094s : 1: symbol_engine_optimizer 77.54% : 0.072551s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.87% : 0.002687s : 1: type_inference 0.07% : 0.000070s : 1: validate [WARNING] PARALLEL(169251,ffff805f5c10,python3.7):2025-02-07-15:54:30.019.311 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169260,ffff90a16c10,python3.7):2025-02-07-15:54:30.019.826 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169242,ffff96d80c10,python3.7):2025-02-07-15:54:30.020.004 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169309,ffff88eb9c10,python3.7):2025-02-07-15:54:30.020.003 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169323,ffff82a65c10,python3.7):2025-02-07-15:54:30.020.139 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169273,ffff95a23c10,python3.7):2025-02-07-15:54:30.020.139 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169297,ffff9ffe2c10,python3.7):2025-02-07-15:54:30.020.325 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169285,ffff97501c10,python3.7):2025-02-07-15:54:30.020.555 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 0.0788564, [21] [bootstrap]: 0.00028966 [type_inference]: 0.00224243 [auto_monad]: 0.00010399 [graph_reusing]: 1.87999e-06 [inline]: 1.32993e-06 [parallel-infer-symbol]: 1.43005e-06 [pre_auto_parallel]: 2.113e-05 [insert-virtual-dataset]: 2.22004e-06 [parallel-infer-symbol-second]: 4.69969e-07 [dataset_repeat_opt]: 8.2003e-07 [pipeline_split]: 1.45996e-06 [optimize]: 0.00702861, [52] [py_interpret_to_execute]: 1.26801e-05 [rewriter_before_opt_a]: 3.16199e-05 [opt_a]: 0.00524191, [2] [Cycle 1]: 0.00146806, [43] [expand_dump_flag]: 2.44007e-06 [switch_simplify]: 2.564e-05 [loop_unroll]: 1.289e-05 [a_1]: 0.00032313 [recompute_prepare]: 9.21998e-06 [updatestate_depend_eliminate]: 7.67002e-06 [updatestate_assign_eliminate]: 4.99003e-06 [updatestate_loads_eliminate]: 6.61996e-06 [parameter_eliminate]: 2.13995e-06 [a_2]: 0.00011642 [accelerated_algorithm]: 8.00996e-06 [shard]: 1.64995e-06 [meta_shard_fg_expand]: 3.02005e-06 [shard_inline]: 8.62998e-06 [auto_parallel]: 1.224e-05 [parallel]: 5.51005e-06 [flash_sp]: 7.77002e-06 [merge_comm]: 6.62007e-06 [allreduce_fusion]: 5.16989e-06 [matmul_add_comm_reduction]: 9.49006e-06 [allreduce_slice_to_reducescatter]: 4.60073e-07 [virtual_shard_identity]: 9.47004e-06 [virtual_dataset]: 8.02998e-06 [get_grad_eliminate_]: 8.01997e-06 [virtual_output]: 7.71997e-06 [merge_forward]: 5.29003e-06 [cell_reuse_recompute_pass]: 1.47009e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.682e-05 [before_grad]: 1.413e-05 [inplace_validation]: 5.27001e-06 [meta_fg_expand]: 5.42006e-06 [inplace_validation_after_expand]: 5.98002e-06 [flash_sp_send_recv_attached]: 3.38e-06 [receive_attached]: 1.95997e-06 [after_resolve]: 1.13001e-05 [a_after_grad]: 1.253e-05 [special_op_eliminate]: 8.11007e-06 [renormalize]: 0.00042878 [add_forward_monad_depend]: 2.72004e-06 [auto_monad_grad]: 1.34995e-06 [auto_monad_eliminator]: 2.47e-05 [cse]: 2.675e-05 [a_3]: 5.703e-05 [Cycle 2]: 0.0008355, [43] [expand_dump_flag]: 7.60076e-07 [switch_simplify]: 8.90007e-06 [loop_unroll]: 7.80995e-06 [a_1]: 0.00020068 [recompute_prepare]: 7.27002e-06 [updatestate_depend_eliminate]: 5.96e-06 [updatestate_assign_eliminate]: 4.7799e-06 [updatestate_loads_eliminate]: 5.18002e-06 [parameter_eliminate]: 1.13004e-06 [a_2]: 0.00010473 [accelerated_algorithm]: 8.61997e-06 [shard]: 1.20001e-06 [meta_shard_fg_expand]: 2.46998e-06 [shard_inline]: 7.72998e-06 [auto_parallel]: 1.03e-05 [parallel]: 3.15998e-06 [flash_sp]: 2.62994e-06 [merge_comm]: 5.62007e-06 [allreduce_fusion]: 4.93007e-06 [matmul_add_comm_reduction]: 7.45e-06 [allreduce_slice_to_reducescatter]: 2.69967e-07 [virtual_shard_identity]: 8.69005e-06 [virtual_dataset]: 7.51996e-06 [get_grad_eliminate_]: 7.33999e-06 [virtual_output]: 7.19004e-06 [merge_forward]: 4.63997e-06 [cell_reuse_recompute_pass]: 1.70001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.62601e-05 [before_grad]: 1.319e-05 [inplace_validation]: 4.25999e-06 [meta_fg_expand]: 4.62006e-06 [inplace_validation_after_expand]: 5.11005e-06 [flash_sp_send_recv_attached]: 8.00006e-07 [receive_attached]: 8.49948e-07 [after_resolve]: 9.81998e-06 [a_after_grad]: 1.181e-05 [special_op_eliminate]: 7.47002e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 9.2003e-07 [auto_monad_grad]: 9.59961e-07 [auto_monad_eliminator]: 1.564e-05 [cse]: 1.831e-05 [a_3]: 0.00010092 [py_interpret_to_execute_after_opt_a]: 1.034e-05 [slice_cell_reuse_recomputed_activation]: 1.74996e-06 [rewriter_after_opt_a]: 0.00013419 [convert_after_rewriter]: 9.01998e-06 [order_py_execute_after_rewriter]: 5.74999e-06 [opt_b]: 0.00024323, [1] [Cycle 1]: 0.0002378, [7] [b_1]: 0.00016404 [b_2]: 1.01801e-05 [updatestate_depend_eliminate]: 5.24998e-06 [updatestate_assign_eliminate]: 4.25999e-06 [updatestate_loads_eliminate]: 5.14998e-06 [renormalize]: 3.7998e-07 [cse]: 1.74401e-05 [optimize_parallel_all_gather_comm]: 8.1301e-06 [overlap_param_gather]: 8.00006e-07 [cconv]: 1.65299e-05 [loop_unroll]: 0.00048649 [opt_after_cconv]: 0.00012996, [1] [Cycle 1]: 0.00012395, [7] [c_1]: 5.214e-05 [parameter_eliminate]: 1.89e-06 [updatestate_depend_eliminate]: 7.26001e-06 [updatestate_assign_eliminate]: 4.49002e-06 [updatestate_loads_eliminate]: 5.19992e-06 [cse]: 1.956e-05 [renormalize]: 3.00002e-07 [remove_dup_value]: 1.04e-05 [tuple_transform]: 6.81099e-05, [1] [Cycle 1]: 6.389e-05, [2] [d_1]: 5.44101e-05 [renormalize]: 1.60071e-07 [partial_unused_args_eliminate]: 1.52003e-06 [add_cache_embedding]: 1.104e-05 [add_recomputation]: 5.29001e-05 [cse_after_recomputation]: 2.494e-05, [1] [Cycle 1]: 2.08099e-05, [1] [cse]: 1.592e-05 [environ_conv]: 6.73998e-06 [swap_dp_allreduce_reducescatter]: 7.27002e-06 [bias_add_comm_swap]: 1.61002e-06 [label_micro_interleaved_index]: 1.71002e-06 [label_fine_grained_interleaved_index]: 1.35996e-06 [merge_cast_opt]: 8.69972e-07 [slice_recompute_activation]: 1.16008e-06 [micro_interleaved_order_control]: 1.47999e-06 [assign_add_opt]: 2.52801e-05 [ForceFp32Comm]: 1.01002e-06 [remove_cast_before_assign_add]: 6.31006e-06 [full_micro_interleaved_order_control]: 1.51992e-06 [reorder_send_recv_between_fp_bp]: 1.30001e-06 [comm_op_add_attrs]: 2.40699e-05 [add_comm_op_reuse_tag]: 1.59e-06 [interleave_split_concat_branches]: 5.49946e-07 [interleave_parallel_branches]: 5.50062e-07 [overlap_opt_shard_in_pipeline]: 6.50063e-07 [overlap_opt_shard_grad_in_pipeline]: 1.47999e-06 [control_data_broadcast_order]: 8.39937e-07 [grouped_pairwise_exchange_alltoall]: 6.15011e-06 [offloading_packed_experts]: 1.75007e-06 [overlap_recompute_and_grad_model_parallel]: 1.19e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.59958e-07 [overlap_recompute_allgather_and_fa_grad]: 8.762e-05 [overlap_grad_ring_attention]: 1.61992e-06 [overlap_grad_flash_sp]: 1.249e-05 [begin_end_overlap_inline]: 5.00004e-07 [split_matmul_comm_elemetwise]: 1.17999e-06 [split_layernorm_comm]: 1.05007e-06 [handle_group_info]: 3.19001e-06 [symbol_engine_optimizer]: 8.99499e-05, [1] [Cycle 1]: 8.525e-05, [6] [build]: 5.01995e-06 [elim_shapecalc]: 1.266e-05 [elim_not_effective]: 1.64299e-05 [opt_reshape]: 8.74e-06 [fold_const_symbol]: 1.352e-05 [renormalize]: 2.49944e-07 [pipeline_parallel_scheduler]: 1.06008e-06 [auto_monad_reorder]: 2.543e-05 [get_jit_bprop_graph]: 3.30037e-07 [rewriter_after_jit_bprop_graph]: 3.00002e-07 [eliminate_special_op_node]: 0.00049486 [distribtued_split]: 3.47e-05 [validate]: 3.178e-05 [task_emit]: 0.0683295 [execute]: 8.62998e-06 Sums bootstrap : 0.000290s : 0.39% type_inference : 0.002242s : 2.99% auto_monad : 0.000104s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000032s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000524s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000221s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000005s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000012s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000429s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000045s : 0.06% optimize.opt_a.a_3 : 0.000158s : 0.21% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000134s : 0.18% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000164s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000017s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000017s : 0.02% optimize.loop_unroll : 0.000486s : 0.65% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000053s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000025s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000024s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000006s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000088s : 0.12% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000025s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000495s : 0.66% distribtued_split : 0.000035s : 0.05% validate : 0.000032s : 0.04% task_emit : 0.068330s : 91.21% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000113 63 4.87% : 0.000006s : 2: substitution.depend_value_elim 1.99% : 0.000002s : 5: substitution.elim_not_effective 1.74% : 0.000002s : 5: substitution.fold_const_symbol 5.50% : 0.000006s : 6: substitution.graph_param_transform 48.35% : 0.000055s : 1: substitution.inline 5.17% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.62% : 0.000004s : 6: substitution.load_eliminater 2.46% : 0.000003s : 2: substitution.reduce_all_const_elim 6.54% : 0.000007s : 10: substitution.remove_not_recompute_node 2.61% : 0.000003s : 2: substitution.replace_old_param 9.34% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.81% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002219 2 89.94% : 0.001996s : 1: type_inference.infer 10.06% : 0.000223s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000053 1 100.00% : 0.000053s : 1: match.inline ------[predicate.] 0.000229 1420 0.83% : 0.000002s : 13: predicate.accumulaten_eliminater 1.10% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.75% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.13% : 0.000005s : 25: predicate.arithmetic_simplify 0.76% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.54% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.29% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.95% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.24% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.22% : 0.000003s : 19: predicate.environ_get_depend_swap 1.92% : 0.000004s : 31: predicate.environ_get_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.18% : 0.000003s : 14: predicate.float_depend_g_call 0.70% : 0.000002s : 12: predicate.float_environ_get_switch 1.17% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.31% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.47% : 0.000013s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.02% : 0.000002s : 12: predicate.less_batch_normalization 1.68% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.42% : 0.000006s : 38: predicate.load_eliminater 1.18% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.19% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.85% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.78% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.80% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.47% : 0.000001s : 6: predicate.parallel_virtual_node 1.20% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.90% : 0.000002s : 12: predicate.reduce_all_const_elim 1.11% : 0.000003s : 13: predicate.reduce_eliminate 0.57% : 0.000001s : 12: predicate.remove_not_recompute_node 1.06% : 0.000002s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.85% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.49% : 0.000001s : 6: predicate.row_tensor_eliminate 1.07% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.50% : 0.000003s : 18: predicate.special_op_eliminate 0.91% : 0.000002s : 12: predicate.specialize_transform 1.05% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.41% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.78% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.08% : 0.000009s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.88% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.82% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.58% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.83% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.89% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.58% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.35% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.41% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000144 4 7.81% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.19% : 0.000132s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087716 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000057s : 1: add_recomputation 0.03% : 0.000029s : 1: assign_add_opt 0.13% : 0.000116s : 1: auto_monad 0.04% : 0.000032s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.36% : 0.000315s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.03% : 0.000028s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000042s : 1: distribtued_split 0.58% : 0.000508s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000009s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000004s : 1: label_micro_interleaved_index 0.57% : 0.000496s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.28% : 0.001126s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.18% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 5.98% : 0.005246s : 1: opt_a 0.15% : 0.000134s : 1: opt_after_cconv 0.28% : 0.000246s : 1: opt_b 8.02% : 0.007038s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.11% : 0.000093s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000018s : 1: py_interpret_to_execute 0.02% : 0.000015s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000009s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.25% : 0.000218s : 1: renormalize.infer 0.23% : 0.000206s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000140s : 1: rewriter_after_opt_a 0.04% : 0.000036s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000093s : 1: symbol_engine_optimizer 77.93% : 0.068355s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.58% : 0.002259s : 1: type_inference 0.07% : 0.000064s : 1: validate TotalTime = 0.0805719, [21] [bootstrap]: 0.00032239 [type_inference]: 0.00263478 [auto_monad]: 0.00013333 [graph_reusing]: 2.68e-06 [inline]: 1.34006e-06 [parallel-infer-symbol]: 2.50002e-06 [pre_auto_parallel]: 2.629e-05 [insert-virtual-dataset]: 2.84007e-06 [parallel-infer-symbol-second]: 3.7998e-07 [dataset_repeat_opt]: 1.45996e-06 [pipeline_split]: 1.55997e-06 [optimize]: 0.00737817, [52] [py_interpret_to_execute]: 1.678e-05 [rewriter_before_opt_a]: 3.58401e-05 [opt_a]: 0.00553072, [2] [Cycle 1]: 0.00164356, [43] [expand_dump_flag]: 4.02005e-06 [switch_simplify]: 3.021e-05 [loop_unroll]: 1.332e-05 [a_1]: 0.00040428 [recompute_prepare]: 9.26002e-06 [updatestate_depend_eliminate]: 8.75001e-06 [updatestate_assign_eliminate]: 6.42997e-06 [updatestate_loads_eliminate]: 7.17002e-06 [parameter_eliminate]: 3.72995e-06 [a_2]: 0.00011592 [accelerated_algorithm]: 8.42009e-06 [shard]: 2.39001e-06 [meta_shard_fg_expand]: 4.05998e-06 [shard_inline]: 8.57003e-06 [auto_parallel]: 1.271e-05 [parallel]: 7.00995e-06 [flash_sp]: 1.14901e-05 [merge_comm]: 8.56002e-06 [allreduce_fusion]: 5.31005e-06 [matmul_add_comm_reduction]: 1.082e-05 [allreduce_slice_to_reducescatter]: 5.39934e-07 [virtual_shard_identity]: 9.47004e-06 [virtual_dataset]: 7.97003e-06 [get_grad_eliminate_]: 7.71007e-06 [virtual_output]: 7.28003e-06 [merge_forward]: 5.81995e-06 [cell_reuse_recompute_pass]: 2.11003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.749e-05 [before_grad]: 1.428e-05 [inplace_validation]: 5.24009e-06 [meta_fg_expand]: 5.82996e-06 [inplace_validation_after_expand]: 6.84999e-06 [flash_sp_send_recv_attached]: 5.10004e-06 [receive_attached]: 3.02005e-06 [after_resolve]: 1.171e-05 [a_after_grad]: 1.24e-05 [special_op_eliminate]: 7.80006e-06 [renormalize]: 0.00047305 [add_forward_monad_depend]: 3.78001e-06 [auto_monad_grad]: 2.04996e-06 [auto_monad_eliminator]: 3.585e-05 [cse]: 3.54099e-05 [a_3]: 5.766e-05 [Cycle 2]: 0.0008517, [43] [expand_dump_flag]: 1.04995e-06 [switch_simplify]: 8.61997e-06 [loop_unroll]: 7.72008e-06 [a_1]: 0.00020138 [recompute_prepare]: 7.48003e-06 [updatestate_depend_eliminate]: 6.37001e-06 [updatestate_assign_eliminate]: 4.99003e-06 [updatestate_loads_eliminate]: 5.77001e-06 [parameter_eliminate]: 1.51002e-06 [a_2]: 0.00010433 [accelerated_algorithm]: 8.13999e-06 [shard]: 1.44995e-06 [meta_shard_fg_expand]: 2.49001e-06 [shard_inline]: 8.09995e-06 [auto_parallel]: 1.13799e-05 [parallel]: 3.60003e-06 [flash_sp]: 3.42994e-06 [merge_comm]: 5.84999e-06 [allreduce_fusion]: 5.47001e-06 [matmul_add_comm_reduction]: 8.05e-06 [allreduce_slice_to_reducescatter]: 2.79979e-07 [virtual_shard_identity]: 8.46991e-06 [virtual_dataset]: 7.67992e-06 [get_grad_eliminate_]: 7.39994e-06 [virtual_output]: 7.52998e-06 [merge_forward]: 4.81994e-06 [cell_reuse_recompute_pass]: 1.87999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.497e-05 [before_grad]: 1.27499e-05 [inplace_validation]: 4.40003e-06 [meta_fg_expand]: 4.89003e-06 [inplace_validation_after_expand]: 5.39003e-06 [flash_sp_send_recv_attached]: 9.79984e-07 [receive_attached]: 1.05996e-06 [after_resolve]: 9.76992e-06 [a_after_grad]: 1.21701e-05 [special_op_eliminate]: 1.105e-05 [renormalize]: 1.00001e-07 [add_forward_monad_depend]: 1.92993e-06 [auto_monad_grad]: 1.63994e-06 [auto_monad_eliminator]: 2.49e-05 [cse]: 2.255e-05 [a_3]: 4.98401e-05 [py_interpret_to_execute_after_opt_a]: 1.105e-05 [slice_cell_reuse_recomputed_activation]: 2.27999e-06 [rewriter_after_opt_a]: 0.00013946 [convert_after_rewriter]: 1.224e-05 [order_py_execute_after_rewriter]: 6.23998e-06 [opt_b]: 0.00024238, [1] [Cycle 1]: 0.00023702, [7] [b_1]: 0.00016201 [b_2]: 9.91998e-06 [updatestate_depend_eliminate]: 5.51005e-06 [updatestate_assign_eliminate]: 4.72006e-06 [updatestate_loads_eliminate]: 5.43008e-06 [renormalize]: 2.60072e-07 [cse]: 1.84999e-05 [optimize_parallel_all_gather_comm]: 8.97993e-06 [overlap_param_gather]: 1.13004e-06 [cconv]: 2.406e-05 [loop_unroll]: 0.00049438 [opt_after_cconv]: 0.00013471, [1] [Cycle 1]: 0.00012874, [7] [c_1]: 5.317e-05 [parameter_eliminate]: 2.56009e-06 [updatestate_depend_eliminate]: 8.11997e-06 [updatestate_assign_eliminate]: 4.77e-06 [updatestate_loads_eliminate]: 5.68002e-06 [cse]: 2.166e-05 [renormalize]: 3.59956e-07 [remove_dup_value]: 1.238e-05 [tuple_transform]: 6.922e-05, [1] [Cycle 1]: 6.514e-05, [2] [d_1]: 5.57001e-05 [renormalize]: 2.39932e-07 [partial_unused_args_eliminate]: 2.07999e-06 [add_cache_embedding]: 1.298e-05 [add_recomputation]: 6.59301e-05 [cse_after_recomputation]: 2.627e-05, [1] [Cycle 1]: 2.152e-05, [1] [cse]: 1.669e-05 [environ_conv]: 7.61007e-06 [swap_dp_allreduce_reducescatter]: 8.11007e-06 [bias_add_comm_swap]: 2.33005e-06 [label_micro_interleaved_index]: 1.96998e-06 [label_fine_grained_interleaved_index]: 2.43005e-06 [merge_cast_opt]: 1.03004e-06 [slice_recompute_activation]: 1.66998e-06 [micro_interleaved_order_control]: 1.85997e-06 [assign_add_opt]: 2.842e-05 [ForceFp32Comm]: 8.60076e-07 [remove_cast_before_assign_add]: 7.35e-06 [full_micro_interleaved_order_control]: 2.07999e-06 [reorder_send_recv_between_fp_bp]: 2.13995e-06 [comm_op_add_attrs]: 2.77701e-05 [add_comm_op_reuse_tag]: 2.22994e-06 [interleave_split_concat_branches]: 7.89994e-07 [interleave_parallel_branches]: 8.40053e-07 [overlap_opt_shard_in_pipeline]: 1.22003e-06 [overlap_opt_shard_grad_in_pipeline]: 2.36009e-06 [control_data_broadcast_order]: 1.03004e-06 [grouped_pairwise_exchange_alltoall]: 9.54e-06 [offloading_packed_experts]: 2.21003e-06 [overlap_recompute_and_grad_model_parallel]: 2.26009e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.10018e-07 [overlap_recompute_allgather_and_fa_grad]: 6.894e-05 [overlap_grad_ring_attention]: 2.09e-06 [overlap_grad_flash_sp]: 1.43601e-05 [begin_end_overlap_inline]: 7.89994e-07 [split_matmul_comm_elemetwise]: 2.11003e-06 [split_layernorm_comm]: 1.60001e-06 [handle_group_info]: 5.25999e-06 [symbol_engine_optimizer]: 8.96701e-05, [1] [Cycle 1]: 8.48799e-05, [6] [build]: 5.22996e-06 [elim_shapecalc]: 1.303e-05 [elim_not_effective]: 1.63e-05 [opt_reshape]: 8.87003e-06 [fold_const_symbol]: 1.414e-05 [renormalize]: 3.10014e-07 [pipeline_parallel_scheduler]: 1.59e-06 [auto_monad_reorder]: 3.139e-05 [get_jit_bprop_graph]: 4.39934e-07 [rewriter_after_jit_bprop_graph]: 4.29922e-07 [eliminate_special_op_node]: 0.00051362 [distribtued_split]: 4.14e-05 [validate]: 3.497e-05 [task_emit]: 0.0691737 [execute]: 1.173e-05 Sums bootstrap : 0.000322s : 0.42% type_inference : 0.002635s : 3.45% auto_monad : 0.000133s : 0.17% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000017s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000606s : 0.79% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.01% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.01% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000019s : 0.02% optimize.opt_a.renormalize : 0.000473s : 0.62% optimize.opt_a.add_forward_monad_depend : 0.000006s : 0.01% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000061s : 0.08% optimize.opt_a.cse : 0.000058s : 0.08% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000011s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000139s : 0.18% optimize.convert_after_rewriter : 0.000012s : 0.02% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000494s : 0.65% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000066s : 0.09% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000028s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000069s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000514s : 0.67% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.069174s : 90.45% execute : 0.000012s : 0.02% TotalTime = 0.0807748, [21] [bootstrap]: 0.00031946 [type_inference]: 0.00268285 [auto_monad]: 9.262e-05 [graph_reusing]: 1.51002e-06 [inline]: 1.11002e-06 [parallel-infer-symbol]: 1.19e-06 [pre_auto_parallel]: 1.99099e-05 [insert-virtual-dataset]: 1.46998e-06 [parallel-infer-symbol-second]: 3.49944e-07 [dataset_repeat_opt]: 6.89994e-07 [pipeline_split]: 9.00007e-07 [optimize]: 0.00722251, [52] [py_interpret_to_execute]: 1.229e-05 [rewriter_before_opt_a]: 2.849e-05 [opt_a]: 0.00531552, [2] [Cycle 1]: 0.00156272, [43] [expand_dump_flag]: 3.49991e-06 [switch_simplify]: 2.934e-05 [loop_unroll]: 1.37399e-05 [a_1]: 0.00033822 [recompute_prepare]: 9.24e-06 [updatestate_depend_eliminate]: 8.51997e-06 [updatestate_assign_eliminate]: 5.35999e-06 [updatestate_loads_eliminate]: 6.84999e-06 [parameter_eliminate]: 3.44007e-06 [a_2]: 0.0001502 [accelerated_algorithm]: 8.87003e-06 [shard]: 1.37999e-06 [meta_shard_fg_expand]: 3.03006e-06 [shard_inline]: 8.70007e-06 [auto_parallel]: 1.163e-05 [parallel]: 6.81006e-06 [flash_sp]: 1.032e-05 [merge_comm]: 7.40995e-06 [allreduce_fusion]: 5.44009e-06 [matmul_add_comm_reduction]: 1.039e-05 [allreduce_slice_to_reducescatter]: 3.29921e-07 [virtual_shard_identity]: 9.91998e-06 [virtual_dataset]: 8.51997e-06 [get_grad_eliminate_]: 8.11997e-06 [virtual_output]: 8.03999e-06 [merge_forward]: 5.15999e-06 [cell_reuse_recompute_pass]: 1.84996e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.67801e-05 [before_grad]: 1.35e-05 [inplace_validation]: 4.34997e-06 [meta_fg_expand]: 5.09992e-06 [inplace_validation_after_expand]: 5.63008e-06 [flash_sp_send_recv_attached]: 3.55998e-06 [receive_attached]: 2.86009e-06 [after_resolve]: 1.12399e-05 [a_after_grad]: 1.31701e-05 [special_op_eliminate]: 8.31997e-06 [renormalize]: 0.00044588 [add_forward_monad_depend]: 3.06999e-06 [auto_monad_grad]: 1.32993e-06 [auto_monad_eliminator]: 2.39101e-05 [cse]: 2.587e-05 [a_3]: 6.10801e-05 [Cycle 2]: 0.00079182, [43] [expand_dump_flag]: 1.05007e-06 [switch_simplify]: 9.26002e-06 [loop_unroll]: 7.93999e-06 [a_1]: 0.00020216 [recompute_prepare]: 7.60006e-06 [updatestate_depend_eliminate]: 6.30005e-06 [updatestate_assign_eliminate]: 5.12006e-06 [updatestate_loads_eliminate]: 5.10993e-06 [parameter_eliminate]: 1.36008e-06 [a_2]: 0.00010819 [accelerated_algorithm]: 8.38994e-06 [shard]: 1.32003e-06 [meta_shard_fg_expand]: 2.56009e-06 [shard_inline]: 8.02998e-06 [auto_parallel]: 1.107e-05 [parallel]: 4.12995e-06 [flash_sp]: 2.25008e-06 [merge_comm]: 5.91995e-06 [allreduce_fusion]: 4.97e-06 [matmul_add_comm_reduction]: 8.00996e-06 [allreduce_slice_to_reducescatter]: 2.30037e-07 [virtual_shard_identity]: 9.25991e-06 [virtual_dataset]: 8.06001e-06 [get_grad_eliminate_]: 7.45989e-06 [virtual_output]: 7.48993e-06 [merge_forward]: 4.37e-06 [cell_reuse_recompute_pass]: 1.84006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.546e-05 [before_grad]: 1.26e-05 [inplace_validation]: 4.58001e-06 [meta_fg_expand]: 4.78001e-06 [inplace_validation_after_expand]: 5.3799e-06 [flash_sp_send_recv_attached]: 9.10019e-07 [receive_attached]: 7.70087e-07 [after_resolve]: 1.001e-05 [a_after_grad]: 1.258e-05 [special_op_eliminate]: 7.47002e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 8.30041e-07 [auto_monad_grad]: 1.14006e-06 [auto_monad_eliminator]: 1.829e-05 [cse]: 1.994e-05 [a_3]: 4.999e-05 [py_interpret_to_execute_after_opt_a]: 9.65002e-06 [slice_cell_reuse_recomputed_activation]: 1.15996e-06 [rewriter_after_opt_a]: 0.0001264 [convert_after_rewriter]: 1.205e-05 [order_py_execute_after_rewriter]: 6.57002e-06 [opt_b]: 0.00025497, [1] [Cycle 1]: 0.00024813, [7] [b_1]: 0.00016926 [b_2]: 1.027e-05 [updatestate_depend_eliminate]: 5.50994e-06 [updatestate_assign_eliminate]: 4.72006e-06 [updatestate_loads_eliminate]: 5.46e-06 [renormalize]: 3.20026e-07 [cse]: 2.00601e-05 [optimize_parallel_all_gather_comm]: 9.72999e-06 [overlap_param_gather]: 1.09e-06 [cconv]: 2.424e-05 [loop_unroll]: 0.00049339 [opt_after_cconv]: 0.00013767, [1] [Cycle 1]: 0.00013142, [7] [c_1]: 5.495e-05 [parameter_eliminate]: 2.70992e-06 [updatestate_depend_eliminate]: 8.27003e-06 [updatestate_assign_eliminate]: 4.88991e-06 [updatestate_loads_eliminate]: 5.43997e-06 [cse]: 2.227e-05 [renormalize]: 5.20027e-07 [remove_dup_value]: 9.67004e-06 [tuple_transform]: 6.99101e-05, [1] [Cycle 1]: 6.538e-05, [2] [d_1]: 5.524e-05 [renormalize]: 1.30036e-07 [partial_unused_args_eliminate]: 1.61992e-06 [add_cache_embedding]: 1.27801e-05 [add_recomputation]: 6.291e-05 [cse_after_recomputation]: 2.64801e-05, [1] [Cycle 1]: 2.20799e-05, [1] [cse]: 1.697e-05 [environ_conv]: 8.08004e-06 [swap_dp_allreduce_reducescatter]: 7.67992e-06 [bias_add_comm_swap]: 2.21003e-06 [label_micro_interleaved_index]: 2.24996e-06 [label_fine_grained_interleaved_index]: 1.86998e-06 [merge_cast_opt]: 7.59959e-07 [slice_recompute_activation]: 2.03005e-06 [micro_interleaved_order_control]: 1.89e-06 [assign_add_opt]: 2.938e-05 [ForceFp32Comm]: 8.19913e-07 [remove_cast_before_assign_add]: 7.10005e-06 [full_micro_interleaved_order_control]: 1.75997e-06 [reorder_send_recv_between_fp_bp]: 2.36998e-06 [comm_op_add_attrs]: 2.795e-05 [add_comm_op_reuse_tag]: 1.49e-06 [interleave_split_concat_branches]: 9.30042e-07 [interleave_parallel_branches]: 1.02003e-06 [overlap_opt_shard_in_pipeline]: 1.11002e-06 [overlap_opt_shard_grad_in_pipeline]: 2.10002e-06 [control_data_broadcast_order]: 1.20001e-06 [grouped_pairwise_exchange_alltoall]: 9.47993e-06 [offloading_packed_experts]: 1.75997e-06 [overlap_recompute_and_grad_model_parallel]: 2.23995e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.29925e-07 [overlap_recompute_allgather_and_fa_grad]: 6.927e-05 [overlap_grad_ring_attention]: 1.76998e-06 [overlap_grad_flash_sp]: 1.56e-05 [begin_end_overlap_inline]: 7.20029e-07 [split_matmul_comm_elemetwise]: 1.84006e-06 [split_layernorm_comm]: 1.67999e-06 [handle_group_info]: 5.50994e-06 [symbol_engine_optimizer]: 9.259e-05, [1] [Cycle 1]: 8.733e-05, [6] [build]: 5.04998e-06 [elim_shapecalc]: 1.382e-05 [elim_not_effective]: 1.726e-05 [opt_reshape]: 9.45001e-06 [fold_const_symbol]: 1.32701e-05 [renormalize]: 4.00003e-07 [pipeline_parallel_scheduler]: 8.30041e-07 [auto_monad_reorder]: 3.091e-05 [get_jit_bprop_graph]: 2.79979e-07 [rewriter_after_jit_bprop_graph]: 4.7998e-07 [eliminate_special_op_node]: 0.00053743 [distribtued_split]: 3.26601e-05 [validate]: 3.376e-05 [task_emit]: 0.0695506 [execute]: 6.92997e-06 Sums bootstrap : 0.000319s : 0.42% type_inference : 0.002683s : 3.50% auto_monad : 0.000093s : 0.12% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000020s : 0.03% insert-virtual-dataset : 0.000001s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000028s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000540s : 0.70% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000258s : 0.34% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000017s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000026s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000446s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000042s : 0.05% optimize.opt_a.cse : 0.000046s : 0.06% optimize.opt_a.a_3 : 0.000111s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000126s : 0.16% optimize.convert_after_rewriter : 0.000012s : 0.02% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000169s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000010s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000493s : 0.64% optimize.opt_after_cconv.c_1 : 0.000055s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000069s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000006s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000537s : 0.70% distribtued_split : 0.000033s : 0.04% validate : 0.000034s : 0.04% task_emit : 0.069551s : 90.64% execute : 0.000007s : 0.01% Time group info: ------[substitution.] 0.000137 63 4.85% : 0.000007s : 2: substitution.depend_value_elim 1.94% : 0.000003s : 5: substitution.elim_not_effective 2.08% : 0.000003s : 5: substitution.fold_const_symbol 5.38% : 0.000007s : 6: substitution.graph_param_transform 50.23% : 0.000069s : 1: substitution.inline 4.21% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.20% : 0.000004s : 6: substitution.load_eliminater 2.62% : 0.000004s : 2: substitution.reduce_all_const_elim 5.94% : 0.000008s : 10: substitution.remove_not_recompute_node 2.93% : 0.000004s : 2: substitution.replace_old_param 8.76% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.85% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002605 2 86.79% : 0.002261s : 1: type_inference.infer 13.21% : 0.000344s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000232 1420 0.76% : 0.000002s : 13: predicate.accumulaten_eliminater 1.20% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.72% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.85% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.24% : 0.000005s : 25: predicate.arithmetic_simplify 0.95% : 0.000002s : 13: predicate.cast_eliminate 0.95% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000000s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.23% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.77% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.94% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.26% : 0.000001s : 6: predicate.elim_not_effective 0.69% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.14% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_depend_swap 2.12% : 0.000005s : 31: predicate.environ_get_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.88% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.24% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.80% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.70% : 0.000013s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.94% : 0.000002s : 12: predicate.less_batch_normalization 1.61% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.34% : 0.000005s : 38: predicate.load_eliminater 1.35% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.19% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.83% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.74% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.54% : 0.000001s : 6: predicate.parallel_virtual_node 1.17% : 0.000003s : 14: predicate.partial_defer_inline 1.26% : 0.000003s : 19: predicate.partial_eliminate 0.81% : 0.000002s : 13: predicate.print_const_string_wrapper 0.95% : 0.000002s : 12: predicate.reduce_all_const_elim 1.11% : 0.000003s : 13: predicate.reduce_eliminate 0.63% : 0.000001s : 12: predicate.remove_not_recompute_node 1.12% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.90% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 1.07% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.88% : 0.000002s : 12: predicate.shard_identity_eliminate 1.70% : 0.000004s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.07% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.30% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.31% : 0.000010s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.84% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.68% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.55% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.69% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.62% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.57% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.37% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.24% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.58% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000158 4 10.59% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.41% : 0.000142s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089865 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000070s : 1: add_recomputation 0.04% : 0.000032s : 1: assign_add_opt 0.16% : 0.000146s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.39% : 0.000350s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.04% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.59% : 0.000528s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000505s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.29% : 0.001162s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 6.16% : 0.005535s : 1: opt_a 0.15% : 0.000138s : 1: opt_after_cconv 0.27% : 0.000245s : 1: opt_b 8.22% : 0.007387s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000074s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.02% : 0.000015s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.30% : 0.000265s : 1: renormalize.infer 0.22% : 0.000202s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000145s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000093s : 1: symbol_engine_optimizer 77.01% : 0.069208s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.95% : 0.002653s : 1: type_inference 0.08% : 0.000071s : 1: validate Time group info: ------[substitution.] 0.000125 63 3.86% : 0.000005s : 2: substitution.depend_value_elim 1.84% : 0.000002s : 5: substitution.elim_not_effective 1.61% : 0.000002s : 5: substitution.fold_const_symbol 5.43% : 0.000007s : 6: substitution.graph_param_transform 50.94% : 0.000063s : 1: substitution.inline 4.10% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.42% : 0.000004s : 6: substitution.load_eliminater 2.67% : 0.000003s : 2: substitution.reduce_all_const_elim 5.84% : 0.000007s : 10: substitution.remove_not_recompute_node 2.25% : 0.000003s : 2: substitution.replace_old_param 9.35% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.71% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002657 2 83.20% : 0.002210s : 1: type_inference.infer 16.80% : 0.000446s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000062 1 100.00% : 0.000062s : 1: match.inline ------[predicate.] 0.000232 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.25% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.76% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.28% : 0.000005s : 25: predicate.arithmetic_simplify 0.87% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.49% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.12% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.94% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.83% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.32% : 0.000001s : 6: predicate.elim_not_effective 0.63% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.08% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.89% : 0.000004s : 31: predicate.environ_get_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.25% : 0.000001s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.70% : 0.000002s : 12: predicate.incorporate_call_switch 5.68% : 0.000013s : 63: predicate.inline 1.08% : 0.000003s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.02% : 0.000002s : 12: predicate.less_batch_normalization 1.73% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.33% : 0.000005s : 38: predicate.load_eliminater 1.46% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.29% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.79% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.77% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.69% : 0.000002s : 13: predicate.minmaximum_grad 0.84% : 0.000002s : 6: predicate.mutable_eliminate 0.48% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.17% : 0.000003s : 14: predicate.partial_defer_inline 1.21% : 0.000003s : 19: predicate.partial_eliminate 0.98% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.06% : 0.000002s : 13: predicate.reduce_eliminate 0.53% : 0.000001s : 12: predicate.remove_not_recompute_node 1.09% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.92% : 0.000002s : 13: predicate.reshape_eliminate 0.84% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.12% : 0.000003s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000002s : 12: predicate.shard_identity_eliminate 1.47% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.00% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.27% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.55% : 0.000011s : 43: predicate.switch_simplify 0.74% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.74% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.63% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.61% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.83% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.55% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.59% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.37% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.31% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.56% : 0.000001s : 6: predicate.value_based_eliminate 0.85% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.56% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000152 4 11.26% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 88.74% : 0.000135s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089777 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000067s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.12% : 0.000106s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.39% : 0.000347s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.04% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000018s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.62% : 0.000552s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000014s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000504s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001108s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.18% : 0.000159s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000034s : 3: opt.transform.special_op_eliminate 0.06% : 0.000050s : 4: opt.transform.symbol_engine_opt 5.93% : 0.005320s : 1: opt_a 0.16% : 0.000142s : 1: opt_after_cconv 0.29% : 0.000258s : 1: opt_b 8.05% : 0.007231s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000075s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.27% : 0.000245s : 1: renormalize.infer 0.22% : 0.000196s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000132s : 1: rewriter_after_opt_a 0.04% : 0.000033s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000096s : 1: symbol_engine_optimizer 77.49% : 0.069572s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 3.01% : 0.002702s : 1: type_inference 0.07% : 0.000065s : 1: validate TotalTime = 0.0811559, [21] [bootstrap]: 0.00031931 [type_inference]: 0.0025709 [auto_monad]: 0.00013921 [graph_reusing]: 2.73995e-06 [inline]: 1.3801e-06 [parallel-infer-symbol]: 2.09e-06 [pre_auto_parallel]: 2.684e-05 [insert-virtual-dataset]: 2.79001e-06 [parallel-infer-symbol-second]: 4.4005e-07 [dataset_repeat_opt]: 1.51002e-06 [pipeline_split]: 1.94006e-06 [optimize]: 0.00727952, [52] [py_interpret_to_execute]: 1.48399e-05 [rewriter_before_opt_a]: 3.586e-05 [opt_a]: 0.00534761, [2] [Cycle 1]: 0.00155933, [43] [expand_dump_flag]: 3.80003e-06 [switch_simplify]: 2.891e-05 [loop_unroll]: 1.299e-05 [a_1]: 0.00034252 [recompute_prepare]: 8.9301e-06 [updatestate_depend_eliminate]: 8.34989e-06 [updatestate_assign_eliminate]: 5.78002e-06 [updatestate_loads_eliminate]: 6.78003e-06 [parameter_eliminate]: 3.75998e-06 [a_2]: 0.00014955 [accelerated_algorithm]: 8.92999e-06 [shard]: 2.10002e-06 [meta_shard_fg_expand]: 3.37011e-06 [shard_inline]: 8.68994e-06 [auto_parallel]: 1.19701e-05 [parallel]: 5.90994e-06 [flash_sp]: 1.007e-05 [merge_comm]: 7.82998e-06 [allreduce_fusion]: 5.92996e-06 [matmul_add_comm_reduction]: 1.125e-05 [allreduce_slice_to_reducescatter]: 5.10016e-07 [virtual_shard_identity]: 9.61998e-06 [virtual_dataset]: 8.19995e-06 [get_grad_eliminate_]: 7.78004e-06 [virtual_output]: 7.61007e-06 [merge_forward]: 5.81995e-06 [cell_reuse_recompute_pass]: 1.82993e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.69601e-05 [before_grad]: 1.39701e-05 [inplace_validation]: 5.41005e-06 [meta_fg_expand]: 5.37001e-06 [inplace_validation_after_expand]: 6.61006e-06 [flash_sp_send_recv_attached]: 4.51005e-06 [receive_attached]: 2.58e-06 [after_resolve]: 1.13701e-05 [a_after_grad]: 1.345e-05 [special_op_eliminate]: 8.20996e-06 [renormalize]: 0.00043141 [add_forward_monad_depend]: 3.70003e-06 [auto_monad_grad]: 1.97999e-06 [auto_monad_eliminator]: 3.41301e-05 [cse]: 3.434e-05 [a_3]: 5.76101e-05 [Cycle 2]: 0.00078004, [43] [expand_dump_flag]: 1.04005e-06 [switch_simplify]: 9.41998e-06 [loop_unroll]: 7.77002e-06 [a_1]: 0.00020132 [recompute_prepare]: 7.40995e-06 [updatestate_depend_eliminate]: 6.10005e-06 [updatestate_assign_eliminate]: 4.80004e-06 [updatestate_loads_eliminate]: 5.66e-06 [parameter_eliminate]: 1.20001e-06 [a_2]: 0.00010784 [accelerated_algorithm]: 8.35001e-06 [shard]: 1.05007e-06 [meta_shard_fg_expand]: 2.66999e-06 [shard_inline]: 8.15e-06 [auto_parallel]: 1.035e-05 [parallel]: 3.15998e-06 [flash_sp]: 4.04997e-06 [merge_comm]: 5.94999e-06 [allreduce_fusion]: 4.98001e-06 [matmul_add_comm_reduction]: 8.05e-06 [allreduce_slice_to_reducescatter]: 2.69967e-07 [virtual_shard_identity]: 8.71997e-06 [virtual_dataset]: 7.86991e-06 [get_grad_eliminate_]: 7.21996e-06 [virtual_output]: 7.19994e-06 [merge_forward]: 4.91005e-06 [cell_reuse_recompute_pass]: 2.03995e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.50501e-05 [before_grad]: 1.278e-05 [inplace_validation]: 4.21004e-06 [meta_fg_expand]: 4.75999e-06 [inplace_validation_after_expand]: 5.80994e-06 [flash_sp_send_recv_attached]: 7.10017e-07 [receive_attached]: 8.10018e-07 [after_resolve]: 9.67993e-06 [a_after_grad]: 1.172e-05 [special_op_eliminate]: 7.28993e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 8.5996e-07 [auto_monad_grad]: 1.07998e-06 [auto_monad_eliminator]: 1.71401e-05 [cse]: 1.925e-05 [a_3]: 4.956e-05 [py_interpret_to_execute_after_opt_a]: 7.99994e-06 [slice_cell_reuse_recomputed_activation]: 2.25008e-06 [rewriter_after_opt_a]: 0.00024716 [convert_after_rewriter]: 9.30007e-06 [order_py_execute_after_rewriter]: 5.21995e-06 [opt_b]: 0.00024974, [1] [Cycle 1]: 0.00024286, [7] [b_1]: 0.00016453 [b_2]: 1.031e-05 [updatestate_depend_eliminate]: 5.59003e-06 [updatestate_assign_eliminate]: 4.95999e-06 [updatestate_loads_eliminate]: 5.52996e-06 [renormalize]: 2.5006e-07 [cse]: 1.958e-05 [optimize_parallel_all_gather_comm]: 8.21997e-06 [overlap_param_gather]: 9.39937e-07 [cconv]: 1.46499e-05 [loop_unroll]: 0.00047618 [opt_after_cconv]: 0.00013466, [1] [Cycle 1]: 0.00012873, [7] [c_1]: 5.315e-05 [parameter_eliminate]: 2.31003e-06 [updatestate_depend_eliminate]: 8.27992e-06 [updatestate_assign_eliminate]: 4.87e-06 [updatestate_loads_eliminate]: 5.91006e-06 [cse]: 2.177e-05 [renormalize]: 3.69968e-07 [remove_dup_value]: 1.44599e-05 [tuple_transform]: 6.96999e-05, [1] [Cycle 1]: 6.51899e-05, [2] [d_1]: 5.573e-05 [renormalize]: 2.10013e-07 [partial_unused_args_eliminate]: 2.04996e-06 [add_cache_embedding]: 1.32599e-05 [add_recomputation]: 6.298e-05 [cse_after_recomputation]: 2.66101e-05, [1] [Cycle 1]: 2.23799e-05, [1] [cse]: 1.752e-05 [environ_conv]: 7.41996e-06 [swap_dp_allreduce_reducescatter]: 7.22997e-06 [bias_add_comm_swap]: 2.24996e-06 [label_micro_interleaved_index]: 1.65007e-06 [label_fine_grained_interleaved_index]: 1.97999e-06 [merge_cast_opt]: 1.11002e-06 [slice_recompute_activation]: 2.21003e-06 [micro_interleaved_order_control]: 1.55007e-06 [assign_add_opt]: 2.95501e-05 [ForceFp32Comm]: 5.30039e-07 [remove_cast_before_assign_add]: 6.73998e-06 [full_micro_interleaved_order_control]: 2.07999e-06 [reorder_send_recv_between_fp_bp]: 2.15007e-06 [comm_op_add_attrs]: 2.832e-05 [add_comm_op_reuse_tag]: 1.93005e-06 [interleave_split_concat_branches]: 9.39937e-07 [interleave_parallel_branches]: 8.19913e-07 [overlap_opt_shard_in_pipeline]: 1.15007e-06 [overlap_opt_shard_grad_in_pipeline]: 2.84996e-06 [control_data_broadcast_order]: 8.69972e-07 [grouped_pairwise_exchange_alltoall]: 9.61998e-06 [offloading_packed_experts]: 2.30002e-06 [overlap_recompute_and_grad_model_parallel]: 2.13005e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.01002e-06 [overlap_recompute_allgather_and_fa_grad]: 6.88401e-05 [overlap_grad_ring_attention]: 2.05997e-06 [overlap_grad_flash_sp]: 1.557e-05 [begin_end_overlap_inline]: 7.20029e-07 [split_matmul_comm_elemetwise]: 2.2701e-06 [split_layernorm_comm]: 2.13995e-06 [handle_group_info]: 5.87991e-06 [symbol_engine_optimizer]: 8.82599e-05, [1] [Cycle 1]: 8.342e-05, [6] [build]: 4.57e-06 [elim_shapecalc]: 1.284e-05 [elim_not_effective]: 1.64399e-05 [opt_reshape]: 8.47992e-06 [fold_const_symbol]: 1.43501e-05 [renormalize]: 1.70083e-07 [pipeline_parallel_scheduler]: 1.70001e-06 [auto_monad_reorder]: 3.325e-05 [get_jit_bprop_graph]: 4.7998e-07 [rewriter_after_jit_bprop_graph]: 4.69969e-07 [eliminate_special_op_node]: 0.00049718 [distribtued_split]: 4.192e-05 [validate]: 3.58199e-05 [task_emit]: 0.0699424 [execute]: 1.15599e-05 Sums bootstrap : 0.000319s : 0.41% type_inference : 0.002571s : 3.33% auto_monad : 0.000139s : 0.18% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000027s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000038s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000544s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000257s : 0.33% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000431s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000247s : 0.32% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000476s : 0.62% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000030s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000069s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000006s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000033s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000497s : 0.64% distribtued_split : 0.000042s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.069942s : 90.66% execute : 0.000012s : 0.01% TotalTime = 0.081362, [21] [bootstrap]: 0.00029796 [type_inference]: 0.0024481 [auto_monad]: 0.00019187 [graph_reusing]: 2.50991e-06 [inline]: 1.45007e-06 [parallel-infer-symbol]: 2.32004e-06 [pre_auto_parallel]: 2.613e-05 [insert-virtual-dataset]: 2.56998e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 1.33005e-06 [pipeline_split]: 1.53005e-06 [optimize]: 0.00718944, [52] [py_interpret_to_execute]: 1.60299e-05 [rewriter_before_opt_a]: 3.392e-05 [opt_a]: 0.00534805, [2] [Cycle 1]: 0.00156142, [43] [expand_dump_flag]: 3.35998e-06 [switch_simplify]: 3.002e-05 [loop_unroll]: 1.33e-05 [a_1]: 0.00033967 [recompute_prepare]: 8.52998e-06 [updatestate_depend_eliminate]: 9.09006e-06 [updatestate_assign_eliminate]: 5.6799e-06 [updatestate_loads_eliminate]: 7.07002e-06 [parameter_eliminate]: 3.14997e-06 [a_2]: 0.00011541 [accelerated_algorithm]: 8.29005e-06 [shard]: 1.94006e-06 [meta_shard_fg_expand]: 3.40003e-06 [shard_inline]: 8.50996e-06 [auto_parallel]: 1.164e-05 [parallel]: 7.07002e-06 [flash_sp]: 9.45001e-06 [merge_comm]: 7.37002e-06 [allreduce_fusion]: 5.15999e-06 [matmul_add_comm_reduction]: 1.144e-05 [allreduce_slice_to_reducescatter]: 4.50062e-07 [virtual_shard_identity]: 9.40997e-06 [virtual_dataset]: 8.33999e-06 [get_grad_eliminate_]: 7.86001e-06 [virtual_output]: 7.49005e-06 [merge_forward]: 5.98992e-06 [cell_reuse_recompute_pass]: 1.71002e-06 [cell_reuse_handle_not_recompute_node_pass]: 4.12901e-05 [before_grad]: 1.528e-05 [inplace_validation]: 5.17e-06 [meta_fg_expand]: 5.33997e-06 [inplace_validation_after_expand]: 5.84999e-06 [flash_sp_send_recv_attached]: 4.38001e-06 [receive_attached]: 2.76999e-06 [after_resolve]: 1.18701e-05 [a_after_grad]: 1.298e-05 [special_op_eliminate]: 8.08993e-06 [renormalize]: 0.00044786 [add_forward_monad_depend]: 3.61004e-06 [auto_monad_grad]: 1.89e-06 [auto_monad_eliminator]: 3.118e-05 [cse]: 3.06701e-05 [a_3]: 5.74e-05 [Cycle 2]: 0.00085266, [43] [expand_dump_flag]: 1.13994e-06 [switch_simplify]: 9.54e-06 [loop_unroll]: 7.88993e-06 [a_1]: 0.00020255 [recompute_prepare]: 7.29994e-06 [updatestate_depend_eliminate]: 6.02996e-06 [updatestate_assign_eliminate]: 4.61994e-06 [updatestate_loads_eliminate]: 5.46e-06 [parameter_eliminate]: 1.31002e-06 [a_2]: 0.00010584 [accelerated_algorithm]: 8.33999e-06 [shard]: 1.31992e-06 [meta_shard_fg_expand]: 2.56998e-06 [shard_inline]: 8.01007e-06 [auto_parallel]: 1.14701e-05 [parallel]: 4.02995e-06 [flash_sp]: 3.10992e-06 [merge_comm]: 6.24999e-06 [allreduce_fusion]: 5.04998e-06 [matmul_add_comm_reduction]: 8.48994e-06 [allreduce_slice_to_reducescatter]: 3.30037e-07 [virtual_shard_identity]: 1.19e-05 [virtual_dataset]: 8.09005e-06 [get_grad_eliminate_]: 7.52008e-06 [virtual_output]: 7.40995e-06 [merge_forward]: 6.12007e-06 [cell_reuse_recompute_pass]: 2.62994e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.606e-05 [before_grad]: 1.281e-05 [inplace_validation]: 4.44008e-06 [meta_fg_expand]: 5.04008e-06 [inplace_validation_after_expand]: 5.4799e-06 [flash_sp_send_recv_attached]: 8.79983e-07 [receive_attached]: 8.50065e-07 [after_resolve]: 1.026e-05 [a_after_grad]: 1.17699e-05 [special_op_eliminate]: 7.70006e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 1.2801e-06 [auto_monad_grad]: 1.16997e-06 [auto_monad_eliminator]: 2.057e-05 [cse]: 2.033e-05 [a_3]: 4.93e-05 [py_interpret_to_execute_after_opt_a]: 9.69996e-06 [slice_cell_reuse_recomputed_activation]: 2.33995e-06 [rewriter_after_opt_a]: 0.0001474 [convert_after_rewriter]: 1.16e-05 [order_py_execute_after_rewriter]: 6.18002e-06 [opt_b]: 0.00024454, [1] [Cycle 1]: 0.00023867, [7] [b_1]: 0.00016275 [b_2]: 9.69006e-06 [updatestate_depend_eliminate]: 5.17e-06 [updatestate_assign_eliminate]: 4.75999e-06 [updatestate_loads_eliminate]: 5.38991e-06 [renormalize]: 2.89991e-07 [cse]: 1.877e-05 [optimize_parallel_all_gather_comm]: 8.95991e-06 [overlap_param_gather]: 9.60077e-07 [cconv]: 2.283e-05 [loop_unroll]: 0.00048835 [opt_after_cconv]: 0.00013484, [1] [Cycle 1]: 0.00012879, [7] [c_1]: 5.26799e-05 [parameter_eliminate]: 2.37999e-06 [updatestate_depend_eliminate]: 8.49995e-06 [updatestate_assign_eliminate]: 5.13007e-06 [updatestate_loads_eliminate]: 5.52007e-06 [cse]: 2.122e-05 [renormalize]: 4.10015e-07 [remove_dup_value]: 1.225e-05 [tuple_transform]: 6.892e-05, [1] [Cycle 1]: 6.49501e-05, [2] [d_1]: 5.57201e-05 [renormalize]: 1.60071e-07 [partial_unused_args_eliminate]: 2.13005e-06 [add_cache_embedding]: 1.35299e-05 [add_recomputation]: 5.97701e-05 [cse_after_recomputation]: 2.617e-05, [1] [Cycle 1]: 2.177e-05, [1] [cse]: 1.7e-05 [environ_conv]: 7.27002e-06 [swap_dp_allreduce_reducescatter]: 6.98993e-06 [bias_add_comm_swap]: 2.12004e-06 [label_micro_interleaved_index]: 1.91003e-06 [label_fine_grained_interleaved_index]: 1.93994e-06 [merge_cast_opt]: 9.89996e-07 [slice_recompute_activation]: 1.9701e-06 [micro_interleaved_order_control]: 1.89e-06 [assign_add_opt]: 2.91599e-05 [ForceFp32Comm]: 8.2003e-07 [remove_cast_before_assign_add]: 7.22008e-06 [full_micro_interleaved_order_control]: 1.93994e-06 [reorder_send_recv_between_fp_bp]: 1.91003e-06 [comm_op_add_attrs]: 2.617e-05 [add_comm_op_reuse_tag]: 1.64006e-06 [interleave_split_concat_branches]: 8.00006e-07 [interleave_parallel_branches]: 6.40051e-07 [overlap_opt_shard_in_pipeline]: 9.59961e-07 [overlap_opt_shard_grad_in_pipeline]: 2.46998e-06 [control_data_broadcast_order]: 9.49949e-07 [grouped_pairwise_exchange_alltoall]: 9.49996e-06 [offloading_packed_experts]: 2.04996e-06 [overlap_recompute_and_grad_model_parallel]: 1.50001e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.10019e-07 [overlap_recompute_allgather_and_fa_grad]: 7.244e-05 [overlap_grad_ring_attention]: 1.82993e-06 [overlap_grad_flash_sp]: 1.36599e-05 [begin_end_overlap_inline]: 8.69972e-07 [split_matmul_comm_elemetwise]: 1.9999e-06 [split_layernorm_comm]: 1.84006e-06 [handle_group_info]: 4.58001e-06 [symbol_engine_optimizer]: 8.89499e-05, [1] [Cycle 1]: 8.41201e-05, [6] [build]: 4.30993e-06 [elim_shapecalc]: 1.27601e-05 [elim_not_effective]: 1.73e-05 [opt_reshape]: 8.66002e-06 [fold_const_symbol]: 1.365e-05 [renormalize]: 3.10014e-07 [pipeline_parallel_scheduler]: 1.42003e-06 [auto_monad_reorder]: 3.13e-05 [get_jit_bprop_graph]: 3.89991e-07 [rewriter_after_jit_bprop_graph]: 4.1991e-07 [eliminate_special_op_node]: 0.00050447 [distribtued_split]: 5.17899e-05 [validate]: 3.381e-05 [task_emit]: 0.0703154 [execute]: 1.07201e-05 Sums bootstrap : 0.000298s : 0.39% type_inference : 0.002448s : 3.16% auto_monad : 0.000192s : 0.25% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000542s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000221s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000021s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000012s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000057s : 0.07% optimize.opt_a.before_grad : 0.000028s : 0.04% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000448s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000052s : 0.07% optimize.opt_a.cse : 0.000051s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000147s : 0.19% optimize.convert_after_rewriter : 0.000012s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000488s : 0.63% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000026s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000072s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000504s : 0.65% distribtued_split : 0.000052s : 0.07% validate : 0.000034s : 0.04% task_emit : 0.070315s : 90.89% execute : 0.000011s : 0.01% Time group info: ------[substitution.] 0.000128 63 5.18% : 0.000007s : 2: substitution.depend_value_elim 2.01% : 0.000003s : 5: substitution.elim_not_effective 2.30% : 0.000003s : 5: substitution.fold_const_symbol 5.78% : 0.000007s : 6: substitution.graph_param_transform 49.95% : 0.000064s : 1: substitution.inline 4.06% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.12% : 0.000004s : 6: substitution.load_eliminater 2.37% : 0.000003s : 2: substitution.reduce_all_const_elim 6.06% : 0.000008s : 10: substitution.remove_not_recompute_node 2.32% : 0.000003s : 2: substitution.replace_old_param 9.06% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.80% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002540 2 87.00% : 0.002210s : 1: type_inference.infer 13.00% : 0.000330s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000063 1 100.00% : 0.000063s : 1: match.inline ------[predicate.] 0.000231 1420 0.77% : 0.000002s : 13: predicate.accumulaten_eliminater 1.09% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.27% : 0.000005s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.80% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.39% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.77% : 0.000002s : 12: predicate.depend_value_elim 0.78% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.84% : 0.000002s : 13: predicate.dict_get_item_eliminator 1.01% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.32% : 0.000001s : 6: predicate.elim_not_effective 0.59% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.22% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000004s : 31: predicate.environ_get_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.26% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 6.01% : 0.000014s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.01% : 0.000002s : 12: predicate.less_batch_normalization 1.65% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.36% : 0.000005s : 38: predicate.load_eliminater 1.22% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.12% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.88% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicat TotalTime = 0.0815894, [21] [bootstrap]: 0.00030413 [type_inference]: 0.00249019 [auto_monad]: 0.00012939 [graph_reusing]: 2.33005e-06 [inline]: 1.39e-06 [parallel-infer-symbol]: 2.31992e-06 [pre_auto_parallel]: 2.545e-05 [insert-virtual-dataset]: 2.71993e-06 [parallel-infer-symbol-second]: 4.59957e-07 [dataset_repeat_opt]: 1.46998e-06 [pipeline_split]: 1.35996e-06 [optimize]: 0.00725964, [52] [py_interpret_to_execute]: 1.592e-05 [rewriter_before_opt_a]: 3.53401e-05 [opt_a]: 0.00527262, [2] [Cycle 1]: 0.00155489, [43] [expand_dump_flag]: 2.86999e-06 [switch_simplify]: 2.981e-05 [loop_unroll]: 1.315e-05 [a_1]: 0.00034685 [recompute_prepare]: 8.47003e-06 [updatestate_depend_eliminate]: 8.40996e-06 [updatestate_assign_eliminate]: 6.07001e-06 [updatestate_loads_eliminate]: 7.51996e-06 [parameter_eliminate]: 3.66999e-06 [a_2]: 0.0001195 [accelerated_algorithm]: 8.27992e-06 [shard]: 2.33005e-06 [meta_shard_fg_expand]: 3.28e-06 [shard_inline]: 8.43999e-06 [auto_parallel]: 1.174e-05 [parallel]: 7.32997e-06 [flash_sp]: 1.11799e-05 [merge_comm]: 7.57002e-06 [allreduce_fusion]: 6.70995e-06 [matmul_add_comm_reduction]: 1.102e-05 [allreduce_slice_to_reducescatter]: 9.79984e-07 [virtual_shard_identity]: 9.86003e-06 [virtual_dataset]: 8.02998e-06 [get_grad_eliminate_]: 7.95e-06 [virtual_output]: 7.53999e-06 [merge_forward]: 6.12997e-06 [cell_reuse_recompute_pass]: 1.66998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.665e-05 [before_grad]: 1.37701e-05 [inplace_validation]: 4.76011e-06 [meta_fg_expand]: 5.32996e-06 [inplace_validation_after_expand]: 6.8699e-06 [flash_sp_send_recv_attached]: 5.02006e-06 [receive_attached]: 2.79001e-06 [after_resolve]: 1.19599e-05 [a_after_grad]: 1.263e-05 [special_op_eliminate]: 8.30996e-06 [renormalize]: 0.00043566 [add_forward_monad_depend]: 3.98001e-06 [auto_monad_grad]: 1.90001e-06 [auto_monad_eliminator]: 3.37601e-05 [cse]: 3.395e-05 [a_3]: 5.796e-05 [Cycle 2]: 0.00078742, [43] [expand_dump_flag]: 1.07009e-06 [switch_simplify]: 8.96992e-06 [loop_unroll]: 7.67002e-06 [a_1]: 0.00020578 [recompute_prepare]: 7.47002e-06 [updatestate_depend_eliminate]: 5.99993e-06 [updatestate_assign_eliminate]: 4.67e-06 [updatestate_loads_eliminate]: 5.86e-06 [parameter_eliminate]: 1.24006e-06 [a_2]: 0.00010609 [accelerated_algorithm]: 8.16002e-06 [shard]: 1.25996e-06 [meta_shard_fg_expand]: 2.78e-06 [shard_inline]: 7.78004e-06 [auto_parallel]: 1.11e-05 [parallel]: 3.75998e-06 [flash_sp]: 3.49001e-06 [merge_comm]: 6.42997e-06 [allreduce_fusion]: 5.18991e-06 [matmul_add_comm_reduction]: 8.37003e-06 [allreduce_slice_to_reducescatter]: 2.80095e-07 [virtual_shard_identity]: 8.82999e-06 [virtual_dataset]: 7.80006e-06 [get_grad_eliminate_]: 7.46001e-06 [virtual_output]: 6.99004e-06 [merge_forward]: 4.80004e-06 [cell_reuse_recompute_pass]: 1.91003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.569e-05 [before_grad]: 1.272e-05 [inplace_validation]: 4.50003e-06 [meta_fg_expand]: 4.84008e-06 [inplace_validation_after_expand]: 5.11995e-06 [flash_sp_send_recv_attached]: 7.89994e-07 [receive_attached]: 7.49948e-07 [after_resolve]: 9.81009e-06 [a_after_grad]: 1.19901e.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.79% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.60% : 0.000001s : 6: predicate.parallel_virtual_node 1.10% : 0.000003s : 14: predicate.partial_defer_inline 1.32% : 0.000003s : 19: predicate.partial_eliminate 0.84% : 0.000002s : 13: predicate.print_const_string_wrapper 0.83% : 0.000002s : 12: predicate.reduce_all_const_elim 1.07% : 0.000002s : 13: predicate.reduce_eliminate 0.63% : 0.000001s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.34% : 0.000001s : 6: predicate.reset_defer_inline 0.94% : 0.000002s : 13: predicate.reshape_eliminate 0.86% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.52% : 0.000001s : 6: predicate.row_tensor_eliminate 0.96% : 0.000002s : 12: predicate.same_eliminate 0.45% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.00% : 0.000002s : 12: predicate.shard_identity_eliminate 1.37% : 0.000003s : 18: predicate.special_op_eliminate 0.98% : 0.000002s : 12: predicate.specialize_transform 1.04% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.25% : 0.000005s : 38: predicate.stopgrad_eliminater 0.47% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.73% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.21% : 0.000010s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.78% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.75% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.50% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.78% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.80% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.46% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.69% : 0.000002s : 6: predicate.value_based_eliminate 0.79% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.82% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000142 4 6.54% : 0.000009s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.46% : 0.000133s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090251 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000068s : 1: add_recomputation 0.04% : 0.000034s : 1: assign_add_opt 0.17% : 0.000152s : 1: auto_monad 0.04% : 0.000039s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.39% : 0.000348s : 1: bootstrap 0.02% : 0.000018s : 1: cconv 0.04% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.00005e-05 [special_op_eliminate]: 7.51007e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 1.00001e-06 [auto_monad_grad]: 1.10001e-06 [auto_monad_eliminator]: 1.76199e-05 [cse]: 2.05101e-05 [a_3]: 4.935e-05 [py_interpret_to_execute_after_opt_a]: 9.71998e-06 [slice_cell_reuse_recomputed_activation]: 2.32994e-06 [rewriter_after_opt_a]: 0.00014653 [convert_after_rewriter]: 9.27004e-06 [order_py_execute_after_rewriter]: 5.99003e-06 [opt_b]: 0.00024367, [1] [Cycle 1]: 0.00023839, [7] [b_1]: 0.00016347 [b_2]: 9.87994e-06 [updatestate_depend_eliminate]: 5.29003e-06 [updatestate_assign_eliminate]: 4.42995e-06 [updatestate_loads_eliminate]: 5.13997e-06 [renormalize]: 3.7998e-07 [cse]: 1.979e-05 [optimize_parallel_all_gather_comm]: 8.25e-06 [overlap_param_gather]: 1.32003e-06 [cconv]: 2.39999e-05 [loop_unroll]: 0.0006418 [opt_after_cconv]: 0.00013637, [1] [Cycle 1]: 0.00013036, [7] [c_1]: 5.404e-05 [parameter_eliminate]: 2.5701e-06 [updatestate_depend_eliminate]: 8.64e-06 [updatestate_assign_eliminate]: 4.51994e-06 [updatestate_loads_eliminate]: 5.56e-06 [cse]: 2.285e-05 [renormalize]: 5.09899e-07 [remove_dup_value]: 1.32601e-05 [tuple_transform]: 7.1e-05, [1] [Cycle 1]: 6.678e-05, [2] [d_1]: 5.73e-05 [renormalize]: 2.20025e-07 [partial_unused_args_eliminate]: 1.93005e-06 [add_cache_embedding]: 1.343e-05 [add_recomputation]: 6.286e-05 [cse_after_recomputation]: 2.71599e-05, [1] [Cycle 1]: 2.286e-05, [1] [cse]: 1.782e-05 [environ_conv]: 7.61996e-06 [swap_dp_allreduce_reducescatter]: 7.99994e-06 [bias_add_comm_swap]: 2.44996e-06 [label_micro_interleaved_index]: 1.64006e-06 [label_fine_grained_interleaved_index]: 2.09e-06 [merge_cast_opt]: 9.50065e-07 [slice_recompute_activation]: 1.55007e-06 [micro_interleaved_order_control]: 1.57999e-06 [assign_add_opt]: 2.79599e-05 [ForceFp32Comm]: 7.3004e-07 [remove_cast_before_assign_add]: 6.98003e-06 [full_micro_interleaved_order_control]: 1.89e-06 [reorder_send_recv_between_fp_bp]: 1.71002e-06 [comm_op_add_attrs]: 2.343e-05 [add_comm_op_reuse_tag]: 1.82993e-06 [interleave_split_concat_branches]: 5.69969e-07 [interleave_parallel_branches]: 7.10017e-07 [overlap_opt_shard_in_pipeline]: 8.2003e-07 [overlap_opt_shard_grad_in_pipeline]: 1.72004e-06 [control_data_broadcast_order]: 8.79983e-07 [grouped_pairwise_exchange_alltoall]: 7.89005e-06 [offloading_packed_experts]: 1.56008e-06 [overlap_recompute_and_grad_model_parallel]: 1.56008e-06 [overlap_grad_matmul_and_grad_allreduce]: 6.60075e-07 [overlap_recompute_allgather_and_fa_grad]: 6.787e-05 [overlap_grad_ring_attention]: 1.82993e-06 [overlap_grad_flash_sp]: 1.4e-05 [begin_end_overlap_inline]: 5.69969e-07 [split_matmul_comm_elemetwise]: 1.72993e-06 [split_layernorm_comm]: 1.44006e-06 [handle_group_info]: 4.02005e-06 [symbol_engine_optimizer]: 8.961e-05, [1] [Cycle 1]: 8.50599e-05, [6] [build]: 4.99003e-06 [elim_shapecalc]: 1.322e-05 [elim_not_effective]: 1.71601e-05 [opt_reshape]: 8.71997e-06 [fold_const_symbol]: 1.35701e-05 [renormalize]: 2.69967e-07 [pipeline_parallel_scheduler]: 1.21992e-06 [auto_monad_reorder]: 2.749e-05 [get_jit_bprop_graph]: 3.60073e-07 [rewriter_after_jit_bprop_graph]: 3.7998e-07 [eliminate_special_op_node]: 0.00051569 [distribtued_split]: 3.69101e-05 [validate]: 3.398e-05 [task_emit]: 0.0704347 [execute]: 1.003e-05 Sums bootstrap : 0.000304s : 0.39% type_inference : 0.002490s : 3.21% auto_monad : 0.000129s : 0.17% graph_reusing 0s : 1: distribtued_split 0.57% : 0.000511s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000009s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.54% : 0.000485s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001105s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 5.93% : 0.005351s : 1: opt_a 0.15% : 0.000139s : 1: opt_after_cconv 0.28% : 0.000253s : 1: opt_b 8.07% : 0.007287s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000074s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.26% : 0.000234s : 1: renormalize.infer 0.21% : 0.000192s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.28% : 0.000255s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000091s : 1: symbol_engine_optimizer 77.53% : 0.069971s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.87% : 0.002589s : 1: type_inference 0.08% : 0.000071s : 1: validate : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000553s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000226s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000012s : 0.02% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000436s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000147s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0 Time group info: ------[substitution.] 0.000131 63 4.95% : 0.000007s : 2: substitution.depend_value_elim 1.78% : 0.000002s : 5: substitution.elim_not_effective 1.82% : 0.000002s : 5: substitution.fold_const_symbol 5.17% : 0.000007s : 6: substitution.graph_param_transform 50.79% : 0.000067s : 1: substitution.inline 4.41% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.32% : 0.000004s : 6: substitution.load_eliminater 2.41% : 0.000003s : 2: substitution.reduce_all_const_elim 5.93% : 0.000008s : 10: substitution.remove_not_recompute_node 2.65% : 0.000003s : 2: substitution.replace_old_param 8.76% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.00% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002419 2 89.02% : 0.002153s : 1: type_inference.infer 10.98% : 0.000266s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000066 1 100.00% : 0.000066s : 1: match.inline ------[predicate.] 0.000230 1420 0.88% : 0.000002s : 13: predicate.accumulaten_eliminater 1.18% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.09% : 0.000005s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.71% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.41% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.29% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.79% : 0.000002s : 12: predicate.depend_value_elim 0.84% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.61% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.26% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_depend_swap 2.01% : 0.000005s : 31: predicate.environ_get_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.13% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.55% : 0.000013s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.93% : 0.000002s : 12: predicate.less_batch_normalization 1.71% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.40% : 0.000006s : 38: predicate.load_eliminater 1.29% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.28% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.78% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.72% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicat.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000642s : 0.83% optimize.opt_after_cconv.c_1 : 0.000054s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000009s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000057s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000028s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000023s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000008s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000068s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000004s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_enginee.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.73% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.44% : 0.000001s : 6: predicate.parallel_virtual_node 1.10% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.78% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.04% : 0.000002s : 13: predicate.reduce_eliminate 0.70% : 0.000002s : 12: predicate.remove_not_recompute_node 1.07% : 0.000002s : 25: predicate.replace_applicator 0.45% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.82% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.09% : 0.000003s : 12: predicate.shard_identity_eliminate 1.38% : 0.000003s : 18: predicate.special_op_eliminate 0.91% : 0.000002s : 12: predicate.specialize_transform 1.16% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 1.04% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.31% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.73% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.68% : 0.000011s : 43: predicate.switch_simplify 0.75% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.86% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.68% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.79% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.78% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.55% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.76% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.31% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.34% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.61% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000152 4 10.27% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.73% : 0.000137s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090407 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.23% : 0.000206s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.36% : 0.000323s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.03% : 0.000030s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000016s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000007s : 1: dataset_repeat_opt 0.07% : 0.00006_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000027s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000516s : 0.66% distribtued_split : 0.000037s : 0.05% validate : 0.000034s : 0.04% task_emit : 0.070435s : 90.78% execute : 0.000010s : 0.01% 1s : 1: distribtued_split 0.57% : 0.000518s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000498s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.25% : 0.001128s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 5.92% : 0.005352s : 1: opt_a 0.15% : 0.000139s : 1: opt_after_cconv 0.27% : 0.000248s : 1: opt_b 7.96% : 0.007198s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.09% : 0.000078s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.27% : 0.000240s : 1: renormalize.infer 0.22% : 0.000202s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000153s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000092s : 1: symbol_engine_optimizer 77.81% : 0.070344s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.73% : 0.002466s : 1: type_inference 0.08% : 0.000069s : 1: validate Time group info: ------[substitution.] 0.000132 63 4.88% : 0.000006s : 2: substitution.depend_value_elim 1.78% : 0.000002s : 5: substitution.elim_not_effective 1.80% : 0.000002s : 5: substitution.fold_const_symbol 5.49% : 0.000007s : 6: substitution.graph_param_transform 50.25% : 0.000066s : 1: substitution.inline 3.93% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.24% : 0.000004s : 6: substitution.load_eliminater 2.74% : 0.000004s : 2: substitution.reduce_all_const_elim 5.86% : 0.000008s : 10: substitution.remove_not_recompute_node 2.70% : 0.000004s : 2: substitution.replace_old_param 9.02% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.30% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002459 2 89.24% : 0.002195s : 1: type_inference.infer 10.76% : 0.000265s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000230 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.19% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.75% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.21% : 0.000005s : 25: predicate.arithmetic_simplify 0.89% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.43% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.33% : 0.000001s : 6: predicate.elim_not_effective 0.54% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.09% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_depend_swap 1.95% : 0.000004s : 31: predicate.environ_get_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000003s : 14: predicate.float_depend_g_call 0.80% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.90% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.82% : 0.000013s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.03% : 0.000002s : 12: predicate.less_batch_normalization 1.69% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.36% : 0.000005s : 38: predicate.load_eliminater 1.37% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.81% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.81% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.12% : 0.000003s : 14: predicate.partial_defer_inline 1.27% : 0.000003s : 19: predicate.partial_eliminate 0.84% : 0.000002s : 13: predicate.print_const_string_wrapper 0.86% : 0.000002s : 12: predicate.reduce_all_const_elim 1.08% : 0.000002s : 13: predicate.reduce_eliminate 0.59% : 0.000001s : 12: predicate.remove_not_recompute_node 1.13% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.85% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.47% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.35% : 0.000003s : 18: predicate.special_op_eliminate 0.98% : 0.000002s : 12: predicate.specialize_transform 1.06% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.35% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.63% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.27% : 0.000010s : 43: predicate.switch_simplify 0.83% : 0.000002s : 13: predicate.tile_eliminate 0.78% : 0.000002s : 13: predicate.transpose_eliminate 1.82% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.72% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.61% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.69% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.55% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.37% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.51% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.52% : 0.000001s : 6: predicate.value_based_eliminate 0.78% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.50% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000149 4 11.33% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 88.67% : 0.000132s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090682 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000068s : 1: add_recomputation 0.04% : 0.000032s : 1: assign_add_opt 0.16% : 0.000142s : 1: auto_monad 0.04% : 0.000034s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.36% : 0.000328s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.03% : 0.000028s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000045s : 1: distribtued_split 0.58% : 0.000530s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000011s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.72% : 0.000652s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001113s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 5.82% : 0.005276s : 1: opt_a 0.16% : 0.000141s : 1: opt_after_cconv 0.27% : 0.000247s : 1: opt_b 8.02% : 0.007268s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.08% : 0.000073s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000241s : 1: renormalize.infer 0.21% : 0.000189s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000152s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000092s : 1: symbol_engine_optimizer 77.70% : 0.070463s : 1: task_emit 0.08% : 0.000074s : 1: tuple_transform 2.85% : 0.002581s : 1: type_inference 0.07% : 0.000066s : 1: validate TotalTime = 0.0822184, [21] [bootstrap]: 0.00031036 [type_inference]: 0.00246422 [auto_monad]: 0.00010193 [graph_reusing]: 1.64006e-06 [inline]: 1.07998e-06 [parallel-infer-symbol]: 1.42993e-06 [pre_auto_parallel]: 2.109e-05 [insert-virtual-dataset]: 1.77999e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 9.00007e-07 [pipeline_split]: 1.09e-06 [optimize]: 0.00730961, [52] [py_interpret_to_execute]: 1.308e-05 [rewriter_before_opt_a]: 3.112e-05 [opt_a]: 0.00554067, [2] [Cycle 1]: 0.00157678, [43] [expand_dump_flag]: 1.84996e-06 [switch_simplify]: 2.52301e-05 [loop_unroll]: 1.29e-05 [a_1]: 0.00032927 [recompute_prepare]: 9.01998e-06 [updatestate_depend_eliminate]: 7.65e-06 [updatestate_assign_eliminate]: 5.84999e-06 [updatestate_loads_eliminate]: 6.11995e-06 [parameter_eliminate]: 2.10002e-06 [a_2]: 0.00015843 [accelerated_algorithm]: 9.85002e-06 [shard]: 1.60991e-06 [meta_shard_fg_expand]: 3.19001e-06 [shard_inline]: 9.31998e-06 [auto_parallel]: 1.139e-05 [parallel]: 5.96e-06 [flash_sp]: 7.90996e-06 [merge_comm]: 7.42998e-06 [allreduce_fusion]: 5.20993e-06 [matmul_add_comm_reduction]: 8.87003e-06 [allreduce_slice_to_reducescatter]: 3.39933e-07 [virtual_shard_identity]: 9.85002e-06 [virtual_dataset]: 8.47003e-06 [get_grad_eliminate_]: 8.82999e-06 [virtual_output]: 8.04011e-06 [merge_forward]: 6.09003e-06 [cell_reuse_recompute_pass]: 1.69e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.652e-05 [before_grad]: 1.601e-05 [inplace_validation]: 4.62995e-06 [meta_fg_expand]: 5.01005e-06 [inplace_validation_after_expand]: 5.34998e-06 [flash_sp_send_recv_attached]: 2.85008e-06 [receive_attached]: 1.73005e-06 [after_resolve]: 1.134e-05 [a_after_grad]: 1.29e-05 [special_op_eliminate]: 8.57993e-06 [renormalize]: 0.00048381 [add_forward_monad_depend]: 2.62004e-06 [auto_monad_grad]: 1.22003e-06 [auto_monad_eliminator]: 2.33799e-05 [cse]: 2.573e-05 [a_3]: 5.838e-05 [Cycle 2]: 0.00077971, [43] [expand_dump_flag]: 9.2003e-07 [switch_simplify]: 9.12999e-06 [loop_unroll]: 8.01007e-06 [a_1]: 0.00020443 [recompute_prepare]: 7.63999e-06 [updatestate_depend_eliminate]: 5.68992e-06 [updatestate_assign_eliminate]: 4.70993e-06 [updatestate_loads_eliminate]: 4.98001e-06 [parameter_eliminate]: 1.12003e-06 [a_2]: 0.00010573 [accelerated_algorithm]: 8.72009e-06 [shard]: 9.50065e-07 [meta_shard_fg_expand]: 2.63995e-06 [shard_inline]: 7.86001e-06 [auto_parallel]: 1.011e-05 [parallel]: 2.99001e-06 [flash_sp]: 2.41003e-06 [merge_comm]: 5.97001e-06 [allreduce_fusion]: 4.89992e-06 [matmul_add_comm_reduction]: 7.47002e-06 [allreduce_slice_to_reducescatter]: 2.60072e-07 [virtual_shard_identity]: 8.51008e-06 [virtual_dataset]: 7.98993e-06 [get_grad_eliminate_]: 7.35e-06 [virtual_output]: 7.27002e-06 [merge_forward]: 4.55999e-06 [cell_reuse_recompute_pass]: 1.83005e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.527e-05 [before_grad]: 1.30499e-05 [inplace_validation]: 4.20003e-06 [meta_fg_expand]: 4.78001e-06 [inplace_validation_after_expand]: 4.92996e-06 [flash_sp_send_recv_attached]: 8.10018e-07 [receive_attached]: 6.39935e-07 [after_resolve]: 1.007e-05 [a_after_grad]: 1.19e-05 [special_op_eliminate]: 7.61007e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 7.29924e-07 [auto_monad_grad]: 8.60076e-07 [auto_monad_eliminator]: 1.56e-05 [cse]: 1.88501e-05 [a_3]: 4.928e-05 [py_interpret_to_execute_after_opt_a]: 8.96002e-06 [slice_cell_reuse_recomputed_activation]: 1.54995e-06 [rewriter_after_opt_a]: 0.00012671 [convert_after_rewriter]: 7.37992e-06 [order_py_execute_after_rewriter]: 5.30994e-06 [opt_b]: 0.00024395, [1] [Cycle 1]: 0.00023913, [7] [b_1]: 0.00016489 [b_2]: 1.022e-05 [updatestate_depend_eliminate]: 5.03007e-06 [updatestate_assign_eliminate]: 4.37e-06 [updatestate_loads_eliminate]: 5.18991e-06 [renormalize]: 2.10013e-07 [cse]: 1.77099e-05 [optimize_parallel_all_gather_comm]: 8.59005e-06 [overlap_param_gather]: 1.13994e-06 [cconv]: 1.41701e-05 [loop_unroll]: 0.00048985 [opt_after_cconv]: 0.00013272, [1] [Cycle 1]: 0.00012692, [7] [c_1]: 5.233e-05 [parameter_eliminate]: 1.85007e-06 [updatestate_depend_eliminate]: 7.88004e-06 [updatestate_assign_eliminate]: 4.55009e-06 [updatestate_loads_eliminate]: 5.19003e-06 [cse]: 2.179e-05 [renormalize]: 3.19909e-07 [remove_dup_value]: 1.062e-05 [tuple_transform]: 6.849e-05, [1] [Cycle 1]: 6.44299e-05, [2] [d_1]: 5.495e-05 [renormalize]: 1.39931e-07 [partial_unused_args_eliminate]: 1.42003e-06 [add_cache_embedding]: 1.061e-05 [add_recomputation]: 5.245e-05 [cse_after_recomputation]: 2.793e-05, [1] [Cycle 1]: 2.351e-05, [1] [cse]: 1.841e-05 [environ_conv]: 6.04999e-06 [swap_dp_allreduce_reducescatter]: 6.94999e-06 [bias_add_comm_swap]: 1.36998e-06 [label_micro_interleaved_index]: 1.23994e-06 [label_fine_grained_interleaved_index]: 1.46998e-06 [merge_cast_opt]: 6.50063e-07 [slice_recompute_activation]: 9.10019e-07 [micro_interleaved_order_control]: 1.43005e-06 [assign_add_opt]: 2.43201e-05 [ForceFp32Comm]: 5.69969e-07 [remove_cast_before_assign_add]: 6.5401e-06 [full_micro_interleaved_order_control]: 1.15007e-06 [reorder_send_recv_between_fp_bp]: 1.05996e-06 [comm_op_add_attrs]: 2.56901e-05 [add_comm_op_reuse_tag]: 1.79e-06 [interleave_split_concat_branches]: 9.49949e-07 [interleave_parallel_branches]: 5.39934e-07 [overlap_opt_shard_in_pipeline]: 1.14006e-06 [overlap_opt_shard_grad_in_pipeline]: 1.44995e-06 [control_data_broadcast_order]: 6.40051e-07 [grouped_pairwise_exchange_alltoall]: 6.49004e-06 [offloading_packed_experts]: 1.34995e-06 [overlap_recompute_and_grad_model_parallel]: 1.07998e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.09899e-07 [overlap_recompute_allgather_and_fa_grad]: 6.656e-05 [overlap_grad_ring_attention]: 1.21002e-06 [overlap_grad_flash_sp]: 1.34599e-05 [begin_end_overlap_inline]: 4.50062e-07 [split_matmul_comm_elemetwise]: 1.31992e-06 [split_layernorm_comm]: 1.37999e-06 [handle_group_info]: 3.47989e-06 [symbol_engine_optimizer]: 9.11501e-05, [1] [Cycle 1]: 8.66899e-05, [6] [build]: 4.43007e-06 [elim_shapecalc]: 1.314e-05 [elim_not_effective]: 1.708e-05 [opt_reshape]: 9.72999e-06 [fold_const_symbol]: 1.413e-05 [renormalize]: 2.00002e-07 [pipeline_parallel_scheduler]: 1.01002e-06 [auto_monad_reorder]: 2.33999e-05 [get_jit_bprop_graph]: 3.49944e-07 [rewriter_after_jit_bprop_graph]: 3.10014e-07 [eliminate_special_op_node]: 0.00051781 [distribtued_split]: 3.594e-05 [validate]: 3.056e-05 [task_emit]: 0.0711543 [execute]: 9.38994e-06 Sums bootstrap : 0.000310s : 0.40% type_inference : 0.002464s : 3.16% auto_monad : 0.000102s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000013s : 0.02% optimize.rewriter_before_opt_a : 0.000031s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000034s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000534s : 0.68% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000264s : 0.34% optimize.opt_a.accelerated_algorithm : 0.000019s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000029s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.00% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000484s : 0.62% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000039s : 0.05% optimize.opt_a.cse : 0.000045s : 0.06% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000127s : 0.16% optimize.convert_after_rewriter : 0.000007s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000014s : 0.02% optimize.loop_unroll : 0.000490s : 0.63% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000052s : 0.07% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000024s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000026s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000006s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000067s : 0.09% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000023s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000518s : 0.66% distribtued_split : 0.000036s : 0.05% validate : 0.000031s : 0.04% task_emit : 0.071154s : 91.18% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000112 63 4.41% : 0.000005s : 2: substitution.depend_value_elim 2.05% : 0.000002s : 5: substitution.elim_not_effective 1.78% : 0.000002s : 5: substitution.fold_const_symbol 5.65% : 0.000006s : 6: substitution.graph_param_transform 48.08% : 0.000054s : 1: substitution.inline 5.22% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.55% : 0.000004s : 6: substitution.load_eliminater 2.29% : 0.000003s : 2: substitution.reduce_all_const_elim 6.66% : 0.000007s : 10: substitution.remove_not_recompute_node 2.34% : 0.000003s : 2: substitution.replace_old_param 9.72% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.26% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002437 2 89.43% : 0.002180s : 1: type_inference.infer 10.57% : 0.000258s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000053 1 100.00% : 0.000053s : 1: match.inline ------[predicate.] 0.000275 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.12% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.59% : 0.000002s : 12: predicate.addn_check_dump 0.92% : 0.000003s : 13: predicate.addn_zero_filter 0.68% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 1.95% : 0.000005s : 25: predicate.arithmetic_simplify 0.73% : 0.000002s : 13: predicate.cast_eliminate 0.79% : 0.000002s : 12: predicate.check_bprop_eliminate 0.65% : 0.000002s : 12: predicate.compare_switch_simplify 0.17% : 0.000000s : 6: predicate.const_output_eliminate 0.41% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.17% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.74% : 0.000002s : 12: predicate.depend_value_elim 0.69% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.75% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.74% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.23% : 0.000001s : 6: predicate.elim_not_effective 0.53% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.02% : 0.000003s : 19: predicate.environ_add_const_eliminate 0.93% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.01% : 0.000003s : 19: predicate.environ_get_depend_swap 1.64% : 0.000005s : 31: predicate.environ_get_eliminate 1.01% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.67% : 0.000002s : 14: predicate.exchange_switch_depend_value 0.99% : 0.000003s : 14: predicate.float_depend_g_call 0.59% : 0.000002s : 12: predicate.float_environ_get_switch 0.90% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.20% : 0.000001s : 6: predicate.fold_const_symbol 0.77% : 0.000002s : 12: predicate.get_grad_eliminate 0.22% : 0.000001s : 6: predicate.graph_param_transform 0.66% : 0.000002s : 12: predicate.incorporate_call 0.57% : 0.000002s : 12: predicate.incorporate_call_switch 18.82% : 0.000052s : 63: predicate.inline 0.88% : 0.000002s : 12: predicate.inline_without_move 0.33% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.09% : 0.000003s : 12: predicate.less_batch_normalization 1.49% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.00% : 0.000005s : 38: predicate.load_eliminater 1.12% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.04% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.58% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.62% : 0.000002s : 12: predicate.merge_addn 0.70% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.68% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.63% : 0.000002s : 13: predicate.minmaximum_grad 0.68% : 0.000002s : 6: predicate.mutable_eliminate 0.40% : 0.000001s : 6: predicate.opt_reshape 0.52% : 0.000001s : 6: predicate.parallel_virtual_node 0.96% : 0.000003s : 14: predicate.partial_defer_inline 1.10% : 0.000003s : 19: predicate.partial_eliminate 0.66% : 0.000002s : 13: predicate.print_const_string_wrapper 0.81% : 0.000002s : 12: predicate.reduce_all_const_elim 0.97% : 0.000003s : 13: predicate.reduce_eliminate 0.47% : 0.000001s : 12: predicate.remove_not_recompute_node 0.92% : 0.000003s : 25: predicate.replace_applicator 0.38% : 0.000001s : 12: predicate.replace_old_param 0.20% : 0.000001s : 6: predicate.reset_defer_inline 0.70% : 0.000002s : 13: predicate.reshape_eliminate 0.69% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.48% : 0.000001s : 6: predicate.row_tensor_eliminate 0.92% : 0.000003s : 12: predicate.same_eliminate 0.38% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.84% : 0.000002s : 12: predicate.shard_identity_eliminate 1.29% : 0.000004s : 18: predicate.special_op_eliminate 0.87% : 0.000002s : 12: predicate.specialize_transform 0.93% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.82% : 0.000002s : 12: predicate.stack_unstack_eliminate 1.96% : 0.000005s : 38: predicate.stopgrad_eliminater 0.36% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.72% : 0.000002s : 14: predicate.switch_defer_inline 1.39% : 0.000004s : 26: predicate.switch_layer_defer_inline 3.62% : 0.000010s : 43: predicate.switch_simplify 0.69% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.52% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.50% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.28% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.30% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.41% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.16% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.35% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 1.97% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 2.94% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.45% : 0.000001s : 6: predicate.value_based_eliminate 0.70% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.63% : 0.000002s : 12: predicate.virtual_output_eliminate 0.45% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000129 4 7.34% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.66% : 0.000120s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091424 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000014s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.06% : 0.000057s : 1: add_recomputation 0.03% : 0.000028s : 1: assign_add_opt 0.12% : 0.000113s : 1: auto_monad 0.03% : 0.000030s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.36% : 0.000333s : 1: bootstrap 0.02% : 0.000018s : 1: cconv 0.03% : 0.000030s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000011s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000043s : 1: distribtued_split 0.58% : 0.000531s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.01% : 0.000010s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000499s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001136s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000155s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.06% : 0.005544s : 1: opt_a 0.15% : 0.000137s : 1: opt_after_cconv 0.27% : 0.000247s : 1: opt_b 8.00% : 0.007318s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.08% : 0.000072s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000009s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.32% : 0.000288s : 1: renormalize.infer 0.21% : 0.000190s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.14% : 0.000132s : 1: rewriter_after_opt_a 0.04% : 0.000036s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000094s : 1: symbol_engine_optimizer 77.86% : 0.071181s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.71% : 0.002481s : 1: type_inference 0.07% : 0.000063s : 1: validate TotalTime = 0.0836487, [21] [bootstrap]: 0.00032668 [type_inference]: 0.00277311 [auto_monad]: 0.00013538 [graph_reusing]: 2.45008e-06 [inline]: 1.54006e-06 [parallel-infer-symbol]: 2.70002e-06 [pre_auto_parallel]: 2.69801e-05 [insert-virtual-dataset]: 2.66009e-06 [parallel-infer-symbol-second]: 4.59957e-07 [dataset_repeat_opt]: 1.31992e-06 [pipeline_split]: 1.60001e-06 [optimize]: 0.00752204, [52] [py_interpret_to_execute]: 1.938e-05 [rewriter_before_opt_a]: 3.774e-05 [opt_a]: 0.00554668, [2] [Cycle 1]: 0.00157654, [43] [expand_dump_flag]: 3.72995e-06 [switch_simplify]: 2.95e-05 [loop_unroll]: 1.309e-05 [a_1]: 0.00036743 [recompute_prepare]: 8.85001e-06 [updatestate_depend_eliminate]: 9.20997e-06 [updatestate_assign_eliminate]: 5.99003e-06 [updatestate_loads_eliminate]: 7.07002e-06 [parameter_eliminate]: 3.01993e-06 [a_2]: 0.00011772 [accelerated_algorithm]: 8.6599e-06 [shard]: 2.43995e-06 [meta_shard_fg_expand]: 3.65998e-06 [shard_inline]: 8.60996e-06 [auto_parallel]: 1.23e-05 [parallel]: 8.46002e-06 [flash_sp]: 1.25701e-05 [merge_comm]: 8.87993e-06 [allreduce_fusion]: 5.72996e-06 [matmul_add_comm_reduction]: 1.107e-05 [allreduce_slice_to_reducescatter]: 6.59958e-07 [virtual_shard_identity]: 1.045e-05 [virtual_dataset]: 7.87003e-06 [get_grad_eliminate_]: 8.15e-06 [virtual_output]: 7.88004e-06 [merge_forward]: 5.97991e-06 [cell_reuse_recompute_pass]: 1.73994e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.666e-05 [before_grad]: 1.369e-05 [inplace_validation]: 4.75009e-06 [meta_fg_expand]: 6.18002e-06 [inplace_validation_after_expand]: 6.44999e-06 [flash_sp_send_recv_attached]: 4.50993e-06 [receive_attached]: 2.93995e-06 [after_resolve]: 1.18901e-05 [a_after_grad]: 1.29499e-05 [special_op_eliminate]: 8.30996e-06 [renormalize]: 0.00043871 [add_forward_monad_depend]: 3.62005e-06 [auto_monad_grad]: 1.91992e-06 [auto_monad_eliminator]: 3.063e-05 [cse]: 3.719e-05 [a_3]: 5.77901e-05 [Cycle 2]: 0.00078602, [43] [expand_dump_flag]: 1.02993e-06 [switch_simplify]: 9.32999e-06 [loop_unroll]: 7.45e-06 [a_1]: 0.00020745 [recompute_prepare]: 7.38993e-06 [updatestate_depend_eliminate]: 5.84999e-06 [updatestate_assign_eliminate]: 4.77e-06 [updatestate_loads_eliminate]: 5.32006e-06 [parameter_eliminate]: 1.30001e-06 [a_2]: 0.00010557 [accelerated_algorithm]: 8.12009e-06 [shard]: 1.13004e-06 [meta_shard_fg_expand]: 2.60002e-06 [shard_inline]: 7.81007e-06 [auto_parallel]: 1.16901e-05 [parallel]: 3.66999e-06 [flash_sp]: 3.89002e-06 [merge_comm]: 6.16e-06 [allreduce_fusion]: 4.61994e-06 [matmul_add_comm_reduction]: 7.72998e-06 [allreduce_slice_to_reducescatter]: 2.79979e-07 [virtual_shard_identity]: 8.69005e-06 [virtual_dataset]: 7.67002e-06 [get_grad_eliminate_]: 7.38993e-06 [virtual_output]: 6.87002e-06 [merge_forward]: 4.40003e-06 [cell_reuse_recompute_pass]: 1.82004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.521e-05 [before_grad]: 1.264e-05 [inplace_validation]: 4.27e-06 [meta_fg_expand]: 4.82006e-06 [inplace_validation_after_expand]: 4.98991e-06 [flash_sp_send_recv_attached]: 8.10018e-07 [receive_attached]: 7.69971e-07 [after_resolve]: 9.89006e-06 [a_after_grad]: 1.18901e-05 [special_op_eliminate]: 7.28003e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 9.30042e-07 [auto_monad_grad]: 1.20001e-06 [auto_monad_eliminator]: 1.805e-05 [cse]: 1.99301e-05 [a_3]: 4.865e-05 [py_interpret_to_execute_after_opt_a]: 9.26002e-06 [slice_cell_reuse_recomputed_activation]: 2.52994e-06 [rewriter_after_opt_a]: 0.00015306 [convert_after_rewriter]: 9.37004e-06 [order_py_execute_after_rewriter]: 5.6799e-06 [opt_b]: 0.00024737, [1] [Cycle 1]: 0.00024203, [7] [b_1]: 0.00016549 [b_2]: 1.01899e-05 [updatestate_depend_eliminate]: 5.54009e-06 [updatestate_assign_eliminate]: 4.43996e-06 [updatestate_loads_eliminate]: 5.39992e-06 [renormalize]: 2.89991e-07 [cse]: 1.93099e-05 [optimize_parallel_all_gather_comm]: 8.66002e-06 [overlap_param_gather]: 1.47009e-06 [cconv]: 2.514e-05 [loop_unroll]: 0.00054995 [opt_after_cconv]: 0.00013794, [1] [Cycle 1]: 0.00013146, [7] [c_1]: 5.47e-05 [parameter_eliminate]: 2.41003e-06 [updatestate_depend_eliminate]: 8.37992e-06 [updatestate_assign_eliminate]: 4.57e-06 [updatestate_loads_eliminate]: 5.09003e-06 [cse]: 2.32699e-05 [renormalize]: 4.59957e-07 [remove_dup_value]: 1.488e-05 [tuple_transform]: 7.206e-05, [1] [Cycle 1]: 6.756e-05, [2] [d_1]: 5.77801e-05 [renormalize]: 1.70083e-07 [partial_unused_args_eliminate]: 2.10002e-06 [add_cache_embedding]: 1.42601e-05 [add_recomputation]: 6.38601e-05 [cse_after_recomputation]: 2.863e-05, [1] [Cycle 1]: 2.371e-05, [1] [cse]: 1.81701e-05 [environ_conv]: 8.20006e-06 [swap_dp_allreduce_reducescatter]: 8.12998e-06 [bias_add_comm_swap]: 2.93995e-06 [label_micro_interleaved_index]: 2.00002e-06 [label_fine_grained_interleaved_index]: 1.95997e-06 [merge_cast_opt]: 1.12003e-06 [slice_recompute_activation]: 1.97999e-06 [micro_interleaved_order_control]: 2.06998e-06 [assign_add_opt]: 2.901e-05 [ForceFp32Comm]: 8.801e-07 [remove_cast_before_assign_add]: 7.19004e-06 [full_micro_interleaved_order_control]: 2.23995e-06 [reorder_send_recv_between_fp_bp]: 2.4999e-06 [comm_op_add_attrs]: 2.928e-05 [add_comm_op_reuse_tag]: 2.13995e-06 [interleave_split_concat_branches]: 9.30042e-07 [interleave_parallel_branches]: 7.79983e-07 [overlap_opt_shard_in_pipeline]: 1.47009e-06 [overlap_opt_shard_grad_in_pipeline]: 2.65008e-06 [control_data_broadcast_order]: 1.26008e-06 [grouped_pairwise_exchange_alltoall]: 9.79006e-06 [offloading_packed_experts]: 2.41003e-06 [overlap_recompute_and_grad_model_parallel]: 1.74006e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.02003e-06 [overlap_recompute_allgather_and_fa_grad]: 7.753e-05 [overlap_grad_ring_attention]: 2.50002e-06 [overlap_grad_flash_sp]: 2.448e-05 [begin_end_overlap_inline]: 8.30041e-07 [split_matmul_comm_elemetwise]: 2.45008e-06 [split_layernorm_comm]: 1.97999e-06 [handle_group_info]: 6.24999e-06 [symbol_engine_optimizer]: 9.426e-05, [1] [Cycle 1]: 8.912e-05, [6] [build]: 5.38991e-06 [elim_shapecalc]: 1.403e-05 [elim_not_effective]: 1.782e-05 [opt_reshape]: 8.97003e-06 [fold_const_symbol]: 1.474e-05 [renormalize]: 3.19909e-07 [pipeline_parallel_scheduler]: 1.63994e-06 [auto_monad_reorder]: 3.173e-05 [get_jit_bprop_graph]: 4.60073e-07 [rewriter_after_jit_bprop_graph]: 4.1991e-07 [eliminate_special_op_node]: 0.00052507 [distribtued_split]: 4.395e-05 [validate]: 3.42301e-05 [task_emit]: 0.0719586 [execute]: 1.24801e-05 Sums bootstrap : 0.000327s : 0.41% type_inference : 0.002773s : 3.49% auto_monad : 0.000135s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000027s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000019s : 0.02% optimize.rewriter_before_opt_a : 0.000038s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000575s : 0.72% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000223s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000024s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000439s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.06% optimize.opt_a.cse : 0.000057s : 0.07% optimize.opt_a.a_3 : 0.000106s : 0.13% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000153s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000550s : 0.69% optimize.opt_after_cconv.c_1 : 0.000055s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000015s : 0.02% optimize.tuple_transform.d_1 : 0.000058s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000029s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000078s : 0.10% optimize.overlap_grad_ring_attention : 0.000003s : 0.00% optimize.overlap_grad_flash_sp : 0.000024s : 0.03% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000006s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000015s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000525s : 0.66% distribtued_split : 0.000044s : 0.06% validate : 0.000034s : 0.04% task_emit : 0.071959s : 90.57% execute : 0.000012s : 0.02% Time group info: ------[substitution.] 0.000139 63 4.93% : 0.000007s : 2: substitution.depend_value_elim 2.25% : 0.000003s : 5: substitution.elim_not_effective 1.90% : 0.000003s : 5: substitution.fold_const_symbol 5.45% : 0.000008s : 6: substitution.graph_param_transform 51.19% : 0.000071s : 1: substitution.inline 3.85% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.36% : 0.000005s : 6: substitution.load_eliminater 2.80% : 0.000004s : 2: substitution.reduce_all_const_elim 5.46% : 0.000008s : 10: substitution.remove_not_recompute_node 2.53% : 0.000004s : 2: substitution.replace_old_param 8.69% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.58% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002743 2 83.70% : 0.002296s : 1: type_inference.infer 16.30% : 0.000447s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000070 1 100.00% : 0.000070s : 1: match.inline ------[predicate.] 0.000232 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.13% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.18% : 0.000005s : 25: predicate.arithmetic_simplify 0.92% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.46% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.31% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.87% : 0.000002s : 12: predicate.depend_value_elim 0.83% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.82% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.91% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.27% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_depend_swap 1.93% : 0.000004s : 31: predicate.environ_get_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.26% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.93% : 0.000002s : 12: predicate.get_grad_eliminate 0.45% : 0.000001s : 6: predicate.graph_param_transform 0.80% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.72% : 0.000013s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.37% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.07% : 0.000002s : 12: predicate.less_batch_normalization 1.78% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.44% : 0.000006s : 38: predicate.load_eliminater 1.46% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.23% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.73% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.83% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.85% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.78% : 0.000002s : 13: predicate.minmaximum_grad 0.79% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.40% : 0.000001s : 6: predicate.parallel_virtual_node 1.07% : 0.000002s : 14: predicate.partial_defer_inline 1.30% : 0.000003s : 19: predicate.partial_eliminate 0.85% : 0.000002s : 13: predicate.print_const_string_wrapper 0.90% : 0.000002s : 12: predicate.reduce_all_const_elim 1.11% : 0.000003s : 13: predicate.reduce_eliminate 0.64% : 0.000001s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.75% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.55% : 0.000001s : 6: predicate.row_tensor_eliminate 1.06% : 0.000002s : 12: predicate.same_eliminate 0.44% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.99% : 0.000002s : 12: predicate.shard_identity_eliminate 1.44% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.00% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.94% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.26% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.57% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.41% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.70% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.72% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.78% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.62% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.67% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.33% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.80% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.76% : 0.000002s : 12: predicate.virtual_output_eliminate 0.61% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000168 4 10.56% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.44% : 0.000150s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.093031 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000068s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.16% : 0.000147s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.01% : 0.000005s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.38% : 0.000351s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.04% : 0.000034s : 1: comm_op_add_attrs 0.01% : 0.000005s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000032s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000052s : 1: distribtued_split 0.58% : 0.000539s : 1: eliminate_special_op_node 0.01% : 0.000012s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000010s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.60% : 0.000560s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000006s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001132s : 80: opt.transform.opt_a 0.06% : 0.000053s : 1: opt.transform.opt_after_cconv 0.17% : 0.000155s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000051s : 4: opt.transform.symbol_engine_opt 5.97% : 0.005550s : 1: opt_a 0.15% : 0.000142s : 1: opt_after_cconv 0.27% : 0.000251s : 1: opt_b 8.09% : 0.007530s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.03% : 0.000029s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.09% : 0.000084s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.03% : 0.000025s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.25% : 0.000236s : 1: renormalize.infer 0.21% : 0.000197s : 1: renormalize.specialize 0.01% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000159s : 1: rewriter_after_opt_a 0.05% : 0.000042s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000012s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000098s : 1: symbol_engine_optimizer 77.38% : 0.071990s : 1: task_emit 0.08% : 0.000075s : 1: tuple_transform 3.00% : 0.002792s : 1: type_inference 0.08% : 0.000070s : 1: validate [WARNING] PARALLEL(169251,ffff805f5c10,python3.7):2025-02-07-15:54:30.133.174 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169309,ffff88eb9c10,python3.7):2025-02-07-15:54:30.133.608 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169260,ffff90a16c10,python3.7):2025-02-07-15:54:30.133.609 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169242,ffff96d80c10,python3.7):2025-02-07-15:54:30.133.752 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169273,ffff95a23c10,python3.7):2025-02-07-15:54:30.133.940 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169297,ffff9ffe2c10,python3.7):2025-02-07-15:54:30.134.055 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169323,ffff82a65c10,python3.7):2025-02-07-15:54:30.134.055 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169285,ffff97501c10,python3.7):2025-02-07-15:54:30.134.381 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 0.0778583, [21] [bootstrap]: 0.00029537 [type_inference]: 0.00231403 [auto_monad]: 0.00010491 [graph_reusing]: 1.77999e-06 [inline]: 1.26997e-06 [parallel-infer-symbol]: 1.65997e-06 [pre_auto_parallel]: 2.245e-05 [insert-virtual-dataset]: 1.93005e-06 [parallel-infer-symbol-second]: 4.89992e-07 [dataset_repeat_opt]: 8.40053e-07 [pipeline_split]: 1.30991e-06 [optimize]: 0.00691955, [52] [py_interpret_to_execute]: 1.218e-05 [rewriter_before_opt_a]: 3.054e-05 [opt_a]: 0.00516319, [2] [Cycle 1]: 0.0014566, [43] [expand_dump_flag]: 2.82994e-06 [switch_simplify]: 2.60801e-05 [loop_unroll]: 1.27699e-05 [a_1]: 0.00032735 [recompute_prepare]: 9.40997e-06 [updatestate_depend_eliminate]: 8.76992e-06 [updatestate_assign_eliminate]: 5.43008e-06 [updatestate_loads_eliminate]: 6.67002e-06 [parameter_eliminate]: 2.39001e-06 [a_2]: 0.00011415 [accelerated_algorithm]: 8.33999e-06 [shard]: 1.74996e-06 [meta_shard_fg_expand]: 3.62005e-06 [shard_inline]: 8.76992e-06 [auto_parallel]: 1.15e-05 [parallel]: 5.62996e-06 [flash_sp]: 8.87003e-06 [merge_comm]: 7.6599e-06 [allreduce_fusion]: 5.08991e-06 [matmul_add_comm_reduction]: 9.34e-06 [allreduce_slice_to_reducescatter]: 4.00003e-07 [virtual_shard_identity]: 9.62999e-06 [virtual_dataset]: 8.02008e-06 [get_grad_eliminate_]: 7.72998e-06 [virtual_output]: 8.09005e-06 [merge_forward]: 5.18002e-06 [cell_reuse_recompute_pass]: 1.36998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.701e-05 [before_grad]: 1.39901e-05 [inplace_validation]: 4.87e-06 [meta_fg_expand]: 5.54998e-06 [inplace_validation_after_expand]: 6.01995e-06 [flash_sp_send_recv_attached]: 3.64997e-06 [receive_attached]: 1.84006e-06 [after_resolve]: 1.105e-05 [a_after_grad]: 1.30601e-05 [special_op_eliminate]: 8.01997e-06 [renormalize]: 0.00040874 [add_forward_monad_depend]: 2.59001e-06 [auto_monad_grad]: 1.64995e-06 [auto_monad_eliminator]: 2.521e-05 [cse]: 2.78299e-05 [a_3]: 5.733e-05 [Cycle 2]: 0.00077813, [43] [expand_dump_flag]: 8.00006e-07 [switch_simplify]: 9.04e-06 [loop_unroll]: 7.47992e-06 [a_1]: 0.00019996 [recompute_prepare]: 7.30006e-06 [updatestate_depend_eliminate]: 5.71995e-06 [updatestate_assign_eliminate]: 5.10004e-06 [updatestate_loads_eliminate]: 4.88001e-06 [parameter_eliminate]: 1.02003e-06 [a_2]: 0.00010429 [accelerated_algorithm]: 8.70996e-06 [shard]: 1.11002e-06 [meta_shard_fg_expand]: 2.35008e-06 [shard_inline]: 8.03999e-06 [auto_parallel]: 1.045e-05 [parallel]: 3.60003e-06 [flash_sp]: 3.04997e-06 [merge_comm]: 6.19993e-06 [allreduce_fusion]: 5.04998e-06 [matmul_add_comm_reduction]: 7.23999e-06 [allreduce_slice_to_reducescatter]: 2.70084e-07 [virtual_shard_identity]: 8.70007e-06 [virtual_dataset]: 7.95e-06 [get_grad_eliminate_]: 7.65e-06 [virtual_output]: 7.42998e-06 [merge_forward]: 4.35999e-06 [cell_reuse_recompute_pass]: 1.6999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.531e-05 [before_grad]: 1.26599e-05 [inplace_validation]: 4.18001e-06 [meta_fg_expand]: 4.95999e-06 [inplace_validation_after_expand]: 4.94998e-06 [flash_sp_send_recv_attached]: 7.00005e-07 [receive_attached]: 8.10018e-07 [after_resolve]: 9.79996e-06 [a_after_grad]: 1.176e-05 [special_op_eliminate]: 7.66991e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 8.49948e-07 [auto_monad_grad]: 9.49949e-07 [auto_monad_eliminator]: 1.62499e-05 [cse]: 1.797e-05 [a_3]: 4.82e-05 [py_interpret_to_execute_after_opt_a]: 8.16002e-06 [slice_cell_reuse_recomputed_activation]: 1.92993e-06 [rewriter_after_opt_a]: 0.00013319 [convert_after_rewriter]: 7.91997e-06 [order_py_execute_after_rewriter]: 5.27001e-06 [opt_b]: 0.00024398, [1] [Cycle 1]: 0.00023895, [7] [b_1]: 0.00016349 [b_2]: 9.67004e-06 [updatestate_depend_eliminate]: 5.31995e-06 [updatestate_assign_eliminate]: 4.38001e-06 [updatestate_loads_eliminate]: 4.77e-06 [renormalize]: 2.59955e-07 [cse]: 1.76501e-05 [optimize_parallel_all_gather_comm]: 8.12009e-06 [overlap_param_gather]: 6.40051e-07 [cconv]: 1.67e-05 [loop_unroll]: 0.00047228 [opt_after_cconv]: 0.00014833, [1] [Cycle 1]: 0.00014266, [7] [c_1]: 7.187e-05 [parameter_eliminate]: 1.67999e-06 [updatestate_depend_eliminate]: 7.43999e-06 [updatestate_assign_eliminate]: 4.65999e-06 [updatestate_loads_eliminate]: 4.93997e-06 [cse]: 2.014e-05 [renormalize]: 3.10014e-07 [remove_dup_value]: 1.114e-05 [tuple_transform]: 6.768e-05, [1] [Cycle 1]: 6.367e-05, [2] [d_1]: 5.465e-05 [renormalize]: 1.79978e-07 [partial_unused_args_eliminate]: 1.47999e-06 [add_cache_embedding]: 1.175e-05 [add_recomputation]: 5.199e-05 [cse_after_recomputation]: 2.54799e-05, [1] [Cycle 1]: 2.107e-05, [1] [cse]: 1.58199e-05 [environ_conv]: 5.90994e-06 [swap_dp_allreduce_reducescatter]: 6.84999e-06 [bias_add_comm_swap]: 1.79e-06 [label_micro_interleaved_index]: 1.15007e-06 [label_fine_grained_interleaved_index]: 1.15996e-06 [merge_cast_opt]: 7.00005e-07 [slice_recompute_activation]: 1.52003e-06 [micro_interleaved_order_control]: 1.02003e-06 [assign_add_opt]: 2.609e-05 [ForceFp32Comm]: 6.59958e-07 [remove_cast_before_assign_add]: 6.58992e-06 [full_micro_interleaved_order_control]: 1.29e-06 [reorder_send_recv_between_fp_bp]: 1.25007e-06 [comm_op_add_attrs]: 2.22201e-05 [add_comm_op_reuse_tag]: 1.41002e-06 [interleave_split_concat_branches]: 6.10016e-07 [interleave_parallel_branches]: 5.90109e-07 [overlap_opt_shard_in_pipeline]: 8.10018e-07 [overlap_opt_shard_grad_in_pipeline]: 1.45996e-06 [control_data_broadcast_order]: 6.79982e-07 [grouped_pairwise_exchange_alltoall]: 6.58003e-06 [offloading_packed_experts]: 1.33005e-06 [overlap_recompute_and_grad_model_parallel]: 1.24006e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.79981e-07 [overlap_recompute_allgather_and_fa_grad]: 6.511e-05 [overlap_grad_ring_attention]: 1.45007e-06 [overlap_grad_flash_sp]: 1.218e-05 [begin_end_overlap_inline]: 5.30039e-07 [split_matmul_comm_elemetwise]: 1.51002e-06 [split_layernorm_comm]: 1.12003e-06 [handle_group_info]: 3.43996e-06 [symbol_engine_optimizer]: 8.733e-05, [1] [Cycle 1]: 8.27799e-05, [6] [build]: 4.30993e-06 [elim_shapecalc]: 1.253e-05 [elim_not_effective]: 1.638e-05 [opt_reshape]: 8.92999e-06 [fold_const_symbol]: 1.318e-05 [renormalize]: 2.59955e-07 [pipeline_parallel_scheduler]: 9.30042e-07 [auto_monad_reorder]: 2.39001e-05 [get_jit_bprop_graph]: 5.40051e-07 [rewriter_after_jit_bprop_graph]: 3.30037e-07 [eliminate_special_op_node]: 0.00049664 [distribtued_split]: 3.44299e-05 [validate]: 3.079e-05 [task_emit]: 0.0673677 [execute]: 9.11008e-06 Sums bootstrap : 0.000295s : 0.40% type_inference : 0.002314s : 3.13% auto_monad : 0.000105s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000022s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000031s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000020s : 0.03% optimize.opt_a.a_1 : 0.000527s : 0.71% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000218s : 0.30% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000016s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000409s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000041s : 0.06% optimize.opt_a.cse : 0.000046s : 0.06% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000133s : 0.18% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000017s : 0.02% optimize.loop_unroll : 0.000472s : 0.64% optimize.opt_after_cconv.c_1 : 0.000072s : 0.10% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.02% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000052s : 0.07% optimize.cse_after_recomputation.cse : 0.000016s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000026s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000022s : 0.03% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000007s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000065s : 0.09% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000024s : 0.03% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000497s : 0.67% distribtued_split : 0.000034s : 0.05% validate : 0.000031s : 0.04% task_emit : 0.067368s : 91.11% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000113 63 4.40% : 0.000005s : 2: substitution.depend_value_elim 2.07% : 0.000002s : 5: substitution.elim_not_effective 1.96% : 0.000002s : 5: substitution.fold_const_symbol 5.87% : 0.000007s : 6: substitution.graph_param_transform 49.64% : 0.000056s : 1: substitution.inline 4.37% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.34% : 0.000004s : 6: substitution.load_eliminater 2.43% : 0.000003s : 2: substitution.reduce_all_const_elim 6.49% : 0.000007s : 10: substitution.remove_not_recompute_node 2.32% : 0.000003s : 2: substitution.replace_old_param 9.21% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 7.89% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002289 2 90.15% : 0.002064s : 1: type_inference.infer 9.85% : 0.000225s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000055 1 100.00% : 0.000055s : 1: match.inline ------[predicate.] 0.000228 1420 0.94% : 0.000002s : 13: predicate.accumulaten_eliminater 1.00% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.33% : 0.000005s : 25: predicate.arithmetic_simplify 0.87% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.51% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.33% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.89% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.93% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.62% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.99% : 0.000005s : 31: predicate.environ_get_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.25% : 0.000003s : 14: predicate.float_depend_g_call 0.78% : 0.000002s : 12: predicate.float_environ_get_switch 1.11% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.90% : 0.000002s : 12: predicate.get_grad_eliminate 0.30% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.72% : 0.000013s : 63: predicate.inline 1.10% : 0.000002s : 12: predicate.inline_without_move 0.39% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.05% : 0.000002s : 12: predicate.less_batch_normalization 1.80% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.45% : 0.000006s : 38: predicate.load_eliminater 1.27% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.23% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.84% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.81% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.69% : 0.000002s : 6: predicate.mutable_eliminate 0.43% : 0.000001s : 6: predicate.opt_reshape 0.44% : 0.000001s : 6: predicate.parallel_virtual_node 1.09% : 0.000002s : 14: predicate.partial_defer_inline 1.21% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.84% : 0.000002s : 12: predicate.reduce_all_const_elim 0.98% : 0.000002s : 13: predicate.reduce_eliminate 0.57% : 0.000001s : 12: predicate.remove_not_recompute_node 1.19% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.85% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.46% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.53% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.97% : 0.000002s : 12: predicate.shard_identity_eliminate 1.41% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 1.05% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.40% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.88% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.07% : 0.000009s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.87% : 0.000002s : 13: predicate.transpose_eliminate 1.71% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.72% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.61% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.70% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.58% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.45% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.52% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.44% : 0.000001s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000124 4 8.74% : 0.000011s : 1: func_graph_cloner_run.FuncGraphClonerGraph 91.26% : 0.000113s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.086563 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000057s : 1: add_recomputation 0.03% : 0.000030s : 1: assign_add_opt 0.14% : 0.000117s : 1: auto_monad 0.04% : 0.000031s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000320s : 1: bootstrap 0.02% : 0.000021s : 1: cconv 0.03% : 0.000026s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000028s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000043s : 1: distribtued_split 0.59% : 0.000509s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000007s : 1: graph_reusing 0.01% : 0.000010s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.56% : 0.000481s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001077s : 80: opt.transform.opt_a 0.08% : 0.000070s : 1: opt.transform.opt_after_cconv 0.18% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000030s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 5.97% : 0.005167s : 1: opt_a 0.18% : 0.000152s : 1: opt_after_cconv 0.29% : 0.000247s : 1: opt_b 8.00% : 0.006929s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000009s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000070s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000029s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.26% : 0.000221s : 1: renormalize.infer 0.21% : 0.000182s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000139s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000090s : 1: symbol_engine_optimizer 77.85% : 0.067392s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.69% : 0.002332s : 1: type_inference 0.07% : 0.000062s : 1: validate TotalTime = 0.0793047, [21] [bootstrap]: 0.0003073 [type_inference]: 0.00256229 [auto_monad]: 0.00013295 [graph_reusing]: 2.39001e-06 [inline]: 1.42003e-06 [parallel-infer-symbol]: 2.26998e-06 [pre_auto_parallel]: 2.463e-05 [insert-virtual-dataset]: 2.65997e-06 [parallel-infer-symbol-second]: 4.10015e-07 [dataset_repeat_opt]: 1.54995e-06 [pipeline_split]: 1.81003e-06 [optimize]: 0.00727481, [52] [py_interpret_to_execute]: 1.47299e-05 [rewriter_before_opt_a]: 3.388e-05 [opt_a]: 0.00540275, [2] [Cycle 1]: 0.0015288, [43] [expand_dump_flag]: 3.50003e-06 [switch_simplify]: 3.068e-05 [loop_unroll]: 1.321e-05 [a_1]: 0.00034459 [recompute_prepare]: 8.65001e-06 [updatestate_depend_eliminate]: 9.01998e-06 [updatestate_assign_eliminate]: 5.70994e-06 [updatestate_loads_eliminate]: 7.27992e-06 [parameter_eliminate]: 3.58e-06 [a_2]: 0.000117 [accelerated_algorithm]: 8.37992e-06 [shard]: 2.23005e-06 [meta_shard_fg_expand]: 3.76999e-06 [shard_inline]: 8.68004e-06 [auto_parallel]: 1.211e-05 [parallel]: 7.50006e-06 [flash_sp]: 1.064e-05 [merge_comm]: 7.63999e-06 [allreduce_fusion]: 5.43997e-06 [matmul_add_comm_reduction]: 1.074e-05 [allreduce_slice_to_reducescatter]: 4.09898e-07 [virtual_shard_identity]: 9.11998e-06 [virtual_dataset]: 8.01997e-06 [get_grad_eliminate_]: 7.91997e-06 [virtual_output]: 7.41996e-06 [merge_forward]: 5.77001e-06 [cell_reuse_recompute_pass]: 1.90001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.719e-05 [before_grad]: 1.351e-05 [inplace_validation]: 5.53997e-06 [meta_fg_expand]: 5.63997e-06 [inplace_validation_after_expand]: 6.86001e-06 [flash_sp_send_recv_attached]: 5.01995e-06 [receive_attached]: 2.92994e-06 [after_resolve]: 1.17001e-05 [a_after_grad]: 1.275e-05 [special_op_eliminate]: 7.56001e-06 [renormalize]: 0.00042937 [add_forward_monad_depend]: 3.42994e-06 [auto_monad_grad]: 1.81003e-06 [auto_monad_eliminator]: 3.30099e-05 [cse]: 3.30399e-05 [a_3]: 5.78399e-05 [Cycle 2]: 0.00077919, [43] [expand_dump_flag]: 1.10001e-06 [switch_simplify]: 8.66002e-06 [loop_unroll]: 9.77004e-06 [a_1]: 0.00020034 [recompute_prepare]: 7.23999e-06 [updatestate_depend_eliminate]: 6.17001e-06 [updatestate_assign_eliminate]: 4.64998e-06 [updatestate_loads_eliminate]: 5.58002e-06 [parameter_eliminate]: 1.33005e-06 [a_2]: 0.00010448 [accelerated_algorithm]: 8.23999e-06 [shard]: 1.16997e-06 [meta_shard_fg_expand]: 2.69001e-06 [shard_inline]: 8.00996e-06 [auto_parallel]: 1.061e-05 [parallel]: 3.66999e-06 [flash_sp]: 3.28e-06 [merge_comm]: 6.38003e-06 [allreduce_fusion]: 4.84998e-06 [matmul_add_comm_reduction]: 7.77992e-06 [allreduce_slice_to_reducescatter]: 2.49944e-07 [virtual_shard_identity]: 8.48004e-06 [virtual_dataset]: 7.45e-06 [get_grad_eliminate_]: 7.28993e-06 [virtual_output]: 7.21007e-06 [merge_forward]: 4.54998e-06 [cell_reuse_recompute_pass]: 1.93994e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.522e-05 [before_grad]: 1.216e-05 [inplace_validation]: 4.28001e-06 [meta_fg_expand]: 4.55999e-06 [inplace_validation_after_expand]: 5.47001e-06 [flash_sp_send_recv_attached]: 9.50065e-07 [receive_attached]: 6.49947e-07 [after_resolve]: 1.00901e-05 [a_after_grad]: 1.15499e-05 [special_op_eliminate]: 7.46001e-06 [renormalize]: 1.00001e-07 [add_forward_monad_depend]: 8.69972e-07 [auto_monad_grad]: 1.24995e-06 [auto_monad_eliminator]: 1.788e-05 [cse]: 1.98201e-05 [a_3]: 4.86199e-05 [py_interpret_to_execute_after_opt_a]: 9.34e-06 [slice_cell_reuse_recomputed_activation]: 2.21003e-06 [rewriter_after_opt_a]: 0.00013948 [convert_after_rewriter]: 8.95001e-06 [order_py_execute_after_rewriter]: 5.52007e-06 [opt_b]: 0.00029705, [1] [Cycle 1]: 0.00029179, [7] [b_1]: 0.00021599 [b_2]: 1.025e-05 [updatestate_depend_eliminate]: 5.46e-06 [updatestate_assign_eliminate]: 4.21994e-06 [updatestate_loads_eliminate]: 5.18002e-06 [renormalize]: 2.5006e-07 [cse]: 1.91e-05 [optimize_parallel_all_gather_comm]: 8.05e-06 [overlap_param_gather]: 1.05996e-06 [cconv]: 2.286e-05 [loop_unroll]: 0.0004905 [opt_after_cconv]: 0.00013149, [1] [Cycle 1]: 0.00012561, [7] [c_1]: 5.258e-05 [parameter_eliminate]: 2.40991e-06 [updatestate_depend_eliminate]: 7.75e-06 [updatestate_assign_eliminate]: 4.57e-06 [updatestate_loads_eliminate]: 4.97e-06 [cse]: 2.133e-05 [renormalize]: 4.50062e-07 [remove_dup_value]: 1.384e-05 [tuple_transform]: 6.745e-05, [1] [Cycle 1]: 6.33e-05, [2] [d_1]: 5.46799e-05 [renormalize]: 2.00002e-07 [partial_unused_args_eliminate]: 2.02004e-06 [add_cache_embedding]: 1.371e-05 [add_recomputation]: 6.15601e-05 [cse_after_recomputation]: 2.8e-05, [1] [Cycle 1]: 2.34201e-05, [1] [cse]: 1.817e-05 [environ_conv]: 7.33999e-06 [swap_dp_allreduce_reducescatter]: 8.10996e-06 [bias_add_comm_swap]: 2.25008e-06 [label_micro_interleaved_index]: 1.85997e-06 [label_fine_grained_interleaved_index]: 2.19001e-06 [merge_cast_opt]: 1.04995e-06 [slice_recompute_activation]: 1.66998e-06 [micro_interleaved_order_control]: 2.15997e-06 [assign_add_opt]: 2.85599e-05 [ForceFp32Comm]: 9.00007e-07 [remove_cast_before_assign_add]: 7.45e-06 [full_micro_interleaved_order_control]: 1.96998e-06 [reorder_send_recv_between_fp_bp]: 1.80001e-06 [comm_op_add_attrs]: 2.68001e-05 [add_comm_op_reuse_tag]: 1.79e-06 [interleave_split_concat_branches]: 6.19912e-07 [interleave_parallel_branches]: 7.89994e-07 [overlap_opt_shard_in_pipeline]: 9.40054e-07 [overlap_opt_shard_grad_in_pipeline]: 2.21003e-06 [control_data_broadcast_order]: 1.30001e-06 [grouped_pairwise_exchange_alltoall]: 9.87994e-06 [offloading_packed_experts]: 2.12004e-06 [overlap_recompute_and_grad_model_parallel]: 2.23995e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.00006e-07 [overlap_recompute_allgather_and_fa_grad]: 6.78999e-05 [overlap_grad_ring_attention]: 1.93005e-06 [overlap_grad_flash_sp]: 1.551e-05 [begin_end_overlap_inline]: 7.49948e-07 [split_matmul_comm_elemetwise]: 2.01992e-06 [split_layernorm_comm]: 1.64006e-06 [handle_group_info]: 5.22996e-06 [symbol_engine_optimizer]: 8.61801e-05, [1] [Cycle 1]: 8.17799e-05, [6] [build]: 4.91994e-06 [elim_shapecalc]: 1.26299e-05 [elim_not_effective]: 1.57e-05 [opt_reshape]: 8.17992e-06 [fold_const_symbol]: 1.374e-05 [renormalize]: 2.10013e-07 [pipeline_parallel_scheduler]: 1.47999e-06 [auto_monad_reorder]: 3.10199e-05 [get_jit_bprop_graph]: 4.70085e-07 [rewriter_after_jit_bprop_graph]: 6.79982e-07 [eliminate_special_op_node]: 0.00050397 [distribtued_split]: 4.104e-05 [validate]: 3.488e-05 [task_emit]: 0.0681237 [execute]: 1.03901e-05 Sums bootstrap : 0.000307s : 0.41% type_inference : 0.002562s : 3.41% auto_monad : 0.000133s : 0.18% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000023s : 0.03% optimize.opt_a.a_1 : 0.000545s : 0.72% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000221s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000429s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.07% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000139s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000216s : 0.29% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000491s : 0.65% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000014s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000068s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000504s : 0.67% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.068124s : 90.56% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000135 63 4.97% : 0.000007s : 2: substitution.depend_value_elim 1.95% : 0.000003s : 5: substitution.elim_not_effective 1.88% : 0.000003s : 5: substitution.fold_const_symbol 5.43% : 0.000007s : 6: substitution.graph_param_transform 51.10% : 0.000069s : 1: substitution.inline 3.87% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.19% : 0.000004s : 6: substitution.load_eliminater 2.82% : 0.000004s : 2: substitution.reduce_all_const_elim 5.85% : 0.000008s : 10: substitution.remove_not_recompute_node 2.54% : 0.000003s : 2: substitution.replace_old_param 8.53% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.86% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002531 2 89.10% : 0.002255s : 1: type_inference.infer 10.90% : 0.000276s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000229 1420 0.80% : 0.000002s : 13: predicate.accumulaten_eliminater 1.16% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.82% : 0.000002s : 13: predicate.addn_zero_filter 0.80% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.25% : 0.000005s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.83% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.42% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.36% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.87% : 0.000002s : 12: predicate.depend_value_elim 0.92% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.82% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.90% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.31% : 0.000001s : 6: predicate.elim_not_effective 0.58% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.18% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.97% : 0.000005s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.28% : 0.000003s : 14: predicate.float_depend_g_call 0.71% : 0.000002s : 12: predicate.float_environ_get_switch 1.05% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.91% : 0.000002s : 12: predicate.get_grad_eliminate 0.35% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.88% : 0.000013s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.07% : 0.000002s : 12: predicate.less_batch_normalization 1.64% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.37% : 0.000005s : 38: predicate.load_eliminater 1.31% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.21% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.86% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.76% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.72% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.15% : 0.000003s : 14: predicate.partial_defer_inline 1.33% : 0.000003s : 19: predicate.partial_eliminate 0.75% : 0.000002s : 13: predicate.print_const_string_wrapper 0.84% : 0.000002s : 12: predicate.reduce_all_const_elim 1.11% : 0.000003s : 13: predicate.reduce_eliminate 0.63% : 0.000001s : 12: predicate.remove_not_recompute_node 1.06% : 0.000002s : 25: predicate.replace_applicator 0.51% : 0.000001s : 12: predicate.replace_old_param 0.28% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.07% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.37% : 0.000003s : 18: predicate.special_op_eliminate 0.89% : 0.000002s : 12: predicate.specialize_transform 0.98% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.06% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.28% : 0.000005s : 38: predicate.stopgrad_eliminater 0.45% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.84% : 0.000002s : 14: predicate.switch_defer_inline 1.62% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.60% : 0.000011s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.70% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.68% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.57% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.76% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.67% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.53% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.55% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.44% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.40% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.87% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000148 4 10.46% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.54% : 0.000133s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.088385 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000018s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.17% : 0.000146s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.38% : 0.000335s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.04% : 0.000031s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.04% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.59% : 0.000518s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000501s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001099s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000046s : 4: opt.transform.symbol_engine_opt 6.12% : 0.005406s : 1: opt_a 0.15% : 0.000135s : 1: opt_after_cconv 0.34% : 0.000300s : 1: opt_b 8.24% : 0.007283s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000073s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000030s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.02% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.27% : 0.000236s : 1: renormalize.infer 0.21% : 0.000188s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000145s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000089s : 1: symbol_engine_optimizer 77.11% : 0.068150s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.92% : 0.002583s : 1: type_inference 0.08% : 0.000070s : 1: validate TotalTime = 0.0808198, [21] [bootstrap]: 0.00032758 [type_inference]: 0.00258258 [auto_monad]: 0.00013678 [graph_reusing]: 2.58e-06 [inline]: 1.45996e-06 [parallel-infer-symbol]: 2.66009e-06 [pre_auto_parallel]: 2.721e-05 [insert-virtual-dataset]: 3.30992e-06 [parallel-infer-symbol-second]: 4.4005e-07 [dataset_repeat_opt]: 1.37999e-06 [pipeline_split]: 1.69e-06 [optimize]: 0.00731494, [52] [py_interpret_to_execute]: 1.492e-05 [rewriter_before_opt_a]: 3.53199e-05 [opt_a]: 0.0054326, [2] [Cycle 1]: 0.00154864, [43] [expand_dump_flag]: 3.89002e-06 [switch_simplify]: 3.023e-05 [loop_unroll]: 1.30699e-05 [a_1]: 0.00035001 [recompute_prepare]: 8.69995e-06 [updatestate_depend_eliminate]: 8.74e-06 [updatestate_assign_eliminate]: 5.69993e-06 [updatestate_loads_eliminate]: 7.84011e-06 [parameter_eliminate]: 3.71994e-06 [a_2]: 0.00011774 [accelerated_algorithm]: 8.46002e-06 [shard]: 2.27999e-06 [meta_shard_fg_expand]: 3.86999e-06 [shard_inline]: 8.55001e-06 [auto_parallel]: 1.213e-05 [parallel]: 8.01997e-06 [flash_sp]: 1.21901e-05 [merge_comm]: 8.55001e-06 [allreduce_fusion]: 5.31005e-06 [matmul_add_comm_reduction]: 1.10399e-05 [allreduce_slice_to_reducescatter]: 5.20027e-07 [virtual_shard_identity]: 9.57993e-06 [virtual_dataset]: 7.91007e-06 [get_grad_eliminate_]: 8.03999e-06 [virtual_output]: 8.20006e-06 [merge_forward]: 6.59993e-06 [cell_reuse_recompute_pass]: 1.80001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.702e-05 [before_grad]: 1.432e-05 [inplace_validation]: 5.34998e-06 [meta_fg_expand]: 5.69003e-06 [inplace_validation_after_expand]: 6.47001e-06 [flash_sp_send_recv_attached]: 5.38002e-06 [receive_attached]: 2.53995e-06 [after_resolve]: 1.18399e-05 [a_after_grad]: 1.252e-05 [special_op_eliminate]: 8.00006e-06 [renormalize]: 0.00043548 [add_forward_monad_depend]: 4.21994e-06 [auto_monad_grad]: 2.00002e-06 [auto_monad_eliminator]: 3.426e-05 [cse]: 3.476e-05 [a_3]: 5.75699e-05 [Cycle 2]: 0.000771, [43] [expand_dump_flag]: 1.15996e-06 [switch_simplify]: 9.51998e-06 [loop_unroll]: 7.88993e-06 [a_1]: 0.00020046 [recompute_prepare]: 7.77002e-06 [updatestate_depend_eliminate]: 5.78992e-06 [updatestate_assign_eliminate]: 4.60993e-06 [updatestate_loads_eliminate]: 5.35999e-06 [parameter_eliminate]: 1.27009e-06 [a_2]: 0.00010371 [accelerated_algorithm]: 8.23999e-06 [shard]: 1.17009e-06 [meta_shard_fg_expand]: 2.50002e-06 [shard_inline]: 8.04989e-06 [auto_parallel]: 1.09799e-05 [parallel]: 3.22994e-06 [flash_sp]: 3.41004e-06 [merge_comm]: 5.81995e-06 [allreduce_fusion]: 4.73007e-06 [matmul_add_comm_reduction]: 8.08993e-06 [allreduce_slice_to_reducescatter]: 2.49944e-07 [virtual_shard_identity]: 8.27992e-06 [virtual_dataset]: 7.41007e-06 [get_grad_eliminate_]: 7.22997e-06 [virtual_output]: 7.21007e-06 [merge_forward]: 4.51994e-06 [cell_reuse_recompute_pass]: 1.70001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.53399e-05 [before_grad]: 1.245e-05 [inplace_validation]: 4.50993e-06 [meta_fg_expand]: 4.65999e-06 [inplace_validation_after_expand]: 5.01995e-06 [flash_sp_send_recv_attached]: 9.40054e-07 [receive_attached]: 6.79982e-07 [after_resolve]: 9.82999e-06 [a_after_ TotalTime = 0.0808205, [21] [bootstrap]: 0.00032568 [type_inference]: 0.00258298 [auto_monad]: 0.00013568 [graph_reusing]: 1.47999e-06 [inline]: 1.09e-06 [parallel-infer-symbol]: 1.80001e-06 [pre_auto_parallel]: 2.715e-05 [insert-virtual-dataset]: 1.77999e-06 [parallel-infer-symbol-second]: 3.69968e-07 [dataset_repeat_opt]: 7.20029e-07 [pipeline_split]: 1.02003e-06 [optimize]: 0.00731185, [52] [py_interpret_to_execute]: 1.51601e-05 [rewriter_before_opt_a]: 3.508e-05 [opt_a]: 0.00544362, [2] [Cycle 1]: 0.00154832, [43] [expand_dump_flag]: 3.69002e-06 [switch_simplify]: 3.13999e-05 [loop_unroll]: 1.342e-05 [a_1]: 0.00034922 [recompute_prepare]: 9.39996e-06 [updatestate_depend_eliminate]: 8.25e-06 [updatestate_assign_eliminate]: 5.83997e-06 [updatestate_loads_eliminate]: 5.47001e-06 [parameter_eliminate]: 2.80002e-06 [a_2]: 0.00011759 [accelerated_algorithm]: 8.31997e-06 [shard]: 1.79e-06 [meta_shard_fg_expand]: 3.64997e-06 [shard_inline]: 8.71997e-06 [auto_parallel]: 1.249e-05 [parallel]: 6.56e-06 [flash_sp]: 1.11701e-05 [merge_comm]: 8.81997e-06 [allreduce_fusion]: 5.00993e-06 [matmul_add_comm_reduction]: 1.14801e-05 [allreduce_slice_to_reducescatter]: 3.50061e-07 [virtual_shard_identity]: 9.97994e-06 [virtual_dataset]: 8.08993e-06 [get_grad_eliminate_]: 8.07003e-06 [virtual_output]: 8.02998e-06 [merge_forward]: 5.81006e-06 [cell_reuse_recompute_pass]: 1.67009e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.73299e-05 [before_grad]: 1.408e-05 [inplace_validation]: 5.50004e-06 [meta_fg_expand]: 5.68002e-06 [inplace_validation_after_expand]: 6.59004e-06 [flash_sp_send_recv_attached]: 4.94008e-06 [receive_attached]: 2.89001e-06 [after_resolve]: 1.122e-05 [a_after_grad]: 1.27801e-05 [special_op_eliminate]: 7.81007e-06 [renormalize]: 0.00044255 [add_forward_monad_depend]: 3.28e-06 [auto_monad_grad]: 1.60001e-06 [auto_monad_eliminator]: 2.616e-05 [cse]: 3.49201e-05 [a_3]: 5.844e-05 [Cycle 2]: 0.00078133, [43] [expand_dump_flag]: 1.11002e-06 [switch_simplify]: 8.80996e-06 [loop_unroll]: 8.09005e-06 [a_1]: 0.00020146 [recompute_prepare]: 7.56001e-06 [updatestate_depend_eliminate]: 6.49993e-06 [updatestate_assign_eliminate]: 4.91005e-06 [updatestate_loads_eliminate]: 5.72007e-06 [parameter_eliminate]: 1.31992e-06 [a_2]: 0.00010556 [accelerated_algorithm]: 8.52998e-06 [shard]: 1.55997e-06 [meta_shard_fg_expand]: 2.43995e-06 [shard_inline]: 7.83999e-06 [auto_parallel]: 1.064e-05 [parallel]: 3.39001e-06 [flash_sp]: 2.37999e-06 [merge_comm]: 5.88992e-06 [allreduce_fusion]: 5.04998e-06 [matmul_add_comm_reduction]: 8.13999e-06 [allreduce_slice_to_reducescatter]: 2.60072e-07 [virtual_shard_identity]: 9.22999e-06 [virtual_dataset]: 7.63999e-06 [get_grad_eliminate_]: 7.40995e-06 [virtual_output]: 7.28003e-06 [merge_forward]: 4.57e-06 [cell_reuse_recompute_pass]: 1.76998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.547e-05 [before_grad]: 1.266e-05 [inplace_validation]: 4.38001e-06 [meta_fg_expand]: 4.58001e-06 [inplace_validation_after_expand]: 5.19003e-06 [flash_sp_send_recv_attached]: 9.50065e-07 [receive_attached]: 6.79982e-07 [after_resolve]: 9.21998e-06 [a_after_grad]: grad]: 1.21901e-05 [special_op_eliminate]: 7.71997e-06 [renormalize]: 1.10012e-07 [add_forward_monad_depend]: 1.13994e-06 [auto_monad_grad]: 1.15996e-06 [auto_monad_eliminator]: 1.849e-05 [cse]: 1.878e-05 [a_3]: 4.788e-05 [py_interpret_to_execute_after_opt_a]: 9.07003e-06 [slice_cell_reuse_recomputed_activation]: 2.33995e-06 [rewriter_after_opt_a]: 0.00014057 [convert_after_rewriter]: 9.27993e-06 [order_py_execute_after_rewriter]: 6.68003e-06 [opt_b]: 0.00030341, [1] [Cycle 1]: 0.00029808, [7] [b_1]: 0.00016489 [b_2]: 1.01799e-05 [updatestate_depend_eliminate]: 5.51005e-06 [updatestate_assign_eliminate]: 4.42995e-06 [updatestate_loads_eliminate]: 5.58002e-06 [renormalize]: 3.20026e-07 [cse]: 1.895e-05 [optimize_parallel_all_gather_comm]: 8.52998e-06 [overlap_param_gather]: 6.6997e-07 [cconv]: 1.96e-05 [loop_unroll]: 0.00048404 [opt_after_cconv]: 0.00013318, [1] [Cycle 1]: 0.00012691, [7] [c_1]: 5.269e-05 [parameter_eliminate]: 2.46998e-06 [updatestate_depend_eliminate]: 8.25e-06 [updatestate_assign_eliminate]: 4.89003e-06 [updatestate_loads_eliminate]: 5.47001e-06 [cse]: 2.116e-05 [renormalize]: 3.19909e-07 [remove_dup_value]: 1.369e-05 [tuple_transform]: 6.998e-05, [1] [Cycle 1]: 6.54099e-05, [2] [d_1]: 5.632e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 2.43995e-06 [add_cache_embedding]: 1.34701e-05 [add_recomputation]: 6.39199e-05 [cse_after_recomputation]: 2.61699e-05, [1] [Cycle 1]: 2.197e-05, [1] [cse]: 1.715e-05 [environ_conv]: 7.57002e-06 [swap_dp_allreduce_reducescatter]: 7.84989e-06 [bias_add_comm_swap]: 2.53005e-06 [label_micro_interleaved_index]: 2.2701e-06 [label_fine_grained_interleaved_index]: 2.20002e-06 [merge_cast_opt]: 1.42003e-06 [slice_recompute_activation]: 1.85997e-06 [micro_interleaved_order_control]: 1.71002e-06 [assign_add_opt]: 2.921e-05 [ForceFp32Comm]: 8.39937e-07 [remove_cast_before_assign_add]: 7.16001e-06 [full_micro_interleaved_order_control]: 2.20991e-06 [reorder_send_recv_between_fp_bp]: 2.27999e-06 [comm_op_add_attrs]: 2.745e-05 [add_comm_op_reuse_tag]: 1.87999e-06 [interleave_split_concat_branches]: 9.10019e-07 [interleave_parallel_branches]: 9.69972e-07 [overlap_opt_shard_in_pipeline]: 1.43994e-06 [overlap_opt_shard_grad_in_pipeline]: 2.41993e-06 [control_data_broadcast_order]: 6.3004e-07 [grouped_pairwise_exchange_alltoall]: 5.62996e-06 [offloading_packed_experts]: 1.98989e-06 [overlap_recompute_and_grad_model_parallel]: 2.04006e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.89995e-07 [overlap_recompute_allgather_and_fa_grad]: 6.658e-05 [overlap_grad_ring_attention]: 2.0701e-06 [overlap_grad_flash_sp]: 1.509e-05 [begin_end_overlap_inline]: 8.50065e-07 [split_matmul_comm_elemetwise]: 2.64007e-06 [split_layernorm_comm]: 1.71002e-06 [handle_group_info]: 4.93007e-06 [symbol_engine_optimizer]: 9.18e-05, [1] [Cycle 1]: 8.70799e-05, [6] [build]: 5.21995e-06 [elim_shapecalc]: 1.31e-05 [elim_not_effective]: 1.665e-05 [opt_reshape]: 8.91008e-06 [fold_const_symbol]: 1.626e-05 [renormalize]: 3.30037e-07 [pipeline_parallel_scheduler]: 1.14006e-06 [auto_monad_reorder]: 2.99701e-05 [get_jit_bprop_graph]: 5.10016e-07 [rewriter_after_jit_bprop_graph]: 4.89992e-07 [eliminate_special_op_node]: 0.00051528 [distribtued_split]: 3.23199e-05 [validate]: 3.421e-05 [task_emit]: 0.0695418 [execute]: 1.24599e-05 Sums bootstrap : 0.000328s : 0.43% type_inference : 0.002583s : 3.37% auto_monad : 0.000137s : 01.183e-05 [special_op_eliminate]: 7.41007e-06 [renormalize]: 1.00001e-07 [add_forward_monad_depend]: 9.89996e-07 [auto_monad_grad]: 1.12993e-06 [auto_monad_eliminator]: 1.789e-05 [cse]: 2.00401e-05 [a_3]: 4.826e-05 [py_interpret_to_execute_after_opt_a]: 9.55001e-06 [slice_cell_reuse_recomputed_activation]: 1.09e-06 [rewriter_after_opt_a]: 0.00012945 [convert_after_rewriter]: 1.047e-05 [order_py_execute_after_rewriter]: 5.4501e-06 [opt_b]: 0.00029837, [1] [Cycle 1]: 0.00024545, [7] [b_1]: 0.0001666 [b_2]: 1.03101e-05 [updatestate_depend_eliminate]: 5.70004e-06 [updatestate_assign_eliminate]: 4.50993e-06 [updatestate_loads_eliminate]: 5.59003e-06 [renormalize]: 2.40048e-07 [cse]: 1.944e-05 [optimize_parallel_all_gather_comm]: 8.43999e-06 [overlap_param_gather]: 1.17009e-06 [cconv]: 2.39899e-05 [loop_unroll]: 0.00049205 [opt_after_cconv]: 0.00013581, [1] [Cycle 1]: 0.00012973, [7] [c_1]: 5.294e-05 [parameter_eliminate]: 2.79001e-06 [updatestate_depend_eliminate]: 8.09995e-06 [updatestate_assign_eliminate]: 4.82006e-06 [updatestate_loads_eliminate]: 6.01006e-06 [cse]: 2.182e-05 [renormalize]: 4.49945e-07 [remove_dup_value]: 1.057e-05 [tuple_transform]: 6.66201e-05, [1] [Cycle 1]: 6.225e-05, [2] [d_1]: 5.274e-05 [renormalize]: 1.70083e-07 [partial_unused_args_eliminate]: 1.39e-06 [add_cache_embedding]: 1.208e-05 [add_recomputation]: 6.10501e-05 [cse_after_recomputation]: 2.705e-05, [1] [Cycle 1]: 2.235e-05, [1] [cse]: 1.71701e-05 [environ_conv]: 5.97001e-06 [swap_dp_allreduce_reducescatter]: 7.79005e-06 [bias_add_comm_swap]: 1.97999e-06 [label_micro_interleaved_index]: 2.16998e-06 [label_fine_grained_interleaved_index]: 2.10002e-06 [merge_cast_opt]: 8.70088e-07 [slice_recompute_activation]: 1.77999e-06 [micro_interleaved_order_control]: 1.46998e-06 [assign_add_opt]: 2.931e-05 [ForceFp32Comm]: 6.40051e-07 [remove_cast_before_assign_add]: 7.1401e-06 [full_micro_interleaved_order_control]: 2.16998e-06 [reorder_send_recv_between_fp_bp]: 2.19001e-06 [comm_op_add_attrs]: 2.741e-05 [add_comm_op_reuse_tag]: 1.71002e-06 [interleave_split_concat_branches]: 5.69969e-07 [interleave_parallel_branches]: 7.39936e-07 [overlap_opt_shard_in_pipeline]: 9.00007e-07 [overlap_opt_shard_grad_in_pipeline]: 1.70991e-06 [control_data_broadcast_order]: 1.13994e-06 [grouped_pairwise_exchange_alltoall]: 9.47993e-06 [offloading_packed_experts]: 2.33005e-06 [overlap_recompute_and_grad_model_parallel]: 1.91992e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.89994e-07 [overlap_recompute_allgather_and_fa_grad]: 7.161e-05 [overlap_grad_ring_attention]: 1.09e-06 [overlap_grad_flash_sp]: 1.115e-05 [begin_end_overlap_inline]: 4.00003e-07 [split_matmul_comm_elemetwise]: 1.72004e-06 [split_layernorm_comm]: 1.50001e-06 [handle_group_info]: 4.59002e-06 [symbol_engine_optimizer]: 8.839e-05, [1] [Cycle 1]: 8.36899e-05, [6] [build]: 4.91994e-06 [elim_shapecalc]: 1.273e-05 [elim_not_effective]: 1.67501e-05 [opt_reshape]: 8.37992e-06 [fold_const_symbol]: 1.337e-05 [renormalize]: 4.4005e-07 [pipeline_parallel_scheduler]: 1.56998e-06 [auto_monad_reorder]: 3.131e-05 [get_jit_bprop_graph]: 6.00005e-07 [rewriter_after_jit_bprop_graph]: 2.59955e-07 [eliminate_special_op_node]: 0.00050418 [distribtued_split]: 4.221e-05 [validate]: 3.516e-05 [task_emit]: 0.0695419 [execute]: 1.241e-05 Sums bootstrap : 0.000326s : 0.42% type_inference : 0.002583s : 3.37% auto_monad : 0.000136s : 0.18% graph_r.18% graph_reusing : 0.000003s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000027s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000550s : 0.72% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000221s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000016s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000436s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000053s : 0.07% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000141s : 0.18% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000165s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestateusing : 0.000001s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000027s : 0.04% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000551s : 0.72% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000223s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000020s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.03% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000443s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000044s : 0.06% optimize.opt_a.cse : 0.000055s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000129s : 0.17% optimize.convert_after_rewriter : 0.000010s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000167s : 0.22% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminae_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000020s : 0.03% optimize.loop_unroll : 0.000484s : 0.63% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000021s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000064s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000006s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000067s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000003s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000016s : 0.02% te : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000024s : 0.03% optimize.loop_unroll : 0.000492s : 0.64% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000011s : 0.01% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000072s : 0.09% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000011s : 0.01% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.sym optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000515s : 0.67% distribtued_split : 0.000032s : 0.04% validate : 0.000034s : 0.04% task_emit : 0.069542s : 90.71% execute : 0.000012s : 0.02% bol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000504s : 0.66% distribtued_split : 0.000042s : 0.06% validate : 0.000035s : 0.05% task_emit : 0.069542s : 90.74% execute : 0.000012s : 0.02% Time group info: ------[substitution.] 0.000133 63 4.14% : 0.000006s : 2: substitution.depend_value_elim 1.89% : 0.000003s : 5: substitution.elim_not_effective 1.83% : 0.000002s : 5: substitution.fold_const_symbol 4.12% : 0.000005s : 6: substitution.graph_param_transform 54.23% : 0.000072s : 1: substitution.inline 3.78% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.25% : 0.000004s : 6: substitution.load_eliminater 2.51% : 0.000003s : 2: substitution.reduce_all_const_elim 5.79% : 0.000008s : 10: substitution.remove_not_recompute_node 1.97% : 0.000003s : 2: substitution.replace_old_param 9.16% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.35% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002551 2 88.96% : 0.002269s : 1: type_inference.infer 11.04% : 0.000282s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000071 1 100.00% : 0.000071s : 1: match.inline ------[predicate.] 0.000231 1420 0.77% : 0.000002s : 13: predicate.accumulaten_eliminater 1.26% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.14% : 0.000005s : 25: predicate.arithmetic_simplify 0.86% : 0.000002s : 13: predicate.cast_eliminate 0.75% : 0.000002s : 12: predicate.check_bprop_eliminate 0.69% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.48% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.11% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.91% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.92% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.86% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.64% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.89% : 0.000004s : 31: predicate.environ_get_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.27% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.76% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.62% : 0.000013s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.00% : 0.000002s : 12: predicate.less_batch_normalization 1.79% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.42% : 0.000006s : 38: predicate.load_eliminater 1.51% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.15% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.90% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.74% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.85% : 0.000002s : 12: predicat Time group info: ------[substitution.] 0.000140 63 5.43% : 0.000008s : 2: substitution.depend_value_elim 1.92% : 0.000003s : 5: substitution.elim_not_effective 1.30% : 0.000002s : 5: substitution.fold_const_symbol 5.40% : 0.000008s : 6: substitution.graph_param_transform 51.50% : 0.000072s : 1: substitution.inline 3.95% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.02% : 0.000004s : 6: substitution.load_eliminater 2.61% : 0.000004s : 2: substitution.reduce_all_const_elim 5.61% : 0.000008s : 10: substitution.remove_not_recompute_node 2.78% : 0.000004s : 2: substitution.replace_old_param 8.29% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 8.20% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002551 2 88.95% : 0.002269s : 1: type_inference.infer 11.05% : 0.000282s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000071 1 100.00% : 0.000071s : 1: match.inline ------[predicate.] 0.000232 1420 0.79% : 0.000002s : 13: predicate.accumulaten_eliminater 1.18% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.75% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.79% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.17% : 0.000005s : 25: predicate.arithmetic_simplify 0.91% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.30% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.88% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.91% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.27% : 0.000001s : 6: predicate.elim_not_effective 0.69% : 0.000002s : 6: predicate.elim_shapecalc_of_broadcastargs 1.10% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.15% : 0.000003s : 19: predicate.environ_get_depend_swap 1.90% : 0.000004s : 31: predicate.environ_get_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.35% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.16% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.37% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.84% : 0.000014s : 63: predicate.inline 0.98% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.97% : 0.000002s : 12: predicate.less_batch_normalization 1.76% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.47% : 0.000006s : 38: predicate.load_eliminater 1.30% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.77% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.76% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.79% : 0.000002s : 13: predicate.minmaximum_grad 0.84% : 0.000002s : 6: predicate.mutable_eliminate 0.45% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.14% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.82% : 0.000002s : 13: predicate.print_const_string_wrapper 0.84% : 0.000002s : 12: predicate.reduce_all_const_elim 1.13% : 0.000003s : 13: predicate.reduce_eliminate 0.57% : 0.000001s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.88% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.45% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.49% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.36% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.06% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.06% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.30% : 0.000005s : 38: predicate.stopgrad_eliminater 0.46% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.71% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.58% : 0.000011s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.82% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.87% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.68% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.63% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.58% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.31% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.31% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.65% : 0.000002s : 6: predicate.value_based_eliminate 0.78% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.60% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000151 4 11.30% : 0.000017s : 1: func_graph_cloner_run.FuncGraphClonerGraph 88.70% : 0.000134s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089966 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.17% : 0.000150s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.39% : 0.000354s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.04% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.06% : 0.00005e.mini_step_allgather_replace 0.70% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.76% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.10% : 0.000003s : 13: predicate.reduce_eliminate 0.65% : 0.000002s : 12: predicate.remove_not_recompute_node 1.14% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.96% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.51% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.93% : 0.000002s : 12: predicate.shard_identity_eliminate 1.42% : 0.000003s : 18: predicate.special_op_eliminate 0.89% : 0.000002s : 12: predicate.specialize_transform 1.04% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.99% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.36% : 0.000010s : 43: predicate.switch_simplify 0.72% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.69% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.63% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.59% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.77% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.50% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.33% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.40% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.54% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.84% : 0.000002s : 12: predicate.virtual_output_eliminate 0.62% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000150 4 9.03% : 0.000014s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.97% : 0.000136s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089968 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000068s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.17% : 0.000149s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.39% : 0.000354s : 1: bootstrap 0.03% : 0.000023s : 1: cconv 0.03% : 0.000031s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.04% : 0.000040s : 1: distribtued_split 0.58% : 0.000519s : 1: eliminate_special_op_node 0.01% : 0.000009s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000503s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000017s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001109s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.18% : 0.000158s : 27: opt.transform.opt_b 0.06% : 0.000051s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.05% : 0.005447s : 1: opt_a 0.16% : 0.000140s : 1: opt_after_cconv 0.34% : 0.000301s : 1: opt_b 8.14% : 0.007320s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000015s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.09% : 0.000077s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.04% : 0.000034s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000015s : 1: remove_dup_value 0.27% : 0.000243s : 1: renormalize.infer 0.22% : 0.000194s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000135s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000092s : 1: symbol_engine_optimizer 77.34% : 0.069577s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.89% : 0.002602s : 1: type_inference 0.08% : 0.000069s : 1: validate 0s : 1: distribtued_split 0.59% : 0.000529s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000009s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000493s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001106s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000155s : 27: opt.transform.opt_b 0.06% : 0.000055s : 1: opt.transform.opt_trans_graph 0.04% : 0.000038s : 3: opt.transform.special_op_eliminate 0.06% : 0.000051s : 4: opt.transform.symbol_engine_opt 6.04% : 0.005436s : 1: opt_a 0.15% : 0.000137s : 1: opt_after_cconv 0.34% : 0.000306s : 1: opt_b 8.14% : 0.007323s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000019s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000010s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000072s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000018s : 1: remove_dup_value 0.26% : 0.000237s : 1: renormalize.infer 0.21% : 0.000193s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000146s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000095s : 1: symbol_engine_optimizer 77.33% : 0.069577s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.89% : 0.002601s : 1: type_inference 0.08% : 0.000069s : 1: validate TotalTime = 0.0814035, [21] [bootstrap]: 0.00030992 [type_inference]: 0.00253978 [auto_monad]: 0.00013091 [graph_reusing]: 2.30991e-06 [inline]: 1.81003e-06 [parallel-infer-symbol]: 1.95007e-06 [pre_auto_parallel]: 2.531e-05 [insert-virtual-dataset]: 2.75997e-06 [parallel-infer-symbol-second]: 4.30038e-07 [dataset_repeat_opt]: 1.56008e-06 [pipeline_split]: 1.54995e-06 [optimize]: 0.00720021, [52] [py_interpret_to_execute]: 1.53499e-05 [rewriter_before_opt_a]: 3.56e-05 [opt_a]: 0.00533768, [2] [Cycle 1]: 0.00151514, [43] [expand_dump_flag]: 3.32005e-06 [switch_simplify]: 3.033e-05 [loop_unroll]: 1.318e-05 [a_1]: 0.00034658 [recompute_prepare]: 8.43999e-06 [updatestate_depend_eliminate]: 8.79006e-06 [updatestate_assign_eliminate]: 5.88002e-06 [updatestate_loads_eliminate]: 7.73999e-06 [parameter_eliminate]: 3.00002e-06 [a_2]: 0.00011662 [accelerated_algorithm]: 8.22998e-06 [shard]: 2.12004e-06 [meta_shard_fg_expand]: 3.30003e-06 [shard_inline]: 8.39995e-06 [auto_parallel]: 1.22699e-05 [parallel]: 6.40994e-06 [flash_sp]: 9.57004e-06 [merge_comm]: 8.10996e-06 [allreduce_fusion]: 5.13997e-06 [matmul_add_comm_reduction]: 9.56003e-06 [allreduce_slice_to_reducescatter]: 5.59958e-07 [virtual_shard_identity]: 9.95002e-06 [virtual_dataset]: 7.88004e-06 [get_grad_eliminate_]: 7.52998e-06 [virtual_output]: 7.59005e-06 [merge_forward]: 5.42996e-06 [cell_reuse_recompute_pass]: 1.84996e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.662e-05 [before_grad]: 1.39001e-05 [inplace_validation]: 4.77e-06 [meta_fg_expand]: 5.26989e-06 [inplace_validation_after_expand]: 5.59003e-06 [flash_sp_send_recv_attached]: 3.94997e-06 [receive_attached]: 2.19001e-06 [after_resolve]: 1.062e-05 [a_after_grad]: 1.282e-05 [special_op_eliminate]: 7.52998e-06 [renormalize]: 0.00042561 [add_forward_monad_depend]: 3.88001e-06 [auto_monad_grad]: 2.24996e-06 [auto_monad_eliminator]: 3.209e-05 [cse]: 3.35299e-05 [a_3]: 5.871e-05 [Cycle 2]: 0.00078277, [43] [expand_dump_flag]: 1.07998e-06 [switch_simplify]: 9.12999e-06 [loop_unroll]: 9.51998e-06 [a_1]: 0.00020581 [recompute_prepare]: 7.67002e-06 [updatestate_depend_eliminate]: 5.86989e-06 [updatestate_assign_eliminate]: 4.61994e-06 [updatestate_loads_eliminate]: 5.38002e-06 [parameter_eliminate]: 1.19e-06 [a_2]: 0.00010573 [accelerated_algorithm]: 8.49995e-06 [shard]: 1.11992e-06 [meta_shard_fg_expand]: 2.49001e-06 [shard_inline]: 7.97003e-06 [auto_parallel]: 1.06101e-05 [parallel]: 3.54997e-06 [flash_sp]: 3.58e-06 [merge_comm]: 5.70004e-06 [allreduce_fusion]: 4.99003e-06 [matmul_add_comm_reduction]: 7.85e-06 [allreduce_slice_to_reducescatter]: 2.79979e-07 [virtual_shard_identity]: 8.48994e-06 [virtual_dataset]: 7.61007e-06 [get_grad_eliminate_]: 7.16001e-06 [virtual_output]: 7.18003e-06 [merge_forward]: 4.52995e-06 [cell_reuse_recompute_pass]: 1.92004e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.46501e-05 [before_grad]: 1.269e-05 [inplace_validation]: 4.68001e-06 [meta_fg_expand]: 4.73007e-06 [inplace_validation_after_expand]: 5.27001e-06 [flash_sp_send_recv_attached]: 1.02003e-06 [receive_attached]: 6.59958e-07 [after_resolve]: 9.71998e-06 [a_after_grad]: 1.163e-05 [special_op_eliminate]: 7.37002e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 1.02993e-06 [auto_monad_grad]: 1.06997e-06 [auto_monad_eliminator]: 1.816e-05 [cse]: 1.971e-05 [a_3]: 4.88601e-05 [py_interpret_to_execute_after_opt_a]: 8.75001e-06 [slice_cell_reuse_recomputed_activation]: 2.80992e-06 [rewriter_after_opt_a]: 0.00014245 [convert_after_rewriter]: 9.4201e-06 [order_py_execute_after_rewriter]: 6.30005e-06 [opt_b]: 0.00024268, [1] [Cycle 1]: 0.00023743, [7] [b_1]: 0.00016243 [b_2]: 9.89996e-06 [updatestate_depend_eliminate]: 5.47001e-06 [updatestate_assign_eliminate]: 4.54998e-06 [updatestate_loads_eliminate]: 5.39003e-06 [renormalize]: 3.20026e-07 [cse]: 1.88601e-05 [optimize_parallel_all_gather_comm]: 8.18993e-06 [overlap_param_gather]: 1.02003e-06 [cconv]: 5.61801e-05 [loop_unroll]: 0.00048582 [opt_after_cconv]: 0.00013383, [1] [Cycle 1]: 0.00012776, [7] [c_1]: 5.34001e-05 [parameter_eliminate]: 2.45997e-06 [updatestate_depend_eliminate]: 8.37992e-06 [updatestate_assign_eliminate]: 4.58001e-06 [updatestate_loads_eliminate]: 5.67001e-06 [cse]: 2.183e-05 [renormalize]: 2.89991e-07 [remove_dup_value]: 1.302e-05 [tuple_transform]: 6.846e-05, [1] [Cycle 1]: 6.437e-05, [2] [d_1]: 5.546e-05 [renormalize]: 1.49943e-07 [partial_unused_args_eliminate]: 1.81003e-06 [add_cache_embedding]: 1.339e-05 [add_recomputation]: 6.288e-05 [cse_after_recomputation]: 2.676e-05, [1] [Cycle 1]: 2.239e-05, [1] [cse]: 1.739e-05 [environ_conv]: 7.99005e-06 [swap_dp_allreduce_reducescatter]: 8.01997e-06 [bias_add_comm_swap]: 2.56009e-06 [label_micro_interleaved_index]: 1.92004e-06 [label_fine_grained_interleaved_index]: 1.95997e-06 [merge_cast_opt]: 1.03994e-06 [slice_recompute_activation]: 1.77999e-06 [micro_interleaved_order_control]: 1.51002e-06 [assign_add_opt]: 2.796e-05 [ForceFp32Comm]: 8.49948e-07 [remove_cast_before_assign_add]: 7.08993e-06 [full_micro_interleaved_order_control]: 2.41003e-06 [reorder_send_recv_between_fp_bp]: 2.00002e-06 [comm_op_add_attrs]: 2.75701e-05 [add_comm_op_reuse_tag]: 2.03995e-06 [interleave_split_concat_branches]: 1.04995e-06 [interleave_parallel_branches]: 8.79983e-07 [overlap_opt_shard_in_pipeline]: 1.32993e-06 [overlap_opt_shard_grad_in_pipeline]: 2.25008e-06 [control_data_broadcast_order]: 1.06008e-06 [grouped_pairwise_exchange_alltoall]: 9.15001e-06 [offloading_packed_experts]: 2.00991e-06 [overlap_recompute_and_grad_model_parallel]: 1.67999e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.07998e-06 [overlap_recompute_allgather_and_fa_grad]: 7.163e-05 [overlap_grad_ring_attention]: 2.20002e-06 [overlap_grad_flash_sp]: 1.432e-05 [begin_end_overlap_inline]: 6.79982e-07 [split_matmul_comm_elemetwise]: 2.17999e-06 [split_layernorm_comm]: 1.84006e-06 [handle_group_info]: 5.04008e-06 [symbol_engine_optimizer]: 8.96e-05, [1] [Cycle 1]: 8.464e-05, [6] [build]: 5.02006e-06 [elim_shapecalc]: 1.31901e-05 [elim_not_effective]: 1.68401e-05 [opt_reshape]: 9.52999e-06 [fold_const_symbol]: 1.372e-05 [renormalize]: 3.49944e-07 [pipeline_parallel_scheduler]: 1.29e-06 [auto_monad_reorder]: 3.14e-05 [get_jit_bprop_graph]: 4.4005e-07 [rewriter_after_jit_bprop_graph]: 4.00003e-07 [eliminate_special_op_node]: 0.0005056 [distribtued_split]: 4.016e-05 [validate]: 3.50201e-05 [task_emit]: 0.0703285 [execute]: 9.22999e-06 Sums bootstrap : 0.000310s : 0.40% type_inference : 0.002540s : 3.28% auto_monad : 0.000131s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000023s : 0.03% optimize.opt_a.a_1 : 0.000552s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000222s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000426s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000142s : 0.18% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000056s : 0.07% optimize.loop_unroll : 0.000486s : 0.63% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000028s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000072s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000010s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000506s : 0.65% distribtued_split : 0.000040s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.070329s : 90.87% execute : 0.000009s : 0.01% TotalTime = 0.0814406, [21] [bootstrap]: 0.00031064 [type_inference]: 0.00240713 [auto_monad]: 0.00010148 [graph_reusing]: 1.99e-06 [inline]: 1.17999e-06 [parallel-infer-symbol]: 1.29989e-06 [pre_auto_parallel]: 2.155e-05 [insert-virtual-dataset]: 1.50001e-06 [parallel-infer-symbol-second]: 3.7998e-07 [dataset_repeat_opt]: 8.00006e-07 [pipeline_split]: 8.19913e-07 [optimize]: 0.00714815, [52] [py_interpret_to_execute]: 1.23e-05 [rewriter_before_opt_a]: 3.027e-05 [opt_a]: 0.00539249, [2] [Cycle 1]: 0.0014289, [43] [expand_dump_flag]: 2.24996e-06 [switch_simplify]: 2.58799e-05 [loop_unroll]: 1.29599e-05 [a_1]: 0.00032273 [recompute_prepare]: 8.72998e-06 [updatestate_depend_eliminate]: 7.21007e-06 [updatestate_assign_eliminate]: 5.53997e-06 [updatestate_loads_eliminate]: 5.73997e-06 [parameter_eliminate]: 2.50991e-06 [a_2]: 0.00011395 [accelerated_algorithm]: 8.40006e-06 [shard]: 1.19e-06 [meta_shard_fg_expand]: 3.39001e-06 [shard_inline]: 8.23999e-06 [auto_parallel]: 1.07799e-05 [parallel]: 5.44998e-06 [flash_sp]: 8.08993e-06 [merge_comm]: 7.16001e-06 [allreduce_fusion]: 5.29992e-06 [matmul_add_comm_reduction]: 8.59995e-06 [allreduce_slice_to_reducescatter]: 3.40049e-07 [virtual_shard_identity]: 9.59006e-06 [virtual_dataset]: 7.81997e-06 [get_grad_eliminate_]: 8.06991e-06 [virtual_output]: 8.02998e-06 [merge_forward]: 4.91994e-06 [cell_reuse_recompute_pass]: 1.46998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.65501e-05 [before_grad]: 1.37701e-05 [inplace_validation]: 4.41994e-06 [meta_fg_expand]: 5.11005e-06 [inplace_validation_after_expand]: 5.54998e-06 [flash_sp_send_recv_attached]: 3.05998e-06 [receive_attached]: 1.49e-06 [after_resolve]: 1.101e-05 [a_after_grad]: 1.301e-05 [special_op_eliminate]: 7.90996e-06 [renormalize]: 0.00040096 [add_forward_monad_depend]: 2.88989e-06 [auto_monad_grad]: 1.66008e-06 [auto_monad_eliminator]: 2.309e-05 [cse]: 2.65e-05 [a_3]: 5.72901e-05 [Cycle 2]: 0.00077329, [43] [expand_dump_flag]: 1.00001e-06 [switch_simplify]: 8.96002e-06 [loop_unroll]: 7.62998e-06 [a_1]: 0.00020146 [recompute_prepare]: 7.36001e-06 [updatestate_depend_eliminate]: 5.58002e-06 [updatestate_assign_eliminate]: 4.80993e-06 [updatestate_loads_eliminate]: 5.34998e-06 [parameter_eliminate]: 1.14995e-06 [a_2]: 0.00010402 [accelerated_algorithm]: 8.21997e-06 [shard]: 8.29925e-07 [meta_shard_fg_expand]: 2.50991e-06 [shard_inline]: 7.84011e-06 [auto_parallel]: 9.99996e-06 [parallel]: 3.11004e-06 [flash_sp]: 2.71993e-06 [merge_comm]: 5.94999e-06 [allreduce_fusion]: 4.95999e-06 [matmul_add_comm_reduction]: 7.61007e-06 [allreduce_slice_to_reducescatter]: 2.59955e-07 [virtual_shard_identity]: 8.91008e-06 [virtual_dataset]: 7.62998e-06 [get_grad_eliminate_]: 7.50995e-06 [virtual_output]: 7.35e-06 [merge_forward]: 4.49002e-06 [cell_reuse_recompute_pass]: 1.65997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.50499e-05 [before_grad]: 1.244e-05 [inplace_validation]: 4.13996e-06 [meta_fg_expand]: 4.47e-06 [inplace_validation_after_expand]: 4.72995e-06 [flash_sp_send_recv_attached]: 8.69972e-07 [receive_attached]: 7.39936e-07 [after_resolve]: 9.72999e-06 [a_after_gr TotalTime = 0.0815452, [21] [bootstrap]: 0.00028854 [type_inference]: 0.00247395 [auto_monad]: 0.00012895 [graph_reusing]: 2.42994e-06 [inline]: 1.44006e-06 [parallel-infer-symbol]: 2.63995e-06 [pre_auto_parallel]: 2.496e-05 [insert-virtual-dataset]: 2.75997e-06 [parallel-infer-symbol-second]: 4.20026e-07 [dataset_repeat_opt]: 1.10001e-06 [pipeline_split]: 1.53994e-06 [optimize]: 0.00718547, [52] [py_interpret_to_execute]: 1.511e-05 [rewriter_before_opt_a]: 3.35e-05 [opt_a]: 0.00529221, [2] [Cycle 1]: 0.0015303, [43] [expand_dump_flag]: 3.09001e-06 [switch_simplify]: 3.034e-05 [loop_unroll]: 1.347e-05 [a_1]: 0.00033861 [recompute_prepare]: 9.11008e-06 [updatestate_depend_eliminate]: 8.88994e-06 [updatestate_assign_eliminate]: 5.84009e-06 [updatestate_loads_eliminate]: 7.33009e-06 [parameter_eliminate]: 3.22005e-06 [a_2]: 0.00011796 [accelerated_algorithm]: 8.79995e-06 [shard]: 2.04996e-06 [meta_shard_fg_expand]: 3.50003e-06 [shard_inline]: 8.70996e-06 [auto_parallel]: 1.249e-05 [parallel]: 6.77991e-06 [flash_sp]: 9.97004e-06 [merge_comm]: 8.41008e-06 [allreduce_fusion]: 5.18991e-06 [matmul_add_comm_reduction]: 1.07001e-05 [allreduce_slice_to_reducescatter]: 7.79983e-07 [virtual_shard_identity]: 9.91998e-06 [virtual_dataset]: 7.73999e-06 [get_grad_eliminate_]: 7.71997e-06 [virtual_output]: 7.78993e-06 [merge_forward]: 5.89993e-06 [cell_reuse_recompute_pass]: 1.64006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.687e-05 [before_grad]: 1.399e-05 [inplace_validation]: 5.23007e-06 [meta_fg_expand]: 5.46e-06 [inplace_validation_after_expand]: 6.22997e-06 [flash_sp_send_recv_attached]: 5.00993e-06 [receive_attached]: 3.14997e-06 [after_resolve]: 1.132e-05 [a_after_grad]: 1.27599e-05 [special_op_eliminate]: 7.52998e-06 [renormalize]: 0.00043407 [add_forward_monad_depend]: 3.48e-06 [auto_monad_grad]: 2.01003e-06 [auto_monad_eliminator]: 3.048e-05 [cse]: 3.168e-05 [a_3]: 5.957e-05 [Cycle 2]: 0.00078426, [43] [expand_dump_flag]: 1.02003e-06 [switch_simplify]: 9.62999e-06 [loop_unroll]: 7.98004e-06 [a_1]: 0.00020407 [recompute_prepare]: 7.57002e-06 [updatestate_depend_eliminate]: 6.10994e-06 [updatestate_assign_eliminate]: 4.90004e-06 [updatestate_loads_eliminate]: 5.57001e-06 [parameter_eliminate]: 1.42993e-06 [a_2]: 0.00010577 [accelerated_algorithm]: 8.38994e-06 [shard]: 1.31992e-06 [meta_shard_fg_expand]: 2.71993e-06 [shard_inline]: 7.66001e-06 [auto_parallel]: 1.073e-05 [parallel]: 3.62995e-06 [flash_sp]: 3.20002e-06 [merge_comm]: 5.80004e-06 [allreduce_fusion]: 4.89003e-06 [matmul_add_comm_reduction]: 8.13999e-06 [allreduce_slice_to_reducescatter]: 2.30037e-07 [virtual_shard_identity]: 8.41997e-06 [virtual_dataset]: 7.8599e-06 [get_grad_eliminate_]: 7.19994e-06 [virtual_output]: 7.15e-06 [merge_forward]: 4.69002e-06 [cell_reuse_recompute_pass]: 1.86998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.539e-05 [before_grad]: 1.241e-05 [inplace_validation]: 4.18001e-06 [meta_fg_expand]: 4.71994e-06 [inplace_validation_after_expand]: 5.68002e-06 [flash_sp_send_recv_attached]: 1.00001e-06 [receive_attached]: 6.79982e-07 [after_resolve]: 9.31998e-06 [a_after_grad]: 1.ad]: 1.20699e-05 [special_op_eliminate]: 7.63999e-06 [renormalize]: 8.00937e-08 [add_forward_monad_depend]: 9.69972e-07 [auto_monad_grad]: 8.79983e-07 [auto_monad_eliminator]: 1.519e-05 [cse]: 1.868e-05 [a_3]: 4.90099e-05 [py_interpret_to_execute_after_opt_a]: 8.42998e-06 [slice_cell_reuse_recomputed_activation]: 1.81003e-06 [rewriter_after_opt_a]: 0.00013649 [convert_after_rewriter]: 8.87003e-06 [order_py_execute_after_rewriter]: 5.12006e-06 [opt_b]: 0.00024025, [1] [Cycle 1]: 0.00023528, [7] [b_1]: 0.00016097 [b_2]: 1.047e-05 [updatestate_depend_eliminate]: 5.15999e-06 [updatestate_assign_eliminate]: 4.38001e-06 [updatestate_loads_eliminate]: 5.01995e-06 [renormalize]: 2.89991e-07 [cse]: 1.788e-05 [optimize_parallel_all_gather_comm]: 8.07003e-06 [overlap_param_gather]: 1.03994e-06 [cconv]: 1.51e-05 [loop_unroll]: 0.00050317 [opt_after_cconv]: 0.00012898, [1] [Cycle 1]: 0.00012288, [7] [c_1]: 5.12e-05 [parameter_eliminate]: 1.72004e-06 [updatestate_depend_eliminate]: 7.37992e-06 [updatestate_assign_eliminate]: 4.84008e-06 [updatestate_loads_eliminate]: 5.11005e-06 [cse]: 2.007e-05 [renormalize]: 4.10015e-07 [remove_dup_value]: 1.007e-05 [tuple_transform]: 6.775e-05, [1] [Cycle 1]: 6.321e-05, [2] [d_1]: 5.326e-05 [renormalize]: 1.49943e-07 [partial_unused_args_eliminate]: 1.43994e-06 [add_cache_embedding]: 1.092e-05 [add_recomputation]: 5.562e-05 [cse_after_recomputation]: 2.639e-05, [1] [Cycle 1]: 2.214e-05, [1] [cse]: 1.68501e-05 [environ_conv]: 6.67002e-06 [swap_dp_allreduce_reducescatter]: 7.27002e-06 [bias_add_comm_swap]: 1.83005e-06 [label_micro_interleaved_index]: 1.43994e-06 [label_fine_grained_interleaved_index]: 1.80001e-06 [merge_cast_opt]: 6.79982e-07 [slice_recompute_activation]: 1.00001e-06 [micro_interleaved_order_control]: 1.22993e-06 [assign_add_opt]: 2.486e-05 [ForceFp32Comm]: 6.79982e-07 [remove_cast_before_assign_add]: 6.06e-06 [full_micro_interleaved_order_control]: 1.10001e-06 [reorder_send_recv_between_fp_bp]: 1.30001e-06 [comm_op_add_attrs]: 2.233e-05 [add_comm_op_reuse_tag]: 1.45007e-06 [interleave_split_concat_branches]: 7.00005e-07 [interleave_parallel_branches]: 4.20026e-07 [overlap_opt_shard_in_pipeline]: 1.06008e-06 [overlap_opt_shard_grad_in_pipeline]: 8.89995e-07 [control_data_broadcast_order]: 5.29923e-07 [grouped_pairwise_exchange_alltoall]: 5.24998e-06 [offloading_packed_experts]: 1.02003e-06 [overlap_recompute_and_grad_model_parallel]: 7.49948e-07 [overlap_grad_matmul_and_grad_allreduce]: 4.10015e-07 [overlap_recompute_allgather_and_fa_grad]: 6.239e-05 [overlap_grad_ring_attention]: 1.23994e-06 [overlap_grad_flash_sp]: 1.18599e-05 [begin_end_overlap_inline]: 4.69969e-07 [split_matmul_comm_elemetwise]: 1.29e-06 [split_layernorm_comm]: 9.89996e-07 [handle_group_info]: 3.11004e-06 [symbol_engine_optimizer]: 8.695e-05, [1] [Cycle 1]: 8.195e-05, [6] [build]: 4.17e-06 [elim_shapecalc]: 1.24901e-05 [elim_not_effective]: 1.621e-05 [opt_reshape]: 8.98005e-06 [fold_const_symbol]: 1.285e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 9.69972e-07 [auto_monad_reorder]: 2.31799e-05 [get_jit_bprop_graph]: 3.39933e-07 [rewriter_after_jit_bprop_graph]: 3.10014e-07 [eliminate_special_op_node]: 0.00050289 [distribtued_split]: 3.38401e-05 [validate]: 3.077e-05 [task_emit]: 0.0706152 [execute]: 8.18004e-06 Sums bootstrap : 0.000311s : 0.40% type_inference : 0.002407s : 3.12% auto_monad : 0.000101s : 0.13% 175e-05 [special_op_eliminate]: 7.27002e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 8.79983e-07 [auto_monad_grad]: 1.17999e-06 [auto_monad_eliminator]: 1.8e-05 [cse]: 1.949e-05 [a_3]: 4.83701e-05 [py_interpret_to_execute_after_opt_a]: 9.70997e-06 [slice_cell_reuse_recomputed_activation]: 2.31003e-06 [rewriter_after_opt_a]: 0.00014218 [convert_after_rewriter]: 8.52998e-06 [order_py_execute_after_rewriter]: 6.28992e-06 [opt_b]: 0.00024698, [1] [Cycle 1]: 0.00024132, [7] [b_1]: 0.00016578 [b_2]: 9.94001e-06 [updatestate_depend_eliminate]: 5.44009e-06 [updatestate_assign_eliminate]: 4.35999e-06 [updatestate_loads_eliminate]: 5.1501e-06 [renormalize]: 3.00002e-07 [cse]: 1.87999e-05 [optimize_parallel_all_gather_comm]: 8.52998e-06 [overlap_param_gather]: 9.59961e-07 [cconv]: 2.31799e-05 [loop_unroll]: 0.00053104 [opt_after_cconv]: 0.00013492, [1] [Cycle 1]: 0.00012902, [7] [c_1]: 5.322e-05 [parameter_eliminate]: 2.35997e-06 [updatestate_depend_eliminate]: 8.10006e-06 [updatestate_assign_eliminate]: 4.70004e-06 [updatestate_loads_eliminate]: 5.98992e-06 [cse]: 2.193e-05 [renormalize]: 3.10014e-07 [remove_dup_value]: 1.154e-05 [tuple_transform]: 6.795e-05, [1] [Cycle 1]: 6.379e-05, [2] [d_1]: 5.46e-05 [renormalize]: 2.20025e-07 [partial_unused_args_eliminate]: 2.16998e-06 [add_cache_embedding]: 1.34199e-05 [add_recomputation]: 5.969e-05 [cse_after_recomputation]: 2.6e-05, [1] [Cycle 1]: 2.185e-05, [1] [cse]: 1.705e-05 [environ_conv]: 7.29004e-06 [swap_dp_allreduce_reducescatter]: 7.28993e-06 [bias_add_comm_swap]: 2.06998e-06 [label_micro_interleaved_index]: 1.91003e-06 [label_fine_grained_interleaved_index]: 2.05997e-06 [merge_cast_opt]: 9.80101e-07 [slice_recompute_activation]: 2.11992e-06 [micro_interleaved_order_control]: 1.77999e-06 [assign_add_opt]: 2.957e-05 [ForceFp32Comm]: 9.00007e-07 [remove_cast_before_assign_add]: 6.78992e-06 [full_micro_interleaved_order_control]: 1.79e-06 [reorder_send_recv_between_fp_bp]: 1.67999e-06 [comm_op_add_attrs]: 2.822e-05 [add_comm_op_reuse_tag]: 1.96998e-06 [interleave_split_concat_branches]: 6.099e-07 [interleave_parallel_branches]: 8.79983e-07 [overlap_opt_shard_in_pipeline]: 1.09e-06 [overlap_opt_shard_grad_in_pipeline]: 2.23995e-06 [control_data_broadcast_order]: 1.02003e-06 [grouped_pairwise_exchange_alltoall]: 9.51998e-06 [offloading_packed_experts]: 1.92993e-06 [overlap_recompute_and_grad_model_parallel]: 1.67999e-06 [overlap_grad_matmul_and_grad_allreduce]: 9.00007e-07 [overlap_recompute_allgather_and_fa_grad]: 8.59901e-05 [overlap_grad_ring_attention]: 1.91003e-06 [overlap_grad_flash_sp]: 1.339e-05 [begin_end_overlap_inline]: 6.89994e-07 [split_matmul_comm_elemetwise]: 2.13995e-06 [split_layernorm_comm]: 1.71002e-06 [handle_group_info]: 4.44998e-06 [symbol_engine_optimizer]: 8.943e-05, [1] [Cycle 1]: 8.46899e-05, [6] [build]: 4.59992e-06 [elim_shapecalc]: 1.324e-05 [elim_not_effective]: 1.703e-05 [opt_reshape]: 8.74e-06 [fold_const_symbol]: 1.359e-05 [renormalize]: 2.89991e-07 [pipeline_parallel_scheduler]: 1.46998e-06 [auto_monad_reorder]: 3.091e-05 [get_jit_bprop_graph]: 4.20026e-07 [rewriter_after_jit_bprop_graph]: 4.20026e-07 [eliminate_special_op_node]: 0.00050039 [distribtued_split]: 3.951e-05 [validate]: 3.607e-05 [task_emit]: 0.0705774 [execute]: 1.047e-05 Sums bootstrap : 0.000289s : 0.37% type_inference : 0.002474s : 3.19% auto_monad : 0.000129s : 0.17% graph_reusing graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000001s : 0.00% pre_auto_parallel : 0.000022s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000035s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000524s : 0.68% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000218s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000002s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000016s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000002s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000401s : 0.52% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.00% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000038s : 0.05% optimize.opt_a.cse : 0.000045s : 0.06% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000136s : 0.18% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depen : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000543s : 0.70% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000224s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000434s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000048s : 0.06% optimize.opt_a.cse : 0.000051s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000142s : 0.18% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000166s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : d_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000503s : 0.65% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000056s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000025s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000022s : 0.03% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000005s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000062s : 0.08% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% opt 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000531s : 0.68% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000012s : 0.01% optimize.tuple_transform.d_1 : 0.000055s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000030s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000028s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000086s : 0.11% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000004s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000023s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000503s : 0.65% distribtued_split : 0.000034s : 0.04% validate : 0.000031s : 0.04% task_emit : 0.070615s : 91.39% execute : 0.000008s : 0.01% ine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000031s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000500s : 0.65% distribtued_split : 0.000040s : 0.05% validate : 0.000036s : 0.05% task_emit : 0.070577s : 90.99% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000128 63 4.97% : 0.000006s : 2: substitution.depend_value_elim 1.94% : 0.000002s : 5: substitution.elim_not_effective 1.75% : 0.000002s : 5: substitution.fold_const_symbol 5.51% : 0.000007s : 6: substitution.graph_param_transform 51.49% : 0.000066s : 1: substitution.inline 3.95% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.27% : 0.000004s : 6: substitution.load_eliminater 2.55% : 0.000003s : 2: substitution.reduce_all_const_elim 5.55% : 0.000007s : 10: substitution.remove_not_recompute_node 2.51% : 0.000003s : 2: substitution.replace_old_param 8.52% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.99% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002509 2 89.10% : 0.002235s : 1: type_inference.infer 10.90% : 0.000273s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000230 1420 0.75% : 0.000002s : 13: predicate.accumulaten_eliminater 1.14% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.36% : 0.000005s : 25: predicate.arithmetic_simplify 0.79% : 0.000002s : 13: predicate.cast_eliminate 0.75% : 0.000002s : 12: predicate.check_bprop_eliminate 0.76% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.43% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.44% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.78% : 0.000002s : 12: predicate.depend_value_elim 0.87% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.87% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.57% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.17% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.04% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_depend_swap 2.01% : 0.000005s : 31: predicate.environ_get_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.44% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.20% : 0.000000s : 6: predicate.fold_const_symbol 0.83% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.67% : 0.000002s : 12: predicate.incorporate_call_switch 5.57% : 0.000013s : 63: predicate.inline 1.01% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.07% : 0.000002s : 12: predicate.less_batch_normalization 1.63% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.45% : 0.000006s : 38: predicate.load_eliminater 1.55% : 0.000004s : 6: predicate.loop_unroll_after_grad 1.25% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.82% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.74% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.76% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.71% : 0.000002s : 13: predicate.minmaximum_grad 0.81% : 0.000002s : 6: predicate.mutable_eliminate 0.44% : 0.000001s : 6: predicate.opt_reshape 0.43% : 0.000001s : 6: predicate.parallel_virtual_node 1.13% : 0.000003s : 14: predicate.partial_defer_inline 1.29% : 0.000003s : 19: predicate.partial_eliminate 0.78% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.09% : 0.000003s : 13: predicate.reduce_eliminate 0.67% : 0.000002s : 12: predicate.remove_not_recompute_node 1.16% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 0.92% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000002s : 12: predicate.shard_identity_eliminate 1.35% : 0.000003s : 18: predicate.special_op_eliminate 0.96% : 0.000002s : 12: predicate.specialize_transform 1.00% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 1.03% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.30% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.90% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.53% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.85% : 0.000002s : 13: predicate.transpose_eliminate 1.66% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.79% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.43% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.81% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.61% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.73% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.39% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.39% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.50% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.66% : 0.000002s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000181 4 26.81% : 0.000049s : 1: func_graph_cloner_run.FuncGraphClonerGraph 73.19% : 0.000133s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090425 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000068s : 1: add_recomputation 0.04% : 0.000032s : 1: assign_add_opt 0.16% : 0.000143s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.37% : 0.000334s : 1: bootstrap 0.07% : 0.000061s : 1: cconv 0.03% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000047s : 1: distribtued_split 0.57% : 0.000520s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000018s : 1: execute 0.01% : 0.000006s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000495s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001109s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000049s : 4: opt.transform.symbol_engine_opt 5.91% : 0.005341s : 1: opt_a 0.15% : 0.000138s : 1: opt_after_cconv 0.27% : 0.000246s : 1: opt_b 7.97% : 0.007208s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000077s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000232s : 1: renormalize.infer 0.21% : 0.000188s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000148s : 1: rewriter_after_opt_a 0.04% : 0.000040s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000093s : 1: symbol_engine_optimizer 77.81% : 0.070356s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.83% : 0.002559s : 1: type_inference 0.08% : 0.000068s : 1: validate Time group info: ------[substitution.] 0.000109 63 4.58% : 0.000005s : 2: substitution.depend_value_elim 2.03% : 0.000002s : 5: substitution.elim_not_effective 1.90% : 0.000002s : 5: substitution.fold_const_symbol 5.74% : 0.000006s : 6: substitution.graph_param_transform 47.85% : 0.000052s : 1: substitution.inline 4.77% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.28% : 0.000004s : 6: substitution.load_eliminater 2.22% : 0.000002s : 2: substitution.reduce_all_const_elim 7.09% : 0.000008s : 10: substitution.remove_not_recompute_node 2.72% : 0.000003s : 2: substitution.replace_old_param 9.51% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.32% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002382 2 90.61% : 0.002158s : 1: type_inference.infer 9.39% : 0.000224s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000051 1 100.00% : 0.000051s : 1: match.inline ------[predicate.] 0.000230 1420 0.74% : 0.000002s : 13: predicate.accumulaten_eliminater 1.18% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.88% : 0.000002s : 13: predicate.addn_zero_filter 0.77% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.20% : 0.000005s : 25: predicate.arithmetic_simplify 0.86% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.77% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.45% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.37% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.84% : 0.000002s : 12: predicate.depend_value_elim 0.88% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.92% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.25% : 0.000001s : 6: predicate.elim_not_effective 0.62% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.25% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_depend_swap 2.00% : 0.000005s : 31: predicate.environ_get_eliminate 1.08% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.20% : 0.000003s : 14: predicate.float_depend_g_call 0.75% : 0.000002s : 12: predicate.float_environ_get_switch 1.18% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.92% : 0.000002s : 12: predicate.get_grad_eliminate 0.27% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.50% : 0.000013s : 63: predicate.inline 1.03% : 0.000002s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.06% : 0.000002s : 12: predicate.less_batch_normalization 1.63% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.47% : 0.000006s : 38: predicate.load_eliminater 1.50% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.18% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.91% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.87% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.78% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.70% : 0.000002s : 13: predicate.minmaximum_grad 0.71% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.54% : 0.000001s : 6: predicate.parallel_virtual_node 1.10% : 0.000003s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.81% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.12% : 0.000003s : 13: predicate.reduce_eliminate 0.52% : 0.000001s : 12: predicate.remove_not_recompute_node 1.08% : 0.000002s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.86% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.56% : 0.000001s : 6: predicate.row_tensor_eliminate 0.98% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000002s : 12: predicate.shard_identity_eliminate 1.49% : 0.000003s : 18: predicate.special_op_eliminate 0.92% : 0.000002s : 12: predicate.specialize_transform 1.08% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.98% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.62% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.40% : 0.000010s : 43: predicate.switch_simplify 0.80% : 0.000002s : 13: predicate.tile_eliminate 0.78% : 0.000002s : 13: predicate.transpose_eliminate 1.71% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.73% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.66% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.68% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.61% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.69% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.32% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.41% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.83% : 0.000002s : 12: predicate.virtual_output_eliminate 0.55% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000127 4 8.00% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.00% : 0.000116s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090340 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000060s : 1: add_recomputation 0.03% : 0.000029s : 1: assign_add_opt 0.13% : 0.000113s : 1: auto_monad 0.03% : 0.000029s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.37% : 0.000334s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.03% : 0.000027s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.00004 Time group info: ------[substitution.] 0.000129 63 4.86% : 0.000006s : 2: substitution.depend_value_elim 2.09% : 0.000003s : 5: substitution.elim_not_effective 1.79% : 0.000002s : 5: substitution.fold_const_symbol 5.39% : 0.000007s : 6: substitution.graph_param_transform 50.74% : 0.000065s : 1: substitution.inline 4.30% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.36% : 0.000004s : 6: substitution.load_eliminater 2.61% : 0.000003s : 2: substitution.reduce_all_const_elim 6.01% : 0.000008s : 10: substitution.remove_not_recompute_node 2.48% : 0.000003s : 2: substitution.replace_old_param 8.43% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.94% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002444 2 89.06% : 0.002177s : 1: type_inference.infer 10.94% : 0.000267s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000229 1420 0.82% : 0.000002s : 13: predicate.accumulaten_eliminater 1.18% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.80% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.08% : 0.000005s : 25: predicate.arithmetic_simplify 0.85% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.42% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.51% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.79% : 0.000002s : 12: predicate.depend_value_elim 0.81% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.95% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.81% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_depend_swap 1.97% : 0.000004s : 31: predicate.environ_get_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.22% : 0.000003s : 14: predicate.float_depend_g_call 0.80% : 0.000002s : 12: predicate.float_environ_get_switch 1.08% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.85% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.71% : 0.000013s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.42% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.17% : 0.000003s : 12: predicate.less_batch_normalization 1.75% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000005s : 38: predicate.load_eliminater 1.35% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.76% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.80% : 0.000002s : 12: predicat1s : 1: distribtued_split 0.57% : 0.000516s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000008s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000006s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000512s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.19% : 0.001073s : 80: opt.transform.opt_a 0.05% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.03% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 5.97% : 0.005396s : 1: opt_a 0.15% : 0.000132s : 1: opt_after_cconv 0.27% : 0.000243s : 1: opt_b 7.92% : 0.007156s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000008s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000068s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000009s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.24% : 0.000216s : 1: renormalize.infer 0.20% : 0.000180s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000143s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000090s : 1: symbol_engine_optimizer 78.20% : 0.070642s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.68% : 0.002424s : 1: type_inference 0.07% : 0.000063s : 1: validate e.mini_step_allgather_replace 0.73% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.49% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.19% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.75% : 0.000002s : 13: predicate.print_const_string_wrapper 0.78% : 0.000002s : 12: predicate.reduce_all_const_elim 1.03% : 0.000002s : 13: predicate.reduce_eliminate 0.56% : 0.000001s : 12: predicate.remove_not_recompute_node 1.11% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.27% : 0.000001s : 6: predicate.reset_defer_inline 0.84% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.46% : 0.000001s : 6: predicate.row_tensor_eliminate 1.05% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.91% : 0.000002s : 12: predicate.shard_identity_eliminate 1.35% : 0.000003s : 18: predicate.special_op_eliminate 0.94% : 0.000002s : 12: predicate.specialize_transform 1.01% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.97% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.27% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.87% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.58% : 0.000010s : 43: predicate.switch_simplify 0.73% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.72% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.67% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.68% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.67% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.62% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.71% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.41% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.46% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.65% : 0.000001s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.60% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000148 4 10.82% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.18% : 0.000132s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090549 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000064s : 1: add_recomputation 0.04% : 0.000034s : 1: assign_add_opt 0.16% : 0.000142s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.34% : 0.000311s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.04% : 0.000033s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000047s : 1: distribtued_split 0.57% : 0.000514s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.60% : 0.000542s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001101s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000156s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.03% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 5.85% : 0.005296s : 1: opt_a 0.15% : 0.000139s : 1: opt_after_cconv 0.28% : 0.000250s : 1: opt_b 7.95% : 0.007194s : 1: optimize 0.01% : 0.000013s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.10% : 0.000092s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000031s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000016s : 1: remove_dup_value 0.26% : 0.000235s : 1: renormalize.infer 0.21% : 0.000193s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000148s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000092s : 1: symbol_engine_optimizer 77.97% : 0.070603s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.75% : 0.002492s : 1: type_inference 0.08% : 0.000070s : 1: validate TotalTime = 0.0826207, [21] [bootstrap]: 0.00033358 [type_inference]: 0.00270548 [auto_monad]: 0.00013425 [graph_reusing]: 2.33995e-06 [inline]: 1.34995e-06 [parallel-infer-symbol]: 2.64007e-06 [pre_auto_parallel]: 2.615e-05 [insert-virtual-dataset]: 2.23995e-06 [parallel-infer-symbol-second]: 4.59957e-07 [dataset_repeat_opt]: 1.45996e-06 [pipeline_split]: 1.67999e-06 [optimize]: 0.00753175, [52] [py_interpret_to_execute]: 1.612e-05 [rewriter_before_opt_a]: 3.666e-05 [opt_a]: 0.00565732, [2] [Cycle 1]: 0.00155219, [43] [expand_dump_flag]: 4.19002e-06 [switch_simplify]: 3.08399e-05 [loop_unroll]: 1.309e-05 [a_1]: 0.00034466 [recompute_prepare]: 8.54e-06 [updatestate_depend_eliminate]: 8.94e-06 [updatestate_assign_eliminate]: 6.06e-06 [updatestate_loads_eliminate]: 7.77002e-06 [parameter_eliminate]: 3.12994e-06 [a_2]: 0.00011803 [accelerated_algorithm]: 8.67993e-06 [shard]: 2.3999e-06 [meta_shard_fg_expand]: 4.04008e-06 [shard_inline]: 8.41997e-06 [auto_parallel]: 1.154e-05 [parallel]: 8.26002e-06 [flash_sp]: 1.124e-05 [merge_comm]: 8.06001e-06 [allreduce_fusion]: 5.60994e-06 [matmul_add_comm_reduction]: 1.10001e-05 [allreduce_slice_to_reducescatter]: 4.69969e-07 [virtual_shard_identity]: 9.75002e-06 [virtual_dataset]: 7.76001e-06 [get_grad_eliminate_]: 7.47002e-06 [virtual_output]: 7.95e-06 [merge_forward]: 6.14999e-06 [cell_reuse_recompute_pass]: 1.95997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.71501e-05 [before_grad]: 1.382e-05 [inplace_validation]: 4.87e-06 [meta_fg_expand]: 5.76e-06 [inplace_validation_after_expand]: 6.91996e-06 [flash_sp_send_recv_attached]: 4.9501e-06 [receive_attached]: 2.92005e-06 [after_resolve]: 1.13801e-05 [a_after_grad]: 1.275e-05 [special_op_eliminate]: 7.99005e-06 [renormalize]: 0.00044626 [add_forward_monad_depend]: 3.52995e-06 [auto_monad_grad]: 1.90001e-06 [auto_monad_eliminator]: 3.12401e-05 [cse]: 3.33e-05 [a_3]: 5.831e-05 [Cycle 2]: 0.00081857, [43] [expand_dump_flag]: 1.04005e-06 [switch_simplify]: 1.08001e-05 [loop_unroll]: 7.80006e-06 [a_1]: 0.00020142 [recompute_prepare]: 7.28003e-06 [updatestate_depend_eliminate]: 6.23998e-06 [updatestate_assign_eliminate]: 4.59002e-06 [updatestate_loads_eliminate]: 5.61005e-06 [parameter_eliminate]: 1.44006e-06 [a_2]: 0.00010566 [accelerated_algorithm]: 8.35001e-06 [shard]: 1.21992e-06 [meta_shard_fg_expand]: 2.45997e-06 [shard_inline]: 8.31997e-06 [auto_parallel]: 1.087e-05 [parallel]: 3.75998e-06 [flash_sp]: 4.00993e-06 [merge_comm]: 6.37001e-06 [allreduce_fusion]: 4.68001e-06 [matmul_add_comm_reduction]: 7.86991e-06 [allreduce_slice_to_reducescatter]: 2.80095e-07 [virtual_shard_identity]: 9.04e-06 [virtual_dataset]: 7.77002e-06 [get_grad_eliminate_]: 7.10995e-06 [virtual_output]: 7.40995e-06 [merge_forward]: 4.60004e-06 [cell_reuse_recompute_pass]: 1.80991e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.522e-05 [before_grad]: 1.25e-05 [inplace_validation]: 4.30003e-06 [meta_fg_expand]: 4.64008e-06 [inplace_validation_after_expand]: 5.14998e-06 [flash_sp_send_recv_attached]: 9.30042e-07 [receive_attached]: 7.69971e-07 [after_resolve]: 9.95002e-06 [a_after_grad]: 1.18801e-05 [special_op_eliminate]: 7.57002e-06 [renormalize]: 7.99773e-08 [add_forward_monad_depend]: 1.01002e-06 [auto_monad_grad]: 1.22003e-06 [auto_monad_eliminator]: 1.90401e-05 [cse]: 2.104e-05 [a_3]: 4.938e-05 [py_interpret_to_execute_after_opt_a]: 9.26002e-06 [slice_cell_reuse_recomputed_activation]: 2.40002e-06 [rewriter_after_opt_a]: 0.00015174 [convert_after_rewriter]: 9.31008e-06 [order_py_execute_after_rewriter]: 6.11006e-06 [opt_b]: 0.00024617, [1] [Cycle 1]: 0.00024074, [7] [b_1]: 0.00016304 [b_2]: 1.02201e-05 [updatestate_depend_eliminate]: 5.57001e-06 [updatestate_assign_eliminate]: 4.50003e-06 [updatestate_loads_eliminate]: 5.39992e-06 [renormalize]: 2.79979e-07 [cse]: 1.95199e-05 [optimize_parallel_all_gather_comm]: 8.03999e-06 [overlap_param_gather]: 1.47999e-06 [cconv]: 2.30001e-05 [loop_unroll]: 0.00050032 [opt_after_cconv]: 0.00013571, [1] [Cycle 1]: 0.0001296, [7] [c_1]: 5.248e-05 [parameter_eliminate]: 2.62004e-06 [updatestate_depend_eliminate]: 8.38994e-06 [updatestate_assign_eliminate]: 4.91005e-06 [updatestate_loads_eliminate]: 5.72007e-06 [cse]: 2.271e-05 [renormalize]: 3.19909e-07 [remove_dup_value]: 1.34701e-05 [tuple_transform]: 6.82799e-05, [1] [Cycle 1]: 6.387e-05, [2] [d_1]: 5.416e-05 [renormalize]: 1.40048e-07 [partial_unused_args_eliminate]: 2.29001e-06 [add_cache_embedding]: 1.312e-05 [add_recomputation]: 6.19299e-05 [cse_after_recomputation]: 2.735e-05, [1] [Cycle 1]: 2.27899e-05, [1] [cse]: 1.756e-05 [environ_conv]: 7.91007e-06 [swap_dp_allreduce_reducescatter]: 7.40995e-06 [bias_add_comm_swap]: 2.24006e-06 [label_micro_interleaved_index]: 1.87999e-06 [label_fine_grained_interleaved_index]: 1.77009e-06 [merge_cast_opt]: 1.04005e-06 [slice_recompute_activation]: 2.0999e-06 [micro_interleaved_order_control]: 1.73005e-06 [assign_add_opt]: 2.845e-05 [ForceFp32Comm]: 8.69972e-07 [remove_cast_before_assign_add]: 7.40995e-06 [full_micro_interleaved_order_control]: 2.14006e-06 [reorder_send_recv_between_fp_bp]: 2.01003e-06 [comm_op_add_attrs]: 2.71699e-05 [add_comm_op_reuse_tag]: 1.72004e-06 [interleave_split_concat_branches]: 9.39937e-07 [interleave_parallel_branches]: 8.79983e-07 [overlap_opt_shard_in_pipeline]: 1.39e-06 [overlap_opt_shard_grad_in_pipeline]: 2.36009e-06 [control_data_broadcast_order]: 1.07998e-06 [grouped_pairwise_exchange_alltoall]: 1.001e-05 [offloading_packed_experts]: 2.02993e-06 [overlap_recompute_and_grad_model_parallel]: 1.95997e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.10018e-07 [overlap_recompute_allgather_and_fa_grad]: 7.30699e-05 [overlap_grad_ring_attention]: 2.83995e-06 [overlap_grad_flash_sp]: 1.58401e-05 [begin_end_overlap_inline]: 7.89994e-07 [split_matmul_comm_elemetwise]: 2.11992e-06 [split_layernorm_comm]: 1.83005e-06 [handle_group_info]: 5.69993e-06 [symbol_engine_optimizer]: 9.119e-05, [1] [Cycle 1]: 8.622e-05, [6] [build]: 5.03997e-06 [elim_shapecalc]: 1.347e-05 [elim_not_effective]: 1.683e-05 [opt_reshape]: 9.05001e-06 [fold_const_symbol]: 1.417e-05 [renormalize]: 3.39933e-07 [pipeline_parallel_scheduler]: 2.03005e-06 [auto_monad_reorder]: 3.212e-05 [get_jit_bprop_graph]: 5.10016e-07 [rewriter_after_jit_bprop_graph]: 4.39934e-07 [eliminate_special_op_node]: 0.0005247 [distribtued_split]: 4.38601e-05 [validate]: 3.583e-05 [task_emit]: 0.0709807 [execute]: 1.195e-05 Sums bootstrap : 0.000334s : 0.43% type_inference : 0.002705s : 3.46% auto_monad : 0.000134s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000042s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000546s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000224s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000004s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000016s : 0.02% optimize.opt_a.renormalize : 0.000446s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000108s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000152s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000163s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000500s : 0.64% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000062s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000028s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000073s : 0.09% optimize.overlap_grad_ring_attention : 0.000003s : 0.00% optimize.overlap_grad_flash_sp : 0.000016s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000006s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000525s : 0.67% distribtued_split : 0.000044s : 0.06% validate : 0.000036s : 0.05% task_emit : 0.070981s : 90.66% execute : 0.000012s : 0.02% Time group info: ------[substitution.] 0.000135 63 4.84% : 0.000007s : 2: substitution.depend_value_elim 1.92% : 0.000003s : 5: substitution.elim_not_effective 2.06% : 0.000003s : 5: substitution.fold_const_symbol 5.17% : 0.000007s : 6: substitution.graph_param_transform 52.58% : 0.000071s : 1: substitution.inline 4.03% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.22% : 0.000004s : 6: substitution.load_eliminater 2.51% : 0.000003s : 2: substitution.reduce_all_const_elim 5.60% : 0.000008s : 10: substitution.remove_not_recompute_node 2.59% : 0.000003s : 2: substitution.replace_old_param 8.15% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.33% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002676 2 89.53% : 0.002395s : 1: type_inference.infer 10.47% : 0.000280s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000070 1 100.00% : 0.000070s : 1: match.inline ------[predicate.] 0.000231 1420 0.73% : 0.000002s : 13: predicate.accumulaten_eliminater 1.23% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.69% : 0.000002s : 12: predicate.addn_check_dump 0.74% : 0.000002s : 13: predicate.addn_zero_filter 0.73% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.38% : 0.000005s : 25: predicate.arithmetic_simplify 0.98% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.70% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.45% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.76% : 0.000002s : 12: predicate.depend_value_elim 0.79% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.89% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.63% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.17% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.12% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_depend_swap 1.97% : 0.000005s : 31: predicate.environ_get_eliminate 1.20% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.25% : 0.000003s : 14: predicate.float_depend_g_call 0.74% : 0.000002s : 12: predicate.float_environ_get_switch 1.12% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.79% : 0.000002s : 12: predicate.get_grad_eliminate 0.36% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.66% : 0.000002s : 12: predicate.incorporate_call_switch 5.76% : 0.000013s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.38% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.13% : 0.000003s : 12: predicate.less_batch_normalization 1.67% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.44% : 0.000006s : 38: predicate.load_eliminater 1.29% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.22% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.86% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.80% : 0.000002s : 12: predicate.merge_addn 0.82% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.86% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.74% : 0.000002s : 13: predicate.minmaximum_grad 0.68% : 0.000002s : 6: predicate.mutable_eliminate 0.49% : 0.000001s : 6: predicate.opt_reshape 0.50% : 0.000001s : 6: predicate.parallel_virtual_node 1.09% : 0.000003s : 14: predicate.partial_defer_inline 1.27% : 0.000003s : 19: predicate.partial_eliminate 0.76% : 0.000002s : 13: predicate.print_const_string_wrapper 0.83% : 0.000002s : 12: predicate.reduce_all_const_elim 1.12% : 0.000003s : 13: predicate.reduce_eliminate 0.67% : 0.000002s : 12: predicate.remove_not_recompute_node 1.08% : 0.000003s : 25: predicate.replace_applicator 0.48% : 0.000001s : 12: predicate.replace_old_param 0.25% : 0.000001s : 6: predicate.reset_defer_inline 0.77% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.44% : 0.000001s : 6: predicate.row_tensor_eliminate 0.95% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.04% : 0.000002s : 12: predicate.shard_identity_eliminate 1.49% : 0.000003s : 18: predicate.special_op_eliminate 0.93% : 0.000002s : 12: predicate.specialize_transform 1.09% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.24% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.86% : 0.000002s : 14: predicate.switch_defer_inline 1.59% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.43% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.78% : 0.000002s : 13: predicate.transpose_eliminate 1.66% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.77% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.44% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.97% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.54% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.56% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.32% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.36% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.78% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.86% : 0.000002s : 12: predicate.virtual_output_eliminate 0.64% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000153 4 10.50% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.50% : 0.000137s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091990 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000067s : 1: add_recomputation 0.04% : 0.000032s : 1: assign_add_opt 0.16% : 0.000147s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.39% : 0.000359s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.03% : 0.000032s : 1: comm_op_add_attrs 0.01% : 0.000005s : 1: control_data_broadcast_order 0.02% : 0.000014s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000051s : 1: distribtued_split 0.59% : 0.000539s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000006s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000009s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000510s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.20% : 0.001107s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000154s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000033s : 3: opt.transform.special_op_eliminate 0.05% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.15% : 0.005661s : 1: opt_a 0.15% : 0.000140s : 1: opt_after_cconv 0.27% : 0.000249s : 1: opt_b 8.20% : 0.007540s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000020s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000007s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.09% : 0.000079s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000011s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000247s : 1: renormalize.infer 0.21% : 0.000194s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000158s : 1: rewriter_after_opt_a 0.04% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000094s : 1: symbol_engine_optimizer 77.20% : 0.071014s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.96% : 0.002724s : 1: type_inference 0.08% : 0.000070s : 1: validate [WARNING] PARALLEL(169251,ffff805f5c10,python3.7):2025-02-07-15:54:30.241.820 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169260,ffff90a16c10,python3.7):2025-02-07-15:54:30.242.221 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169309,ffff88eb9c10,python3.7):2025-02-07-15:54:30.242.303 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169242,ffff96d80c10,python3.7):2025-02-07-15:54:30.242.363 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169273,ffff95a23c10,python3.7):2025-02-07-15:54:30.242.438 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169323,ffff82a65c10,python3.7):2025-02-07-15:54:30.242.699 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169297,ffff9ffe2c10,python3.7):2025-02-07-15:54:30.242.699 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. [WARNING] PARALLEL(169285,ffff97501c10,python3.7):2025-02-07-15:54:30.243.051 [mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.cc:194] OverlapRecomputeAllGatherAndFlashAttentionGrad] Currently, duplicated allgather overlap with flashattention grad only support in lazy_line mode. TotalTime = 0.077455, [21] [bootstrap]: 0.00029138 [type_inference]: 0.00226065 [auto_monad]: 0.00010192 [graph_reusing]: 1.72993e-06 [inline]: 9.50065e-07 [parallel-infer-symbol]: 1.55997e-06 [pre_auto_parallel]: 2.12999e-05 [insert-virtual-dataset]: 2.33995e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 8.49948e-07 [pipeline_split]: 1.04995e-06 [optimize]: 0.00686961, [52] [py_interpret_to_execute]: 1.21299e-05 [rewriter_before_opt_a]: 3.04901e-05 [opt_a]: 0.00511391, [2] [Cycle 1]: 0.00143038, [43] [expand_dump_flag]: 2.35997e-06 [switch_simplify]: 2.547e-05 [loop_unroll]: 1.30801e-05 [a_1]: 0.00032396 [recompute_prepare]: 8.58004e-06 [updatestate_depend_eliminate]: 7.97003e-06 [updatestate_assign_eliminate]: 5.63008e-06 [updatestate_loads_eliminate]: 6.24999e-06 [parameter_eliminate]: 2.35997e-06 [a_2]: 0.00011215 [accelerated_algorithm]: 8.52998e-06 [shard]: 1.65997e-06 [meta_shard_fg_expand]: 3.30003e-06 [shard_inline]: 8.27003e-06 [auto_parallel]: 1.139e-05 [parallel]: 5.51005e-06 [flash_sp]: 7.86001e-06 [merge_comm]: 7.01007e-06 [allreduce_fusion]: 5.05999e-06 [matmul_add_comm_reduction]: 9.6499e-06 [allreduce_slice_to_reducescatter]: 3.7998e-07 [virtual_shard_identity]: 9.3499e-06 [virtual_dataset]: 7.82008e-06 [get_grad_eliminate_]: 7.49005e-06 [virtual_output]: 7.23999e-06 [merge_forward]: 4.85999e-06 [cell_reuse_recompute_pass]: 1.55997e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.669e-05 [before_grad]: 1.391e-05 [inplace_validation]: 4.78001e-06 [meta_fg_expand]: 5.38002e-06 [inplace_validation_after_expand]: 5.64998e-06 [flash_sp_send_recv_attached]: 3.65998e-06 [receive_attached]: 2.12004e-06 [after_resolve]: 1.066e-05 [a_after_grad]: 1.222e-05 [special_op_eliminate]: 7.55e-06 [renormalize]: 0.00040138 [add_forward_monad_depend]: 2.23005e-06 [auto_monad_grad]: 1.44995e-06 [auto_monad_eliminator]: 2.50201e-05 [cse]: 2.495e-05 [a_3]: 5.67e-05 [Cycle 2]: 0.00076999, [43] [expand_dump_flag]: 1.0099e-06 [switch_simplify]: 8.89995e-06 [loop_unroll]: 8.01007e-06 [a_1]: 0.00019968 [recompute_prepare]: 7.36001e-06 [updatestate_depend_eliminate]: 5.39992e-06 [updatestate_assign_eliminate]: 5.01005e-06 [updatestate_loads_eliminate]: 5.21005e-06 [parameter_eliminate]: 9.69972e-07 [a_2]: 0.0001045 [accelerated_algorithm]: 8.28994e-06 [shard]: 1.16997e-06 [meta_shard_fg_expand]: 2.40002e-06 [shard_inline]: 7.72998e-06 [auto_parallel]: 1.01e-05 [parallel]: 3.0701e-06 [flash_sp]: 2.64996e-06 [merge_comm]: 5.72996e-06 [allreduce_fusion]: 5.13997e-06 [matmul_add_comm_reduction]: 6.94999e-06 [allreduce_slice_to_reducescatter]: 2.59955e-07 [virtual_shard_identity]: 8.45001e-06 [virtual_dataset]: 7.55e-06 [get_grad_eliminate_]: 7.40006e-06 [virtual_output]: 6.97991e-06 [merge_forward]: 4.43996e-06 [cell_reuse_recompute_pass]: 1.70001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.454e-05 [before_grad]: 1.229e-05 [inplace_validation]: 4.12995e-06 [meta_fg_expand]: 4.64998e-06 [inplace_validation_after_expand]: 4.94008e-06 [flash_sp_send_recv_attached]: 7.79983e-07 [receive_attached]: 6.10016e-07 [after_resolve]: 9.32999e-06 [a_after_grad]: 1.17e-05 [special_op_eliminate]: 7.29994e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 1.01002e-06 [auto_monad_grad]: 1.02993e-06 [auto_monad_eliminator]: 1.62099e-05 [cse]: 1.75e-05 [a_3]: 4.85199e-05 [py_interpret_to_execute_after_opt_a]: 8.43999e-06 [slice_cell_reuse_recomputed_activation]: 1.40001e-06 [rewriter_after_opt_a]: 0.00012819 [convert_after_rewriter]: 8.00006e-06 [order_py_execute_after_rewriter]: 5.90004e-06 [opt_b]: 0.00026852, [1] [Cycle 1]: 0.00026348, [7] [b_1]: 0.00018661 [b_2]: 9.75002e-06 [updatestate_depend_eliminate]: 5.6799e-06 [updatestate_assign_eliminate]: 4.58991e-06 [updatestate_loads_eliminate]: 5.33997e-06 [renormalize]: 2.89991e-07 [cse]: 1.777e-05 [optimize_parallel_all_gather_comm]: 8.13999e-06 [overlap_param_gather]: 8.39937e-07 [cconv]: 1.687e-05 [loop_unroll]: 0.00047194 [opt_after_cconv]: 0.00012663, [1] [Cycle 1]: 0.00012107, [7] [c_1]: 5.06199e-05 [parameter_eliminate]: 1.84996e-06 [updatestate_depend_eliminate]: 6.80005e-06 [updatestate_assign_eliminate]: 4.45999e-06 [updatestate_loads_eliminate]: 5.18002e-06 [cse]: 1.935e-05 [renormalize]: 3.59956e-07 [remove_dup_value]: 9.89996e-06 [tuple_transform]: 7.18499e-05, [1] [Cycle 1]: 6.764e-05, [2] [d_1]: 5.744e-05 [renormalize]: 1.60071e-07 [partial_unused_args_eliminate]: 1.77999e-06 [add_cache_embedding]: 1.12799e-05 [add_recomputation]: 5.42901e-05 [cse_after_recomputation]: 2.865e-05, [1] [Cycle 1]: 2.43599e-05, [1] [cse]: 1.86501e-05 [environ_conv]: 6.53998e-06 [swap_dp_allreduce_reducescatter]: 7.19004e-06 [bias_add_comm_swap]: 1.56998e-06 [label_micro_interleaved_index]: 1.39e-06 [label_fine_grained_interleaved_index]: 1.27999e-06 [merge_cast_opt]: 7.3004e-07 [slice_recompute_activation]: 1.13004e-06 [micro_interleaved_order_control]: 1.51002e-06 [assign_add_opt]: 2.448e-05 [ForceFp32Comm]: 6.3004e-07 [remove_cast_before_assign_add]: 6.01006e-06 [full_micro_interleaved_order_control]: 1.23994e-06 [reorder_send_recv_between_fp_bp]: 1.20001e-06 [comm_op_add_attrs]: 2.261e-05 [add_comm_op_reuse_tag]: 1.46008e-06 [interleave_split_concat_branches]: 5.50062e-07 [interleave_parallel_branches]: 8.00006e-07 [overlap_opt_shard_in_pipeline]: 6.50063e-07 [overlap_opt_shard_grad_in_pipeline]: 1.40001e-06 [control_data_broadcast_order]: 6.79982e-07 [grouped_pairwise_exchange_alltoall]: 6.30994e-06 [offloading_packed_experts]: 1.32003e-06 [overlap_recompute_and_grad_model_parallel]: 1.19e-06 [overlap_grad_matmul_and_grad_allreduce]: 5.70086e-07 [overlap_recompute_allgather_and_fa_grad]: 6.52899e-05 [overlap_grad_ring_attention]: 1.43005e-06 [overlap_grad_flash_sp]: 1.216e-05 [begin_end_overlap_inline]: 5.30039e-07 [split_matmul_comm_elemetwise]: 1.32003e-06 [split_layernorm_comm]: 1.09e-06 [handle_group_info]: 2.94996e-06 [symbol_engine_optimizer]: 8.811e-05, [1] [Cycle 1]: 8.371e-05, [6] [build]: 4.34008e-06 [elim_shapecalc]: 1.258e-05 [elim_not_effective]: 1.591e-05 [opt_reshape]: 8.70996e-06 [fold_const_symbol]: 1.387e-05 [renormalize]: 2.10013e-07 [pipeline_parallel_scheduler]: 1.24995e-06 [auto_monad_reorder]: 2.471e-05 [get_jit_bprop_graph]: 3.20026e-07 [rewriter_after_jit_bprop_graph]: 3.40049e-07 [eliminate_special_op_node]: 0.00048717 [distribtued_split]: 3.44501e-05 [validate]: 3.069e-05 [task_emit]: 0.0670503 [execute]: 9.25001e-06 Sums bootstrap : 0.000291s : 0.40% type_inference : 0.002261s : 3.07% auto_monad : 0.000102s : 0.14% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000021s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000034s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000524s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.02% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000217s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000009s : 0.01% optimize.opt_a.flash_sp : 0.000011s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000017s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.04% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000020s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000401s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000041s : 0.06% optimize.opt_a.cse : 0.000042s : 0.06% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000008s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000128s : 0.17% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000187s : 0.25% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000006s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000017s : 0.02% optimize.loop_unroll : 0.000472s : 0.64% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000019s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000057s : 0.08% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.02% optimize.add_recomputation : 0.000054s : 0.07% optimize.cse_after_recomputation.cse : 0.000019s : 0.03% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000024s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000023s : 0.03% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000006s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000065s : 0.09% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000012s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000004s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000025s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000487s : 0.66% distribtued_split : 0.000034s : 0.05% validate : 0.000031s : 0.04% task_emit : 0.067050s : 91.20% execute : 0.000009s : 0.01% Time group info: ------[substitution.] 0.000112 63 4.34% : 0.000005s : 2: substitution.depend_value_elim 1.98% : 0.000002s : 5: substitution.elim_not_effective 1.83% : 0.000002s : 5: substitution.fold_const_symbol 5.83% : 0.000007s : 6: substitution.graph_param_transform 50.05% : 0.000056s : 1: substitution.inline 4.53% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.42% : 0.000004s : 6: substitution.load_eliminater 2.42% : 0.000003s : 2: substitution.reduce_all_const_elim 6.20% : 0.000007s : 10: substitution.remove_not_recompute_node 2.37% : 0.000003s : 2: substitution.replace_old_param 9.34% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 7.68% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002235 2 90.12% : 0.002014s : 1: type_inference.infer 9.88% : 0.000221s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000055 1 100.00% : 0.000055s : 1: match.inline ------[predicate.] 0.000226 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.19% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.74% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.82% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.09% : 0.000005s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.78% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.49% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.27% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.96% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.82% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.62% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.19% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_depend_swap 2.02% : 0.000005s : 31: predicate.environ_get_eliminate 1.17% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000003s : 14: predicate.float_depend_g_call 0.73% : 0.000002s : 12: predicate.float_environ_get_switch 1.09% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.32% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.61% : 0.000013s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.07% : 0.000002s : 12: predicate.less_batch_normalization 1.70% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.52% : 0.000006s : 38: predicate.load_eliminater 1.28% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.20% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.80% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.74% : 0.000002s : 12: predicate.merge_addn 0.79% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.86% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.81% : 0.000002s : 13: predicate.minmaximum_grad 0.74% : 0.000002s : 6: predicate.mutable_eliminate 0.49% : 0.000001s : 6: predicate.opt_reshape 0.51% : 0.000001s : 6: predicate.parallel_virtual_node 1.11% : 0.000003s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.05% : 0.000002s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 1.17% : 0.000003s : 25: predicate.replace_applicator 0.43% : 0.000001s : 12: predicate.replace_old_param 0.26% : 0.000001s : 6: predicate.reset_defer_inline 0.87% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.32% : 0.000003s : 18: predicate.special_op_eliminate 0.97% : 0.000002s : 12: predicate.specialize_transform 0.97% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.32% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000002s : 14: predicate.switch_defer_inline 1.70% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.31% : 0.000010s : 43: predicate.switch_simplify 0.79% : 0.000002s : 13: predicate.tile_eliminate 0.77% : 0.000002s : 13: predicate.transpose_eliminate 1.71% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.88% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.73% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.54% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.42% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.40% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.58% : 0.000001s : 6: predicate.value_based_eliminate 0.81% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000124 4 9.43% : 0.000012s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.57% : 0.000112s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.086091 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000059s : 1: add_recomputation 0.03% : 0.000029s : 1: assign_add_opt 0.13% : 0.000113s : 1: auto_monad 0.04% : 0.000031s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.41% : 0.000349s : 1: bootstrap 0.02% : 0.000021s : 1: cconv 0.03% : 0.000026s : 1: comm_op_add_attrs 0.00% : 0.000003s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.04% : 0.000032s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000042s : 1: distribtued_split 0.58% : 0.000500s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000017s : 1: execute 0.01% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000010s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000006s : 1: handle_group_info 0.01% : 0.000005s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.56% : 0.000481s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.01% : 0.000004s : 1: micro_interleaved_order_control 0.00% : 0.000004s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.24% : 0.001064s : 80: opt.transform.opt_a 0.06% : 0.000049s : 1: opt.transform.opt_after_cconv 0.20% : 0.000176s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 5.94% : 0.005117s : 1: opt_a 0.15% : 0.000130s : 1: opt_after_cconv 0.32% : 0.000272s : 1: opt_b 7.99% : 0.006878s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000003s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000070s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000027s : 1: pre_auto_parallel 0.02% : 0.000016s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000009s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.25% : 0.000217s : 1: renormalize.infer 0.21% : 0.000180s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000134s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.11% : 0.000091s : 1: symbol_engine_optimizer 77.91% : 0.067076s : 1: task_emit 0.09% : 0.000075s : 1: tuple_transform 2.65% : 0.002277s : 1: type_inference 0.07% : 0.000064s : 1: validate TotalTime = 0.078351, [21] [bootstrap]: 0.00034924 [type_inference]: 0.00246559 [auto_monad]: 0.00012521 [graph_reusing]: 2.19001e-06 [inline]: 1.40001e-06 [parallel-infer-symbol]: 2.11992e-06 [pre_auto_parallel]: 2.454e-05 [insert-virtual-dataset]: 2.40002e-06 [parallel-infer-symbol-second]: 3.40049e-07 [dataset_repeat_opt]: 1.51002e-06 [pipeline_split]: 1.54995e-06 [optimize]: 0.00716786, [52] [py_interpret_to_execute]: 1.52299e-05 [rewriter_before_opt_a]: 3.588e-05 [opt_a]: 0.00533271, [2] [Cycle 1]: 0.00151122, [43] [expand_dump_flag]: 3.31993e-06 [switch_simplify]: 3.081e-05 [loop_unroll]: 1.31801e-05 [a_1]: 0.00034123 [recompute_prepare]: 8.99006e-06 [updatestate_depend_eliminate]: 8.54e-06 [updatestate_assign_eliminate]: 6.18992e-06 [updatestate_loads_eliminate]: 7.06001e-06 [parameter_eliminate]: 3.34007e-06 [a_2]: 0.00011529 [accelerated_algorithm]: 8.90007e-06 [shard]: 2.02993e-06 [meta_shard_fg_expand]: 3.14997e-06 [shard_inline]: 8.51997e-06 [auto_parallel]: 1.089e-05 [parallel]: 6.77002e-06 [flash_sp]: 8.82009e-06 [merge_comm]: 7.38003e-06 [allreduce_fusion]: 5.04998e-06 [matmul_add_comm_reduction]: 1.03101e-05 [allreduce_slice_to_reducescatter]: 5.79981e-07 [virtual_shard_identity]: 9.57004e-06 [virtual_dataset]: 8.00996e-06 [get_grad_eliminate_]: 7.91997e-06 [virtual_output]: 7.78004e-06 [merge_forward]: 1.004e-05 [cell_reuse_recompute_pass]: 1.84006e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.722e-05 [before_grad]: 1.42e-05 [inplace_validation]: 4.84998e-06 [meta_fg_expand]: 5.70994e-06 [inplace_validation_after_expand]: 6.21995e-06 [flash_sp_send_recv_attached]: 3.65009e-06 [receive_attached]: 2.17999e-06 [after_resolve]: 1.223e-05 [a_after_grad]: 1.281e-05 [special_op_eliminate]: 7.80995e-06 [renormalize]: 0.00042282 [add_forward_monad_depend]: 3.64997e-06 [auto_monad_grad]: 1.86998e-06 [auto_monad_eliminator]: 3.06701e-05 [cse]: 3.237e-05 [a_3]: 5.783e-05 [Cycle 2]: 0.00081983, [43] [expand_dump_flag]: 1.11992e-06 [switch_simplify]: 9.62999e-06 [loop_unroll]: 7.97992e-06 [a_1]: 0.00019939 [recompute_prepare]: 6.92997e-06 [updatestate_depend_eliminate]: 5.69993e-06 [updatestate_assign_eliminate]: 4.70993e-06 [updatestate_loads_eliminate]: 5.70004e-06 [parameter_eliminate]: 1.30991e-06 [a_2]: 0.00010382 [accelerated_algorithm]: 8.28004e-06 [shard]: 1.10001e-06 [meta_shard_fg_expand]: 2.56998e-06 [shard_inline]: 7.91997e-06 [auto_parallel]: 1.03001e-05 [parallel]: 3.93006e-06 [flash_sp]: 3.65998e-06 [merge_comm]: 5.84999e-06 [allreduce_fusion]: 5.19992e-06 [matmul_add_comm_reduction]: 8.15e-06 [allreduce_slice_to_reducescatter]: 3.69968e-07 [virtual_shard_identity]: 8.60996e-06 [virtual_dataset]: 7.56001e-06 [get_grad_eliminate_]: 7.29994e-06 [virtual_output]: 7.01007e-06 [merge_forward]: 4.72006e-06 [cell_reuse_recompute_pass]: 2.35008e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.51701e-05 [before_grad]: 5.394e-05 [inplace_validation]: 4.50003e-06 [meta_fg_expand]: 4.81005e-06 [inplace_validation_after_expand]: 5.46e-06 [flash_sp_send_recv_attached]: 9.60077e-07 [receive_attached]: 7.39936e-07 [after_resolve]: 1.012e-05 [a_after_grad]: 1.18e-05 [special_op_eliminate]: 7.47992e-06 [renormalize]: 8.9989e-08 [add_forward_monad_depend]: 1.29e-06 [auto_monad_grad]: 1.34995e-06 [auto_monad_eliminator]: 1.818e-05 [cse]: 2.07101e-05 [a_3]: 4.902e-05 [py_interpret_to_execute_after_opt_a]: 9.71998e-06 [slice_cell_reuse_recomputed_activation]: 2.46009e-06 [rewriter_after_opt_a]: 0.00014241 [convert_after_rewriter]: 9.25001e-06 [order_py_execute_after_rewriter]: 6.00005e-06 [opt_b]: 0.00024149, [1] [Cycle 1]: 0.00023616, [7] [b_1]: 0.00016062 [b_2]: 9.32999e-06 [updatestate_depend_eliminate]: 5.46e-06 [updatestate_assign_eliminate]: 4.52006e-06 [updatestate_loads_eliminate]: 5.21995e-06 [renormalize]: 3.20026e-07 [cse]: 1.99e-05 [optimize_parallel_all_gather_comm]: 8.89995e-06 [overlap_param_gather]: 1.00001e-06 [cconv]: 2.26001e-05 [loop_unroll]: 0.00048606 [opt_after_cconv]: 0.00013312, [1] [Cycle 1]: 0.00012721, [7] [c_1]: 5.33301e-05 [parameter_eliminate]: 2.39001e-06 [updatestate_depend_eliminate]: 7.86991e-06 [updatestate_assign_eliminate]: 4.51005e-06 [updatestate_loads_eliminate]: 5.08991e-06 [cse]: 2.324e-05 [renormalize]: 3.10014e-07 [remove_dup_value]: 1.352e-05 [tuple_transform]: 6.75101e-05, [1] [Cycle 1]: 6.32301e-05, [2] [d_1]: 5.418e-05 [renormalize]: 1.49943e-07 [partial_unused_args_eliminate]: 2.01003e-06 [add_cache_embedding]: 1.328e-05 [add_recomputation]: 5.99599e-05 [cse_after_recomputation]: 2.676e-05, [1] [Cycle 1]: 2.218e-05, [1] [cse]: 1.69299e-05 [environ_conv]: 7.43999e-06 [swap_dp_allreduce_reducescatter]: 6.99004e-06 [bias_add_comm_swap]: 2.41993e-06 [label_micro_interleaved_index]: 1.92004e-06 [label_fine_grained_interleaved_index]: 1.96998e-06 [merge_cast_opt]: 1.09e-06 [slice_recompute_activation]: 1.69e-06 [micro_interleaved_order_control]: 2.11003e-06 [assign_add_opt]: 2.771e-05 [ForceFp32Comm]: 9.60077e-07 [remove_cast_before_assign_add]: 7.09004e-06 [full_micro_interleaved_order_control]: 2.19001e-06 [reorder_send_recv_between_fp_bp]: 2.03995e-06 [comm_op_add_attrs]: 2.703e-05 [add_comm_op_reuse_tag]: 1.72004e-06 [interleave_split_concat_branches]: 1.2099e-06 [interleave_parallel_branches]: 7.59959e-07 [overlap_opt_shard_in_pipeline]: 7.00005e-07 [overlap_opt_shard_grad_in_pipeline]: 2.09e-06 [control_data_broadcast_order]: 1.06997e-06 [grouped_pairwise_exchange_alltoall]: 8.67003e-06 [offloading_packed_experts]: 2.25997e-06 [overlap_recompute_and_grad_model_parallel]: 1.43994e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.69971e-07 [overlap_recompute_allgather_and_fa_grad]: 8.612e-05 [overlap_grad_ring_attention]: 2.35997e-06 [overlap_grad_flash_sp]: 1.407e-05 [begin_end_overlap_inline]: 8.09901e-07 [split_matmul_comm_elemetwise]: 2.27999e-06 [split_layernorm_comm]: 1.56998e-06 [handle_group_info]: 4.90004e-06 [symbol_engine_optimizer]: 8.823e-05, [1] [Cycle 1]: 8.35899e-05, [6] [build]: 5.31995e-06 [elim_shapecalc]: 1.303e-05 [elim_not_effective]: 1.682e-05 [opt_reshape]: 8.51997e-06 [fold_const_symbol]: 1.339e-05 [renormalize]: 2.30037e-07 [pipeline_parallel_scheduler]: 1.45996e-06 [auto_monad_reorder]: 3.027e-05 [get_jit_bprop_graph]: 4.59957e-07 [rewriter_after_jit_bprop_graph]: 6.39935e-07 [eliminate_special_op_node]: 0.00050561 [distribtued_split]: 4.096e-05 [validate]: 3.53e-05 [task_emit]: 0.0673453 [execute]: 1.02799e-05 Sums bootstrap : 0.000349s : 0.47% type_inference : 0.002466s : 3.32% auto_monad : 0.000125s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000025s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000036s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000541s : 0.73% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000219s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000012s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000015s : 0.02% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000068s : 0.09% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000011s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000022s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000423s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.07% optimize.opt_a.cse : 0.000053s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000142s : 0.19% optimize.convert_after_rewriter : 0.000009s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.22% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000486s : 0.65% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000028s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000086s : 0.12% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000014s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000506s : 0.68% distribtued_split : 0.000041s : 0.06% validate : 0.000035s : 0.05% task_emit : 0.067345s : 90.56% execute : 0.000010s : 0.01% Time group info: ------[substitution.] 0.000129 63 4.60% : 0.000006s : 2: substitution.depend_value_elim 2.08% : 0.000003s : 5: substitution.elim_not_effective 1.97% : 0.000003s : 5: substitution.fold_const_symbol 5.37% : 0.000007s : 6: substitution.graph_param_transform 51.30% : 0.000066s : 1: substitution.inline 4.03% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.32% : 0.000004s : 6: substitution.load_eliminater 2.18% : 0.000003s : 2: substitution.reduce_all_const_elim 5.90% : 0.000008s : 10: substitution.remove_not_recompute_node 2.55% : 0.000003s : 2: substitution.replace_old_param 8.59% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.12% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002436 2 88.92% : 0.002166s : 1: type_inference.infer 11.08% : 0.000270s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000065 1 100.00% : 0.000065s : 1: match.inline ------[predicate.] 0.000230 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.05% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.79% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.25% : 0.000005s : 25: predicate.arithmetic_simplify 0.88% : 0.000002s : 13: predicate.cast_eliminate 0.81% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.20% : 0.000000s : 6: predicate.const_output_eliminate 0.39% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.34% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.81% : 0.000002s : 12: predicate.depend_value_elim 0.80% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.88% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.92% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.56% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.26% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_depend_swap 1.91% : 0.000004s : 31: predicate.environ_get_eliminate 1.05% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.85% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.44% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.81% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.84% : 0.000013s : 63: predicate.inline 1.04% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.02% : 0.000002s : 12: predicate.less_batch_normalization 1.62% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.45% : 0.000006s : 38: predicate.load_eliminater 1.32% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.15% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.81% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.71% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.78% : 0.000002s : 6: predicate.mutable_eliminate 0.47% : 0.000001s : 6: predicate.opt_reshape 0.56% : 0.000001s : 6: predicate.parallel_virtual_node 1.11% : 0.000003s : 14: predicate.partial_defer_inline 1.32% : 0.000003s : 19: predicate.partial_eliminate 0.80% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.18% : 0.000003s : 13: predicate.reduce_eliminate 0.64% : 0.000001s : 12: predicate.remove_not_recompute_node 1.10% : 0.000003s : 25: predicate.replace_applicator 0.49% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.89% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.00% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.39% : 0.000003s : 18: predicate.special_op_eliminate 0.91% : 0.000002s : 12: predicate.specialize_transform 1.12% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.95% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.34% : 0.000005s : 38: predicate.stopgrad_eliminater 0.46% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.89% : 0.000002s : 14: predicate.switch_defer_inline 1.67% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.37% : 0.000010s : 43: predicate.switch_simplify 0.84% : 0.000002s : 13: predicate.tile_eliminate 0.80% : 0.000002s : 13: predicate.transpose_eliminate 1.78% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.78% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.45% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.85% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.55% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.58% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.41% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 3.49% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.77% : 0.000002s : 12: predicate.virtual_output_eliminate 0.57% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000145 4 11.05% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 88.95% : 0.000129s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.087360 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000065s : 1: add_recomputation 0.04% : 0.000032s : 1: assign_add_opt 0.16% : 0.000138s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.43% : 0.000374s : 1: bootstrap 0.03% : 0.000026s : 1: cconv 0.04% : 0.000031s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000013s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.06% : 0.000049s : 1: distribtued_split 0.59% : 0.000520s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.57% : 0.000496s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.30% : 0.001139s : 80: opt.transform.opt_a 0.06% : 0.000052s : 1: opt.transform.opt_after_cconv 0.17% : 0.000151s : 27: opt.transform.opt_b 0.06% : 0.000053s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 6.11% : 0.005337s : 1: opt_a 0.16% : 0.000137s : 1: opt_after_cconv 0.28% : 0.000245s : 1: opt_b 8.21% : 0.007176s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.11% : 0.000092s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000007s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.03% : 0.000030s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.27% : 0.000232s : 1: renormalize.infer 0.21% : 0.000186s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000148s : 1: rewriter_after_opt_a 0.05% : 0.000041s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000004s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000091s : 1: symbol_engine_optimizer 77.12% : 0.067370s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.84% : 0.002485s : 1: type_inference 0.08% : 0.000070s : 1: validate TotalTime = 0.080319, [21] [bootstrap]: 0.00036599 [type_inference]: 0.00254777 [auto_monad]: 0.00013597 [graph_reusing]: 1.52003e-06 [inline]: 1.40001e-06 [parallel-infer-symbol]: 1.63005e-06 [pre_auto_parallel]: 2.735e-05 [insert-virtual-dataset]: 3.01003e-06 [parallel-infer-symbol-second]: 4.00003e-07 [dataset_repeat_opt]: 1.00001e-06 [pipeline_split]: 1.69e-06 [optimize]: 0.0071726, [52] [py_interpret_to_execute]: 1.599e-05 [rewriter_before_opt_a]: 3.456e-05 [opt_a]: 0.00539637, [2] [Cycle 1]: 0.00153275, [43] [expand_dump_flag]: 4.28001e-06 [switch_simplify]: 3.23399e-05 [loop_unroll]: 1.30699e-05 [a_1]: 0.00034268 [recompute_prepare]: 9.10007e-06 [updatestate_depend_eliminate]: 8.76002e-06 [updatestate_assign_eliminate]: 5.83008e-06 [updatestate_loads_eliminate]: 7.83999e-06 [parameter_eliminate]: 3.67011e-06 [a_2]: 0.00011587 [accelerated_algorithm]: 8.54e-06 [shard]: 1.26008e-06 [meta_shard_fg_expand]: 3.33006e-06 [shard_inline]: 8.72009e-06 [auto_parallel]: 1.22599e-05 [parallel]: 6.1699e-06 [flash_sp]: 1.20701e-05 [merge_comm]: 7.53999e-06 [allreduce_fusion]: 5.30994e-06 [matmul_add_comm_reduction]: 1.12699e-05 [allreduce_slice_to_reducescatter]: 3.10014e-07 [virtual_shard_identity]: 9.58005e-06 [virtual_dataset]: 8.71997e-06 [get_grad_eliminate_]: 7.58003e-06 [virtual_output]: 7.46991e-06 [merge_forward]: 5.08002e-06 [cell_reuse_recompute_pass]: 1.41992e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.70199e-05 [before_grad]: 1.359e-05 [inplace_validation]: 5.19992e-06 [meta_fg_expand]: 5.59993e-06 [inplace_validation_after_expand]: 6.47001e-06 [flash_sp_send_recv_attached]: 5.08991e-06 [receive_attached]: 2.11003e-06 [after_resolve]: 1.121e-05 [a_after_grad]: 1.233e-05 [special_op_eliminate]: 7.47002e-06 [renormalize]: 0.00044264 [add_forward_monad_depend]: 3.36999e-06 [auto_monad_grad]: 1.46998e-06 [auto_monad_eliminator]: 2.541e-05 [cse]: 3.38401e-05 [a_3]: 5.861e-05 [Cycle 2]: 0.00085658, [43] [expand_dump_flag]: 1.22993e-06 [switch_simplify]: 9.37004e-06 [loop_unroll]: 7.57992e-06 [a_1]: 0.00019981 [recompute_prepare]: 7.25e-06 [updatestate_depend_eliminate]: 5.96e-06 [updatestate_assign_eliminate]: 4.89992e-06 [updatestate_loads_eliminate]: 5.90994e-06 [parameter_eliminate]: 1.30001e-06 [a_2]: 0.00017703 [accelerated_algorithm]: 8.71008e-06 [shard]: 1.11002e-06 [meta_shard_fg_expand]: 2.66999e-06 [shard_inline]: 7.85e-06 [auto_parallel]: 1.104e-05 [parallel]: 3.75998e-06 [flash_sp]: 2.33995e-06 [merge_comm]: 5.71995e-06 [allreduce_fusion]: 5.00004e-06 [matmul_add_comm_reduction]: 7.87003e-06 [allreduce_slice_to_reducescatter]: 2.10013e-07 [virtual_shard_identity]: 8.56002e-06 [virtual_dataset]: 7.45e-06 [get_grad_eliminate_]: 7.17002e-06 [virtual_output]: 6.89004e-06 [merge_forward]: 4.71994e-06 [cell_reuse_recompute_pass]: 2.07999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.579e-05 [before_grad]: 1.253e-05 [inplace_validation]: 4.54998e-06 [meta_fg_expand]: 4.70004e-06 [inplace_validation_after_expand]: 5.39992e-06 [flash_sp_send_recv_attached]: 9.40054e-07 [receive_attached]: 8.69972e-07 [after_resolve]: 1.023e-05 [a_after_grad]: 1.17e TotalTime = 0.0803183, [21] [bootstrap]: 0.00036707 [type_inference]: 0.00254761 [auto_monad]: 0.00013574 [graph_reusing]: 2.82004e-06 [inline]: 1.79e-06 [parallel-infer-symbol]: 2.74996e-06 [pre_auto_parallel]: 2.679e-05 [insert-virtual-dataset]: 2.71993e-06 [parallel-infer-symbol-second]: 4.89992e-07 [dataset_repeat_opt]: 1.62004e-06 [pipeline_split]: 1.23004e-06 [optimize]: 0.00716996, [52] [py_interpret_to_execute]: 1.581e-05 [rewriter_before_opt_a]: 3.507e-05 [opt_a]: 0.00535138, [2] [Cycle 1]: 0.0014916, [43] [expand_dump_flag]: 2.41993e-06 [switch_simplify]: 2.493e-05 [loop_unroll]: 1.30699e-05 [a_1]: 0.00031763 [recompute_prepare]: 9.08994e-06 [updatestate_depend_eliminate]: 7.66001e-06 [updatestate_assign_eliminate]: 5.03997e-06 [updatestate_loads_eliminate]: 6.52007e-06 [parameter_eliminate]: 3.22994e-06 [a_2]: 0.00011582 [accelerated_algorithm]: 8.17992e-06 [shard]: 2.26998e-06 [meta_shard_fg_expand]: 4.08001e-06 [shard_inline]: 8.48994e-06 [auto_parallel]: 1.15599e-05 [parallel]: 8.17003e-06 [flash_sp]: 1.198e-05 [merge_comm]: 8.56002e-06 [allreduce_fusion]: 5.11005e-06 [matmul_add_comm_reduction]: 1.119e-05 [allreduce_slice_to_reducescatter]: 4.89992e-07 [virtual_shard_identity]: 9.58005e-06 [virtual_dataset]: 8.00996e-06 [get_grad_eliminate_]: 7.67002e-06 [virtual_output]: 7.55e-06 [merge_forward]: 6.03008e-06 [cell_reuse_recompute_pass]: 1.71002e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.69401e-05 [before_grad]: 1.36801e-05 [inplace_validation]: 5.19992e-06 [meta_fg_expand]: 5.49003e-06 [inplace_validation_after_expand]: 6.79994e-06 [flash_sp_send_recv_attached]: 4.60004e-06 [receive_attached]: 2.64007e-06 [after_resolve]: 1.119e-05 [a_after_grad]: 1.29699e-05 [special_op_eliminate]: 8.06001e-06 [renormalize]: 0.00043376 [add_forward_monad_depend]: 3.95009e-06 [auto_monad_grad]: 1.99e-06 [auto_monad_eliminator]: 3.341e-05 [cse]: 3.493e-05 [a_3]: 5.70399e-05 [Cycle 2]: 0.00081326, [43] [expand_dump_flag]: 1.13994e-06 [switch_simplify]: 8.70007e-06 [loop_unroll]: 7.62008e-06 [a_1]: 0.00019841 [recompute_prepare]: 7.63999e-06 [updatestate_depend_eliminate]: 6.28002e-06 [updatestate_assign_eliminate]: 4.59992e-06 [updatestate_loads_eliminate]: 5.68002e-06 [parameter_eliminate]: 1.29e-06 [a_2]: 0.00013633 [accelerated_algorithm]: 8.64e-06 [shard]: 1.12003e-06 [meta_shard_fg_expand]: 2.42994e-06 [shard_inline]: 8.18004e-06 [auto_parallel]: 1.164e-05 [parallel]: 3.71004e-06 [flash_sp]: 3.42005e-06 [merge_comm]: 6.21995e-06 [allreduce_fusion]: 4.78001e-06 [matmul_add_comm_reduction]: 7.60006e-06 [allreduce_slice_to_reducescatter]: 2.49944e-07 [virtual_shard_identity]: 8.99006e-06 [virtual_dataset]: 7.46001e-06 [get_grad_eliminate_]: 7.10995e-06 [virtual_output]: 7.32997e-06 [merge_forward]: 4.91005e-06 [cell_reuse_recompute_pass]: 2.07999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.56601e-05 [before_grad]: 1.27199e-05 [inplace_validation]: 4.45999e-06 [meta_fg_expand]: 4.88001e-06 [inplace_validation_after_expand]: 5.34009e-06 [flash_sp_send_recv_attached]: 9.30042e-07 [receive_attached]: 7.49948e-07 [after_resolve]: 9.64e-06 [a_after_grad]:-05 [special_op_eliminate]: 7.30006e-06 [renormalize]: 7.0082e-08 [add_forward_monad_depend]: 9.2003e-07 [auto_monad_grad]: 1.26008e-06 [auto_monad_eliminator]: 1.977e-05 [cse]: 2.075e-05 [a_3]: 4.875e-05 [py_interpret_to_execute_after_opt_a]: 9.84001e-06 [slice_cell_reuse_recomputed_activation]: 1.27999e-06 [rewriter_after_opt_a]: 0.00012398 [convert_after_rewriter]: 7.91997e-06 [order_py_execute_after_rewriter]: 5.02006e-06 [opt_b]: 0.00024357, [1] [Cycle 1]: 0.00023801, [7] [b_1]: 0.00016137 [b_2]: 1.02801e-05 [updatestate_depend_eliminate]: 5.29003e-06 [updatestate_assign_eliminate]: 4.45999e-06 [updatestate_loads_eliminate]: 5.29003e-06 [renormalize]: 2.39932e-07 [cse]: 1.97e-05 [optimize_parallel_all_gather_comm]: 7.95e-06 [overlap_param_gather]: 6.00005e-07 [cconv]: 1.58e-05 [loop_unroll]: 0.00049708 [opt_after_cconv]: 0.00013517, [1] [Cycle 1]: 0.00012879, [7] [c_1]: 5.308e-05 [parameter_eliminate]: 2.69001e-06 [updatestate_depend_eliminate]: 8.45001e-06 [updatestate_assign_eliminate]: 4.82996e-06 [updatestate_loads_eliminate]: 5.69003e-06 [cse]: 2.248e-05 [renormalize]: 4.10015e-07 [remove_dup_value]: 9.84001e-06 [tuple_transform]: 7.15e-05, [1] [Cycle 1]: 6.726e-05, [2] [d_1]: 5.77499e-05 [renormalize]: 1.69966e-07 [partial_unused_args_eliminate]: 1.49e-06 [add_cache_embedding]: 1.163e-05 [add_recomputation]: 4.965e-05 [cse_after_recomputation]: 2.733e-05, [1] [Cycle 1]: 2.307e-05, [1] [cse]: 1.79e-05 [environ_conv]: 6.14999e-06 [swap_dp_allreduce_reducescatter]: 6.56e-06 [bias_add_comm_swap]: 1.32003e-06 [label_micro_interleaved_index]: 1.24995e-06 [label_fine_grained_interleaved_index]: 9.69972e-07 [merge_cast_opt]: 5.29923e-07 [slice_recompute_activation]: 7.00005e-07 [micro_interleaved_order_control]: 8.79983e-07 [assign_add_opt]: 2.363e-05 [ForceFp32Comm]: 5.19911e-07 [remove_cast_before_assign_add]: 5.7799e-06 [full_micro_interleaved_order_control]: 8.69972e-07 [reorder_send_recv_between_fp_bp]: 8.39937e-07 [comm_op_add_attrs]: 2.077e-05 [add_comm_op_reuse_tag]: 1.32993e-06 [interleave_split_concat_branches]: 4.09898e-07 [interleave_parallel_branches]: 4.30038e-07 [overlap_opt_shard_in_pipeline]: 6.60075e-07 [overlap_opt_shard_grad_in_pipeline]: 1.13004e-06 [control_data_broadcast_order]: 5.89993e-07 [grouped_pairwise_exchange_alltoall]: 9.90008e-06 [offloading_packed_experts]: 2.22004e-06 [overlap_recompute_and_grad_model_parallel]: 2.17999e-06 [overlap_grad_matmul_and_grad_allreduce]: 6.79982e-07 [overlap_recompute_allgather_and_fa_grad]: 7.075e-05 [overlap_grad_ring_attention]: 9.70089e-07 [overlap_grad_flash_sp]: 1.32e-05 [begin_end_overlap_inline]: 5.40051e-07 [split_matmul_comm_elemetwise]: 1.57999e-06 [split_layernorm_comm]: 2.12993e-06 [handle_group_info]: 4.92006e-06 [symbol_engine_optimizer]: 9.053e-05, [1] [Cycle 1]: 8.51899e-05, [6] [build]: 4.82006e-06 [elim_shapecalc]: 1.342e-05 [elim_not_effective]: 1.918e-05 [opt_reshape]: 8.35001e-06 [fold_const_symbol]: 1.25801e-05 [renormalize]: 3.89991e-07 [pipeline_parallel_scheduler]: 8.29925e-07 [auto_monad_reorder]: 2.85801e-05 [get_jit_bprop_graph]: 2.99886e-07 [rewriter_after_jit_bprop_graph]: 2.30037e-07 [eliminate_special_op_node]: 0.00050922 [distribtued_split]: 3.26199e-05 [validate]: 3.45301e-05 [task_emit]: 0.0691827 [execute]: 1.21599e-05 Sums bootstrap : 0.000366s : 0.48% type_inference : 0.002548s : 3.34% auto_monad : 0.000136s : 0.18% graph_reusing 1.211e-05 [special_op_eliminate]: 7.32997e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 9.30042e-07 [auto_monad_grad]: 1.12993e-06 [auto_monad_eliminator]: 1.865e-05 [cse]: 2.037e-05 [a_3]: 4.96199e-05 [py_interpret_to_execute_after_opt_a]: 8.72998e-06 [slice_cell_reuse_recomputed_activation]: 2.27999e-06 [rewriter_after_opt_a]: 0.00013953 [convert_after_rewriter]: 1.104e-05 [order_py_execute_after_rewriter]: 6.48003e-06 [opt_b]: 0.00023887, [1] [Cycle 1]: 0.00023353, [7] [b_1]: 0.0001582 [b_2]: 9.77004e-06 [updatestate_depend_eliminate]: 5.25999e-06 [updatestate_assign_eliminate]: 4.61005e-06 [updatestate_loads_eliminate]: 5.42996e-06 [renormalize]: 3.69968e-07 [cse]: 1.87299e-05 [optimize_parallel_all_gather_comm]: 8.43999e-06 [overlap_param_gather]: 1.02003e-06 [cconv]: 2.52801e-05 [loop_unroll]: 0.00048345 [opt_after_cconv]: 0.00013211, [1] [Cycle 1]: 0.00012519, [7] [c_1]: 5.01501e-05 [parameter_eliminate]: 2.55997e-06 [updatestate_depend_eliminate]: 8.25e-06 [updatestate_assign_eliminate]: 4.55009e-06 [updatestate_loads_eliminate]: 5.58002e-06 [cse]: 2.246e-05 [renormalize]: 3.89991e-07 [remove_dup_value]: 1.496e-05 [tuple_transform]: 6.667e-05, [1] [Cycle 1]: 6.24501e-05, [2] [d_1]: 5.33899e-05 [renormalize]: 2.00002e-07 [partial_unused_args_eliminate]: 2.17999e-06 [add_cache_embedding]: 1.34399e-05 [add_recomputation]: 6.33299e-05 [cse_after_recomputation]: 2.6e-05, [1] [Cycle 1]: 2.144e-05, [1] [cse]: 1.678e-05 [environ_conv]: 7.47002e-06 [swap_dp_allreduce_reducescatter]: 7.41996e-06 [bias_add_comm_swap]: 2.22004e-06 [label_micro_interleaved_index]: 2.28989e-06 [label_fine_grained_interleaved_index]: 2.07999e-06 [merge_cast_opt]: 1.00001e-06 [slice_recompute_activation]: 2.34996e-06 [micro_interleaved_order_control]: 1.66008e-06 [assign_add_opt]: 2.887e-05 [ForceFp32Comm]: 8.2003e-07 [remove_cast_before_assign_add]: 7.28993e-06 [full_micro_interleaved_order_control]: 2.15007e-06 [reorder_send_recv_between_fp_bp]: 2.64007e-06 [comm_op_add_attrs]: 2.74599e-05 [add_comm_op_reuse_tag]: 1.71002e-06 [interleave_split_concat_branches]: 8.69972e-07 [interleave_parallel_branches]: 9.69972e-07 [overlap_opt_shard_in_pipeline]: 1.60001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.29001e-06 [control_data_broadcast_order]: 1.05007e-06 [grouped_pairwise_exchange_alltoall]: 9.10007e-06 [offloading_packed_experts]: 2.01003e-06 [overlap_recompute_and_grad_model_parallel]: 2.45997e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.39937e-07 [overlap_recompute_allgather_and_fa_grad]: 6.90699e-05 [overlap_grad_ring_attention]: 2.06998e-06 [overlap_grad_flash_sp]: 1.482e-05 [begin_end_overlap_inline]: 7.3004e-07 [split_matmul_comm_elemetwise]: 2.00002e-06 [split_layernorm_comm]: 1.75007e-06 [handle_group_info]: 4.83997e-06 [symbol_engine_optimizer]: 8.773e-05, [1] [Cycle 1]: 8.27101e-05, [6] [build]: 5.17e-06 [elim_shapecalc]: 1.325e-05 [elim_not_effective]: 1.65099e-05 [opt_reshape]: 8.57003e-06 [fold_const_symbol]: 1.303e-05 [renormalize]: 2.00002e-07 [pipeline_parallel_scheduler]: 1.54995e-06 [auto_monad_reorder]: 3.158e-05 [get_jit_bprop_graph]: 5.00004e-07 [rewriter_after_jit_bprop_graph]: 6.6997e-07 [eliminate_special_op_node]: 0.00050542 [distribtued_split]: 4.115e-05 [validate]: 3.479e-05 [task_emit]: 0.0691828 [execute]: 1.20801e-05 Sums bootstrap : 0.000367s : 0.48% type_inference : 0.002548s : 3.34% auto_monad : 0.000136s : 0.1 : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000027s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000006s : 0.01% optimize.opt_a.switch_simplify : 0.000042s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000542s : 0.71% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000015s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000014s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000293s : 0.38% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000002s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.03% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000014s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000443s : 0.58% optimize.opt_a.add_forward_monad_depend : 0.000004s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000045s : 0.06% optimize.opt_a.cse : 0.000055s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000010s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000124s : 0.16% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000161s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 8% graph_reusing : 0.000003s : 0.00% inline : 0.000002s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000027s : 0.04% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.05% optimize.opt_a.expand_dump_flag : 0.000004s : 0.00% optimize.opt_a.switch_simplify : 0.000034s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000516s : 0.68% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000252s : 0.33% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000033s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000434s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000052s : 0.07% optimize.opt_a.cse : 0.000055s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000140s : 0.18% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000158s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_ 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000016s : 0.02% optimize.loop_unroll : 0.000497s : 0.65% optimize.opt_after_cconv.c_1 : 0.000053s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000058s : 0.08% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000012s : 0.02% optimize.add_recomputation : 0.000050s : 0.07% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000001s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000024s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000021s : 0.03% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000000s : 0.00% optimize.interleave_parallel_branches : 0.000000s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000071s : 0.09% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000019s : 0.03% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_endepend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000019s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000025s : 0.03% optimize.loop_unroll : 0.000483s : 0.63% optimize.opt_after_cconv.c_1 : 0.000050s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000015s : 0.02% optimize.tuple_transform.d_1 : 0.000053s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000063s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000007s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000069s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% gine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000509s : 0.67% distribtued_split : 0.000033s : 0.04% validate : 0.000035s : 0.05% task_emit : 0.069183s : 90.68% execute : 0.000012s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000001s : 0.00% rewriter_after_jit_bprop_graph : 0.000001s : 0.00% eliminate_special_op_node : 0.000505s : 0.66% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.069183s : 90.69% execute : 0.000012s : 0.02% Time group info: ------[substitution.] 0.000134 63 3.68% : 0.000005s : 2: substitution.depend_value_elim 1.58% : 0.000002s : 5: substitution.elim_not_effective 1.35% : 0.000002s : 5: substitution.fold_const_symbol 7.25% : 0.000010s : 6: substitution.graph_param_transform 51.50% : 0.000069s : 1: substitution.inline 3.78% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.46% : 0.000005s : 6: substitution.load_eliminater 2.07% : 0.000003s : 2: substitution.reduce_all_const_elim 5.94% : 0.000008s : 10: substitution.remove_not_recompute_node 2.73% : 0.000004s : 2: substitution.replace_old_param 8.30% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.36% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002517 2 88.78% : 0.002234s : 1: type_inference.infer 11.22% : 0.000282s : 1: type_inference.specialize ------[replace.] 0.000013 1 100.00% : 0.000013s : 1: replace.inline ------[match.] 0.000068 1 100.00% : 0.000068s : 1: match.inline ------[predicate.] 0.000229 1420 0.75% : 0.000002s : 13: predicate.accumulaten_eliminater 1.24% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.78% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.29% : 0.000005s : 25: predicate.arithmetic_simplify 0.82% : 0.000002s : 13: predicate.cast_eliminate 0.84% : 0.000002s : 12: predicate.check_bprop_eliminate 0.73% : 0.000002s : 12: predicate.compare_switch_simplify 0.20% : 0.000000s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.41% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.85% : 0.000002s : 12: predicate.depend_value_elim 0.86% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.94% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.25% : 0.000001s : 6: predicate.elim_not_effective 0.64% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.23% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.16% : 0.000003s : 19: predicate.environ_get_depend_swap 1.94% : 0.000004s : 31: predicate.environ_get_eliminate 1.09% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.82% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.32% : 0.000003s : 14: predicate.float_depend_g_call 0.72% : 0.000002s : 12: predicate.float_environ_get_switch 1.13% : 0.000003s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.28% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.69% : 0.000002s : 12: predicate.incorporate_call_switch 5.64% : 0.000013s : 63: predicate.inline 1.08% : 0.000002s : 12: predicate.inline_without_move 0.41% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.08% : 0.000002s : 12: predicate.less_batch_normalization 1.79% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.39% : 0.000005s : 38: predicate.load_eliminater 1.46% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.16% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.89% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.84% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.82% : 0.000002s : 12: predicat Time group info: ------[substitution.] 0.000115 63 5.93% : 0.000007s : 2: substitution.depend_value_elim 2.26% : 0.000003s : 5: substitution.elim_not_effective 2.00% : 0.000002s : 5: substitution.fold_const_symbol 6.19% : 0.000007s : 6: substitution.graph_param_transform 45.22% : 0.000052s : 1: substitution.inline 5.01% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.48% : 0.000004s : 6: substitution.load_eliminater 3.15% : 0.000004s : 2: substitution.reduce_all_const_elim 6.99% : 0.000008s : 10: substitution.remove_not_recompute_node 3.04% : 0.000004s : 2: substitution.replace_old_param 8.55% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 8.17% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002517 2 89.13% : 0.002243s : 1: type_inference.infer 10.87% : 0.000274s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000051 1 100.00% : 0.000051s : 1: match.inline ------[predicate.] 0.000257 1420 0.68% : 0.000002s : 13: predicate.accumulaten_eliminater 0.94% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.64% : 0.000002s : 12: predicate.addn_check_dump 0.69% : 0.000002s : 13: predicate.addn_zero_filter 0.65% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.05% : 0.000005s : 25: predicate.arithmetic_simplify 0.81% : 0.000002s : 13: predicate.cast_eliminate 0.73% : 0.000002s : 12: predicate.check_bprop_eliminate 0.67% : 0.000002s : 12: predicate.compare_switch_simplify 0.19% : 0.000000s : 6: predicate.const_output_eliminate 0.39% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.09% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.68% : 0.000002s : 12: predicate.depend_value_elim 0.77% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.80% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.74% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.28% : 0.000001s : 6: predicate.elim_not_effective 0.53% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.03% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.03% : 0.000003s : 19: predicate.environ_get_add_eliminate 0.95% : 0.000002s : 19: predicate.environ_get_depend_swap 1.66% : 0.000004s : 31: predicate.environ_get_eliminate 1.04% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.76% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.10% : 0.000003s : 14: predicate.float_depend_g_call 0.67% : 0.000002s : 12: predicate.float_environ_get_switch 13.14% : 0.000034s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000001s : 6: predicate.fold_const_symbol 0.69% : 0.000002s : 12: predicate.get_grad_eliminate 0.33% : 0.000001s : 6: predicate.graph_param_transform 0.68% : 0.000002s : 12: predicate.incorporate_call 0.62% : 0.000002s : 12: predicate.incorporate_call_switch 5.04% : 0.000013s : 63: predicate.inline 0.94% : 0.000002s : 12: predicate.inline_without_move 0.37% : 0.000001s : 12: predicate.j_node_and_user_rematch 0.95% : 0.000002s : 12: predicate.less_batch_normalization 1.45% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.07% : 0.000005s : 38: predicate.load_eliminater 1.08% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.12% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.50% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.66% : 0.000002s : 12: predicate.merge_addn 0.70% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.70% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.80% : 0.000002s : 6: predicate.mutable_eliminate 0.49% : 0.000001s : 6: predicate.opt_reshape 0.48% : 0.000001s : 6: predicate.parallel_virtual_node 1.12% : 0.000003s : 14: predicate.partial_defer_inline 1.22% : 0.000003s : 19: predicate.partial_eliminate 0.78% : 0.000002s : 13: predicate.print_const_string_wrapper 0.84% : 0.000002s : 12: predicate.reduce_all_const_elim 1.06% : 0.000002s : 13: predicate.reduce_eliminate 0.57% : 0.000001s : 12: predicate.remove_not_recompute_node 1.12% : 0.000003s : 25: predicate.replace_applicator 0.46% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.81% : 0.000002s : 13: predicate.reshape_eliminate 0.84% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.50% : 0.000001s : 6: predicate.row_tensor_eliminate 1.03% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.96% : 0.000002s : 12: predicate.shard_identity_eliminate 1.29% : 0.000003s : 18: predicate.special_op_eliminate 0.98% : 0.000002s : 12: predicate.specialize_transform 1.06% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.23% : 0.000005s : 38: predicate.stopgrad_eliminater 0.43% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.97% : 0.000002s : 14: predicate.switch_defer_inline 1.59% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.56% : 0.000010s : 43: predicate.switch_simplify 0.76% : 0.000002s : 13: predicate.tile_eliminate 0.79% : 0.000002s : 13: predicate.transpose_eliminate 1.87% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.63% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.43% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.69% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.70% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.47% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.61% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.31% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.45% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.82% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.54% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000147 4 6.71% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 93.29% : 0.000137s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089381 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.01% : 0.000004s : 1: add_comm_op_reuse_tag 0.06% : 0.000054s : 1: add_recomputation 0.03% : 0.000028s : 1: assign_add_opt 0.17% : 0.000149s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.00% : 0.000004s : 1: bias_add_comm_swap 0.44% : 0.000395s : 1: bootstrap 0.02% : 0.000020s : 1: cconv 0.03% : 0.000025s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.00004e.mini_step_allgather_replace 0.70% : 0.000002s : 13: predicate.minmaximum_grad 0.70% : 0.000002s : 6: predicate.mutable_eliminate 0.39% : 0.000001s : 6: predicate.opt_reshape 0.43% : 0.000001s : 6: predicate.parallel_virtual_node 1.04% : 0.000003s : 14: predicate.partial_defer_inline 1.12% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.75% : 0.000002s : 12: predicate.reduce_all_const_elim 0.91% : 0.000002s : 13: predicate.reduce_eliminate 0.55% : 0.000001s : 12: predicate.remove_not_recompute_node 0.99% : 0.000003s : 25: predicate.replace_applicator 0.44% : 0.000001s : 12: predicate.replace_old_param 0.20% : 0.000001s : 6: predicate.reset_defer_inline 0.68% : 0.000002s : 13: predicate.reshape_eliminate 0.72% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.41% : 0.000001s : 6: predicate.row_tensor_eliminate 0.87% : 0.000002s : 12: predicate.same_eliminate 0.42% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.17% : 0.000003s : 18: predicate.special_op_eliminate 0.81% : 0.000002s : 12: predicate.specialize_transform 0.90% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.90% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.03% : 0.000005s : 38: predicate.stopgrad_eliminater 0.39% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.74% : 0.000002s : 14: predicate.switch_defer_inline 1.48% : 0.000004s : 26: predicate.switch_layer_defer_inline 3.65% : 0.000009s : 43: predicate.switch_simplify 0.76% : 0.000002s : 13: predicate.tile_eliminate 0.74% : 0.000002s : 13: predicate.transpose_eliminate 1.54% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.55% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.44% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.43% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.41% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.26% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.41% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.15% : 0.000006s : 38: predicate.updatestate_pure_node_eliminater 2.96% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.46% : 0.000001s : 6: predicate.value_based_eliminate 0.73% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.67% : 0.000002s : 12: predicate.virtual_output_eliminate 0.58% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000149 4 12.09% : 0.000018s : 1: func_graph_cloner_run.FuncGraphClonerGraph 87.91% : 0.000131s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.089290 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000068s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.17% : 0.000148s : 1: auto_monad 0.04% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.44% : 0.000395s : 1: bootstrap 0.03% : 0.000029s : 1: cconv 0.04% : 0.000031s : 1: comm_op_add_attrs 0.01% : 0.000009s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.59% : 0.000524s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000008s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000003s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.57% : 0.000507s : 1: loop_unroll 0.00% : 0.000003s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000016s : 1: opt.transform.loop_unroll_optimizer 1.31% : 0.001169s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000056s : 1: opt.transform.opt_trans_graph 0.04% : 0.000032s : 3: opt.transform.special_op_eliminate 0.06% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.04% : 0.005400s : 1: opt_a 0.16% : 0.000139s : 1: opt_after_cconv 0.28% : 0.000247s : 1: opt_b 8.03% : 0.007181s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000004s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000003s : 1: overlap_param_gather 0.09% : 0.000076s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000033s : 1: pre_auto_parallel 0.02% : 0.000021s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000009s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.27% : 0.000239s : 1: renormalize.infer 0.22% : 0.000197s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000130s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.00% : 0.000004s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000004s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000094s : 1: symbol_engine_optimizer 77.44% : 0.069217s : 1: task_emit 0.08% : 0.000075s : 1: tuple_transform 2.87% : 0.002566s : 1: type_inference 0.08% : 0.000068s : 1: validate 9s : 1: distribtued_split 0.58% : 0.000519s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000021s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.01% : 0.000012s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.55% : 0.000493s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000004s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.23% : 0.001097s : 80: opt.transform.opt_a 0.05% : 0.000049s : 1: opt.transform.opt_after_cconv 0.17% : 0.000149s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.04% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 6.00% : 0.005355s : 1: opt_a 0.15% : 0.000136s : 1: opt_after_cconv 0.27% : 0.000242s : 1: opt_b 8.04% : 0.007178s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000074s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000005s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000012s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.26% : 0.000236s : 1: renormalize.infer 0.22% : 0.000193s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000145s : 1: rewriter_after_opt_a 0.04% : 0.000039s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000090s : 1: symbol_engine_optimizer 77.52% : 0.069216s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.87% : 0.002567s : 1: type_inference 0.08% : 0.000069s : 1: validate TotalTime = 0.0810704, [21] [bootstrap]: 0.00035616 [type_inference]: 0.00242637 [auto_monad]: 0.00012855 [graph_reusing]: 2.11003e-06 [inline]: 1.45996e-06 [parallel-infer-symbol]: 1.93994e-06 [pre_auto_parallel]: 2.565e-05 [insert-virtual-dataset]: 2.90002e-06 [parallel-infer-symbol-second]: 3.59956e-07 [dataset_repeat_opt]: 1.17009e-06 [pipeline_split]: 1.60001e-06 [optimize]: 0.00713211, [52] [py_interpret_to_execute]: 1.537e-05 [rewriter_before_opt_a]: 3.45e-05 [opt_a]: 0.00531507, [2] [Cycle 1]: 0.00151028, [43] [expand_dump_flag]: 3.65998e-06 [switch_simplify]: 3.06501e-05 [loop_unroll]: 1.443e-05 [a_1]: 0.0003393 [recompute_prepare]: 9.15001e-06 [updatestate_depend_eliminate]: 8.38994e-06 [updatestate_assign_eliminate]: 6.67002e-06 [updatestate_loads_eliminate]: 7.91997e-06 [parameter_eliminate]: 3.29001e-06 [a_2]: 0.00011814 [accelerated_algorithm]: 8.49005e-06 [shard]: 2.35997e-06 [meta_shard_fg_expand]: 3.48e-06 [shard_inline]: 8.49005e-06 [auto_parallel]: 1.233e-05 [parallel]: 6.81996e-06 [flash_sp]: 9.89006e-06 [merge_comm]: 8.25e-06 [allreduce_fusion]: 6.03998e-06 [matmul_add_comm_reduction]: 1.049e-05 [allreduce_slice_to_reducescatter]: 6.29923e-07 [virtual_shard_identity]: 9.8499e-06 [virtual_dataset]: 7.82998e-06 [get_grad_eliminate_]: 7.45e-06 [virtual_output]: 7.55e-06 [merge_forward]: 5.88002e-06 [cell_reuse_recompute_pass]: 2.03005e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.71501e-05 [before_grad]: 1.367e-05 [inplace_validation]: 4.68001e-06 [meta_fg_expand]: 5.51005e-06 [inplace_validation_after_expand]: 6.41006e-06 [flash_sp_send_recv_attached]: 4.39002e-06 [receive_attached]: 2.61003e-06 [after_resolve]: 1.174e-05 [a_after_grad]: 1.208e-05 [special_op_eliminate]: 7.69994e-06 [renormalize]: 0.0004189 [add_forward_monad_depend]: 3.51004e-06 [auto_monad_grad]: 1.81003e-06 [auto_monad_eliminator]: 3.324e-05 [cse]: 3.32301e-05 [a_3]: 5.73499e-05 [Cycle 2]: 0.00082047, [43] [expand_dump_flag]: 1.17999e-06 [switch_simplify]: 8.84e-06 [loop_unroll]: 7.67002e-06 [a_1]: 0.00020166 [recompute_prepare]: 7.33009e-06 [updatestate_depend_eliminate]: 6.01006e-06 [updatestate_assign_eliminate]: 4.55009e-06 [updatestate_loads_eliminate]: 5.03997e-06 [parameter_eliminate]: 1.10001e-06 [a_2]: 0.000106 [accelerated_algorithm]: 8.26002e-06 [shard]: 1.04995e-06 [meta_shard_fg_expand]: 2.6999e-06 [shard_inline]: 8.11997e-06 [auto_parallel]: 1.081e-05 [parallel]: 3.49991e-06 [flash_sp]: 4.22995e-06 [merge_comm]: 5.88002e-06 [allreduce_fusion]: 5.02006e-06 [matmul_add_comm_reduction]: 7.76001e-06 [allreduce_slice_to_reducescatter]: 3.69968e-07 [virtual_shard_identity]: 8.56991e-06 [virtual_dataset]: 7.48993e-06 [get_grad_eliminate_]: 7.30006e-06 [virtual_output]: 7.10005e-06 [merge_forward]: 4.54998e-06 [cell_reuse_recompute_pass]: 1.60001e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.51199e-05 [before_grad]: 1.244e-05 [inplace_validation]: 4.21004e-06 [meta_fg_expand]: 4.60993e-06 [inplace_validation_after_expand]: 5.04998e-06 [flash_sp_send_recv_attached]: 8.29925e-07 [receive_attached]: 7.49948e-07 [after_resolve]: 9.32999e-06 [a_after_grad]: 1.14801e-05 [special_op_eliminate]: 4.85299e-05 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 9.89996e-07 [auto_monad_grad]: 1.06997e-06 [auto_monad_eliminator]: 1.845e-05 [cse]: 2.067e-05 [a_3]: 4.945e-05 [py_interpret_to_execute_after_opt_a]: 9.17003e-06 [slice_cell_reuse_recomputed_activation]: 2.21003e-06 [rewriter_after_opt_a]: 0.00014391 [convert_after_rewriter]: 7.50995e-06 [order_py_execute_after_rewriter]: 6.00005e-06 [opt_b]: 0.00024114, [1] [Cycle 1]: 0.00023583, [7] [b_1]: 0.00016174 [b_2]: 8.97003e-06 [updatestate_depend_eliminate]: 5.32006e-06 [updatestate_assign_eliminate]: 4.70004e-06 [updatestate_loads_eliminate]: 5.29992e-06 [renormalize]: 3.40049e-07 [cse]: 1.957e-05 [optimize_parallel_all_gather_comm]: 8.00996e-06 [overlap_param_gather]: 9.69972e-07 [cconv]: 1.94401e-05 [loop_unroll]: 0.00048513 [opt_after_cconv]: 0.00013336, [1] [Cycle 1]: 0.00012722, [7] [c_1]: 5.149e-05 [parameter_eliminate]: 2.49001e-06 [updatestate_depend_eliminate]: 8.16002e-06 [updatestate_assign_eliminate]: 4.87e-06 [updatestate_loads_eliminate]: 5.84009e-06 [cse]: 2.16401e-05 [renormalize]: 4.30038e-07 [remove_dup_value]: 1.34599e-05 [tuple_transform]: 6.87401e-05, [1] [Cycle 1]: 6.449e-05, [2] [d_1]: 5.55801e-05 [renormalize]: 2.30037e-07 [partial_unused_args_eliminate]: 1.87999e-06 [add_cache_embedding]: 1.28699e-05 [add_recomputation]: 6.133e-05 [cse_after_recomputation]: 2.676e-05, [1] [Cycle 1]: 2.23099e-05, [1] [cse]: 1.696e-05 [environ_conv]: 7.57002e-06 [swap_dp_allreduce_reducescatter]: 7.62998e-06 [bias_add_comm_swap]: 2.43995e-06 [label_micro_interleaved_index]: 1.64995e-06 [label_fine_grained_interleaved_index]: 2.39001e-06 [merge_cast_opt]: 1.11002e-06 [slice_recompute_activation]: 1.63994e-06 [micro_interleaved_order_control]: 2.21003e-06 [assign_add_opt]: 2.81599e-05 [ForceFp32Comm]: 7.89994e-07 [remove_cast_before_assign_add]: 7.09004e-06 [full_micro_interleaved_order_control]: 2.22004e-06 [reorder_send_recv_between_fp_bp]: 2.14996e-06 [comm_op_add_attrs]: 2.72599e-05 [add_comm_op_reuse_tag]: 2.40002e-06 [interleave_split_concat_branches]: 8.40053e-07 [interleave_parallel_branches]: 7.59959e-07 [overlap_opt_shard_in_pipeline]: 1.05996e-06 [overlap_opt_shard_grad_in_pipeline]: 2.21003e-06 [control_data_broadcast_order]: 1.06008e-06 [grouped_pairwise_exchange_alltoall]: 9.44e-06 [offloading_packed_experts]: 2.78e-06 [overlap_recompute_and_grad_model_parallel]: 1.65007e-06 [overlap_grad_matmul_and_grad_allreduce]: 7.3004e-07 [overlap_recompute_allgather_and_fa_grad]: 6.95799e-05 [overlap_grad_ring_attention]: 1.93994e-06 [overlap_grad_flash_sp]: 1.478e-05 [begin_end_overlap_inline]: 7.3004e-07 [split_matmul_comm_elemetwise]: 2.14006e-06 [split_layernorm_comm]: 1.93005e-06 [handle_group_info]: 4.64998e-06 [symbol_engine_optimizer]: 8.727e-05, [1] [Cycle 1]: 8.27101e-05, [6] [build]: 4.62995e-06 [elim_shapecalc]: 1.30299e-05 [elim_not_effective]: 1.643e-05 [opt_reshape]: 8.74e-06 [fold_const_symbol]: 1.35701e-05 [renormalize]: 3.89991e-07 [pipeline_parallel_scheduler]: 1.54995e-06 [auto_monad_reorder]: 2.954e-05 [get_jit_bprop_graph]: 4.69969e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00049966 [distribtued_split]: 4.10499e-05 [validate]: 3.52301e-05 [task_emit]: 0.0701339 [execute]: 1.034e-05 Sums bootstrap : 0.000356s : 0.46% type_inference : 0.002426s : 3.15% auto_monad : 0.000129s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000035s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000541s : 0.70% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000004s : 0.01% optimize.opt_a.a_2 : 0.000224s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000023s : 0.03% optimize.opt_a.parallel : 0.000010s : 0.01% optimize.opt_a.flash_sp : 0.000014s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000011s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000018s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000010s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000056s : 0.07% optimize.opt_a.renormalize : 0.000419s : 0.54% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000052s : 0.07% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000107s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000144s : 0.19% optimize.convert_after_rewriter : 0.000008s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000019s : 0.03% optimize.loop_unroll : 0.000485s : 0.63% optimize.opt_after_cconv.c_1 : 0.000051s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000061s : 0.08% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000028s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000027s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000009s : 0.01% optimize.offloading_packed_experts : 0.000003s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000070s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000015s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000500s : 0.65% distribtued_split : 0.000041s : 0.05% validate : 0.000035s : 0.05% task_emit : 0.070134s : 90.96% execute : 0.000010s : 0.01% TotalTime = 0.0811922, [21] [bootstrap]: 0.00030961 [type_inference]: 0.00241958 [auto_monad]: 0.00012331 [graph_reusing]: 2.14996e-06 [inline]: 1.17999e-06 [parallel-infer-symbol]: 1.8701e-06 [pre_auto_parallel]: 2.40699e-05 [insert-virtual-dataset]: 2.46998e-06 [parallel-infer-symbol-second]: 3.69968e-07 [dataset_repeat_opt]: 1.17009e-06 [pipeline_split]: 1.41002e-06 [optimize]: 0.00705454, [52] [py_interpret_to_execute]: 1.468e-05 [rewriter_before_opt_a]: 3.43299e-05 [opt_a]: 0.0052408, [2] [Cycle 1]: 0.00152431, [43] [expand_dump_flag]: 3.93996e-06 [switch_simplify]: 2.991e-05 [loop_unroll]: 1.352e-05 [a_1]: 0.00033646 [recompute_prepare]: 9.09995e-06 [updatestate_depend_eliminate]: 8.51997e-06 [updatestate_assign_eliminate]: 6.12997e-06 [updatestate_loads_eliminate]: 7.10005e-06 [parameter_eliminate]: 3.18e-06 [a_2]: 0.00011585 [accelerated_algorithm]: 8.29005e-06 [shard]: 2.37999e-06 [meta_shard_fg_expand]: 3.61004e-06 [shard_inline]: 8.77003e-06 [auto_parallel]: 1.202e-05 [parallel]: 7.39994e-06 [flash_sp]: 1.02e-05 [merge_comm]: 8.10006e-06 [allreduce_fusion]: 5.23997e-06 [matmul_add_comm_reduction]: 1.067e-05 [allreduce_slice_to_reducescatter]: 4.60073e-07 [virtual_shard_identity]: 9.34e-06 [virtual_dataset]: 7.82998e-06 [get_grad_eliminate_]: 8.03999e-06 [virtual_output]: 7.92998e-06 [merge_forward]: 6.07991e-06 [cell_reuse_recompute_pass]: 1.80991e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.71401e-05 [before_grad]: 1.371e-05 [inplace_validation]: 5.46e-06 [meta_fg_expand]: 5.41005e-06 [inplace_validation_after_expand]: 5.97001e-06 [flash_sp_send_recv_attached]: 4.58991e-06 [receive_attached]: 2.92994e-06 [after_resolve]: 1.11801e-05 [a_after_grad]: 1.242e-05 [special_op_eliminate]: 7.71997e-06 [renormalize]: 0.00043358 [add_forward_monad_depend]: 3.61993e-06 [auto_monad_grad]: 1.83005e-06 [auto_monad_eliminator]: 3.01601e-05 [cse]: 3.36e-05 [a_3]: 5.664e-05 [Cycle 2]: 0.00080325, [43] [expand_dump_flag]: 1.06997e-06 [switch_simplify]: 9.02999e-06 [loop_unroll]: 8.03999e-06 [a_1]: 0.00019956 [recompute_prepare]: 7.17002e-06 [updatestate_depend_eliminate]: 5.94009e-06 [updatestate_assign_eliminate]: 4.74008e-06 [updatestate_loads_eliminate]: 5.08991e-06 [parameter_eliminate]: 1.40001e-06 [a_2]: 0.00010404 [accelerated_algorithm]: 7.88993e-06 [shard]: 1.11002e-06 [meta_shard_fg_expand]: 2.34006e-06 [shard_inline]: 7.73999e-06 [auto_parallel]: 1.044e-05 [parallel]: 3.60003e-06 [flash_sp]: 3.28e-06 [merge_comm]: 5.74999e-06 [allreduce_fusion]: 4.71994e-06 [matmul_add_comm_reduction]: 7.87003e-06 [allreduce_slice_to_reducescatter]: 2.40048e-07 [virtual_shard_identity]: 8.42998e-06 [virtual_dataset]: 7.47992e-06 [get_grad_eliminate_]: 7.26001e-06 [virtual_output]: 6.83998e-06 [merge_forward]: 4.84998e-06 [cell_reuse_recompute_pass]: 1.91003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.516e-05 [before_grad]: 1.213e-05 [inplace_validation]: 4.43996e-06 [meta_fg_expand]: 4.72995e-06 [inplace_validation_after_expand]: 5.08002e-06 [flash_sp_send_recv_attached]: 9.10019e-07 [receive_attached]: 8.2003e-07 [after_resolve]: 9.81998e-06 [a_after_grad]: 1.214e-05 [special_op_eliminate]: 7.1401e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 9.09902e-07 [auto_monad_grad]: 1.11992e-06 [auto_monad_eliminator]: 1.849e-05 [cse]: 2.046e-05 [a_3]: 4.832e-05 [py_interpret_to_execute_after_opt_a]: 9.40997e-06 [slice_cell_reuse_recomputed_activation]: 2.70992e-06 [rewriter_after_opt_a]: 0.00014166 [convert_after_rewriter]: 1.107e-05 [order_py_execute_after_rewriter]: 6.53008e-06 [opt_b]: 0.00023999, [1] [Cycle 1]: 0.00023481, [7] [b_1]: 0.00015921 [b_2]: 9.29995e-06 [updatestate_depend_eliminate]: 5.35999e-06 [updatestate_assign_eliminate]: 4.58001e-06 [updatestate_loads_eliminate]: 5.17e-06 [renormalize]: 2.79979e-07 [cse]: 1.96099e-05 [optimize_parallel_all_gather_comm]: 8.70007e-06 [overlap_param_gather]: 1.07998e-06 [cconv]: 2.266e-05 [loop_unroll]: 0.00047664 [opt_after_cconv]: 0.00013431, [1] [Cycle 1]: 0.00012759, [7] [c_1]: 5.198e-05 [parameter_eliminate]: 2.40991e-06 [updatestate_depend_eliminate]: 8.23999e-06 [updatestate_assign_eliminate]: 4.84998e-06 [updatestate_loads_eliminate]: 5.17e-06 [cse]: 2.238e-05 [renormalize]: 3.20026e-07 [remove_dup_value]: 1.29601e-05 [tuple_transform]: 6.764e-05, [1] [Cycle 1]: 6.32e-05, [2] [d_1]: 5.394e-05 [renormalize]: 2.59955e-07 [partial_unused_args_eliminate]: 2.3701e-06 [add_cache_embedding]: 1.32699e-05 [add_recomputation]: 6.034e-05 [cse_after_recomputation]: 2.78701e-05, [1] [Cycle 1]: 2.313e-05, [1] [cse]: 1.78e-05 [environ_conv]: 7.51996e-06 [swap_dp_allreduce_reducescatter]: 7.58993e-06 [bias_add_comm_swap]: 2.13995e-06 [label_micro_interleaved_index]: 1.92004e-06 [label_fine_grained_interleaved_index]: 2.40991e-06 [merge_cast_opt]: 9.79984e-07 [slice_recompute_activation]: 1.72004e-06 [micro_interleaved_order_control]: 1.82004e-06 [assign_add_opt]: 2.899e-05 [ForceFp32Comm]: 1.04995e-06 [remove_cast_before_assign_add]: 7.26001e-06 [full_micro_interleaved_order_control]: 2.20002e-06 [reorder_send_recv_between_fp_bp]: 2.07999e-06 [comm_op_add_attrs]: 2.644e-05 [add_comm_op_reuse_tag]: 1.63005e-06 [interleave_split_concat_branches]: 8.30041e-07 [interleave_parallel_branches]: 6.89994e-07 [overlap_opt_shard_in_pipeline]: 1.10001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.10002e-06 [control_data_broadcast_order]: 1.14995e-06 [grouped_pairwise_exchange_alltoall]: 9.57993e-06 [offloading_packed_experts]: 2.27999e-06 [overlap_recompute_and_grad_model_parallel]: 1.47999e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.30041e-07 [overlap_recompute_allgather_and_fa_grad]: 7.11e-05 [overlap_grad_ring_attention]: 1.85007e-06 [overlap_grad_flash_sp]: 1.343e-05 [begin_end_overlap_inline]: 7.40052e-07 [split_matmul_comm_elemetwise]: 1.83005e-06 [split_layernorm_comm]: 1.72993e-06 [handle_group_info]: 4.48991e-06 [symbol_engine_optimizer]: 8.77799e-05, [1] [Cycle 1]: 8.285e-05, [6] [build]: 4.75999e-06 [elim_shapecalc]: 1.23e-05 [elim_not_effective]: 1.651e-05 [opt_reshape]: 8.67993e-06 [fold_const_symbol]: 1.33e-05 [renormalize]: 2.79979e-07 [pipeline_parallel_scheduler]: 1.66998e-06 [auto_monad_reorder]: 2.907e-05 [get_jit_bprop_graph]: 4.89992e-07 [rewriter_after_jit_bprop_graph]: 4.70085e-07 [eliminate_special_op_node]: 0.00049538 [distribtued_split]: 3.962e-05 [validate]: 3.458e-05 [task_emit]: 0.0704033 [execute]: 1.054e-05 Sums bootstrap : 0.000310s : 0.40% type_inference : 0.002420s : 3.13% auto_monad : 0.000123s : 0.16% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000024s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000015s : 0.02% optimize.rewriter_before_opt_a : 0.000034s : 0.04% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000039s : 0.05% optimize.opt_a.loop_unroll : 0.000022s : 0.03% optimize.opt_a.a_1 : 0.000536s : 0.69% optimize.opt_a.recompute_prepare : 0.000016s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000016s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000017s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000011s : 0.01% optimize.opt_a.flash_sp : 0.000013s : 0.02% optimize.opt_a.merge_comm : 0.000014s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000010s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000011s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000004s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000434s : 0.56% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000049s : 0.06% optimize.opt_a.cse : 0.000054s : 0.07% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000142s : 0.18% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000007s : 0.01% optimize.opt_b.b_1 : 0.000159s : 0.21% optimize.opt_b.b_2 : 0.000009s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000009s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000477s : 0.62% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000022s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000013s : 0.02% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000060s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000026s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000010s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000001s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000071s : 0.09% optimize.overlap_grad_ring_attention : 0.000002s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000004s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000012s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000017s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000013s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000029s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000495s : 0.64% distribtued_split : 0.000040s : 0.05% validate : 0.000035s : 0.04% task_emit : 0.070403s : 91.13% execute : 0.000011s : 0.01% TotalTime = 0.0812463, [21] [bootstrap]: 0.00033455 [type_inference]: 0.00237037 [auto_monad]: 0.00010028 [graph_reusing]: 1.64006e-06 [inline]: 1.29e-06 [parallel-infer-symbol]: 1.52003e-06 [pre_auto_parallel]: 2.005e-05 [insert-virtual-dataset]: 1.64995e-06 [parallel-infer-symbol-second]: 3.49944e-07 [dataset_repeat_opt]: 6.6997e-07 [pipeline_split]: 1.02993e-06 [optimize]: 0.00713458, [52] [py_interpret_to_execute]: 1.244e-05 [rewriter_before_opt_a]: 3.049e-05 [opt_a]: 0.00539912, [2] [Cycle 1]: 0.00141536, [43] [expand_dump_flag]: 2.13995e-06 [switch_simplify]: 2.50499e-05 [loop_unroll]: 1.311e-05 [a_1]: 0.00032148 [recompute_prepare]: 9.25001e-06 [updatestate_depend_eliminate]: 7.18003e-06 [updatestate_assign_eliminate]: 5.40004e-06 [updatestate_loads_eliminate]: 5.79003e-06 [parameter_eliminate]: 2.05007e-06 [a_2]: 0.00011508 [accelerated_algorithm]: 8.64e-06 [shard]: 1.42003e-06 [meta_shard_fg_expand]: 2.98e-06 [shard_inline]: 8.67003e-06 [auto_parallel]: 1.151e-05 [parallel]: 5.02996e-06 [flash_sp]: 8.10996e-06 [merge_comm]: 7.16001e-06 [allreduce_fusion]: 5.35999e-06 [matmul_add_comm_reduction]: 8.89006e-06 [allreduce_slice_to_reducescatter]: 3.29921e-07 [virtual_shard_identity]: 1.006e-05 [virtual_dataset]: 8.08004e-06 [get_grad_eliminate_]: 7.68004e-06 [virtual_output]: 7.31996e-06 [merge_forward]: 4.85999e-06 [cell_reuse_recompute_pass]: 1.14995e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.629e-05 [before_grad]: 1.39601e-05 [inplace_validation]: 4.62995e-06 [meta_fg_expand]: 5.20004e-06 [inplace_validation_after_expand]: 5.2799e-06 [flash_sp_send_recv_attached]: 3.46999e-06 [receive_attached]: 1.90001e-06 [after_resolve]: 1.062e-05 [a_after_grad]: 1.239e-05 [special_op_eliminate]: 7.87003e-06 [renormalize]: 0.0003949 [add_forward_monad_depend]: 2.41003e-06 [auto_monad_grad]: 1.40001e-06 [auto_monad_eliminator]: 2.371e-05 [cse]: 2.55399e-05 [a_3]: 5.632e-05 [Cycle 2]: 0.00080506, [43] [expand_dump_flag]: 9.2003e-07 [switch_simplify]: 8.99995e-06 [loop_unroll]: 7.98993e-06 [a_1]: 0.00020031 [recompute_prepare]: 7.30995e-06 [updatestate_depend_eliminate]: 5.83008e-06 [updatestate_assign_eliminate]: 5.13007e-06 [updatestate_loads_eliminate]: 5.01005e-06 [parameter_eliminate]: 1.15007e-06 [a_2]: 0.00010511 [accelerated_algorithm]: 8.51997e-06 [shard]: 9.59961e-07 [meta_shard_fg_expand]: 2.69001e-06 [shard_inline]: 7.53999e-06 [auto_parallel]: 9.82999e-06 [parallel]: 3.02005e-06 [flash_sp]: 2.27999e-06 [merge_comm]: 5.61995e-06 [allreduce_fusion]: 4.74998e-06 [matmul_add_comm_reduction]: 7.22008e-06 [allreduce_slice_to_reducescatter]: 2.59955e-07 [virtual_shard_identity]: 9.16002e-06 [virtual_dataset]: 7.86001e-06 [get_grad_eliminate_]: 7.50006e-06 [virtual_output]: 7.30995e-06 [merge_forward]: 4.34008e-06 [cell_reuse_recompute_pass]: 1.57999e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.518e-05 [before_grad]: 1.236e-05 [inplace_validation]: 3.89002e-06 [meta_fg_expand]: 4.88991e-06 [inplace_validation_after_expand]: 4.87e-06 [flash_sp_send_recv_attached]: 8.10018e-07 [receive_attached]: 7.10017e-07 [after_resolve]: 3.575e-05 [a_after_grad]: 1.27699e-05 [special_op_eliminate]: 7.56001e-06 [renormalize]: 6.99656e-08 [add_forward_monad_depend]: 8.89995e-07 [auto_monad_grad]: 8.89995e-07 [auto_monad_eliminator]: 1.62701e-05 [cse]: 1.889e-05 [a_3]: 4.87e-05 [py_interpret_to_execute_after_opt_a]: 8.84e-06 [slice_cell_reuse_recomputed_activation]: 1.36998e-06 [rewriter_after_opt_a]: 0.00012639 [convert_after_rewriter]: 1.081e-05 [order_py_execute_after_rewriter]: 5.20004e-06 [opt_b]: 0.00024166, [1] [Cycle 1]: 0.00023676, [7] [b_1]: 0.00016216 [b_2]: 9.81998e-06 [updatestate_depend_eliminate]: 4.95999e-06 [updatestate_assign_eliminate]: 4.41005e-06 [updatestate_loads_eliminate]: 4.97e-06 [renormalize]: 2.59955e-07 [cse]: 1.829e-05 [optimize_parallel_all_gather_comm]: 7.59994e-06 [overlap_param_gather]: 1.06997e-06 [cconv]: 1.47399e-05 [loop_unroll]: 0.00048028 [opt_after_cconv]: 0.00012787, [1] [Cycle 1]: 0.00012213, [7] [c_1]: 5.162e-05 [parameter_eliminate]: 1.66998e-06 [updatestate_depend_eliminate]: 7.22997e-06 [updatestate_assign_eliminate]: 4.62995e-06 [updatestate_loads_eliminate]: 4.95999e-06 [cse]: 2e-05 [renormalize]: 4.39934e-07 [remove_dup_value]: 9.69996e-06 [tuple_transform]: 6.687e-05, [1] [Cycle 1]: 6.273e-05, [2] [d_1]: 5.377e-05 [renormalize]: 1.60071e-07 [partial_unused_args_eliminate]: 1.33005e-06 [add_cache_embedding]: 1.11799e-05 [add_recomputation]: 5.484e-05 [cse_after_recomputation]: 2.626e-05, [1] [Cycle 1]: 2.206e-05, [1] [cse]: 1.718e-05 [environ_conv]: 6.43998e-06 [swap_dp_allreduce_reducescatter]: 6.86001e-06 [bias_add_comm_swap]: 1.94996e-06 [label_micro_interleaved_index]: 1.26997e-06 [label_fine_grained_interleaved_index]: 1.05996e-06 [merge_cast_opt]: 7.00005e-07 [slice_recompute_activation]: 1.43005e-06 [micro_interleaved_order_control]: 1.22003e-06 [assign_add_opt]: 2.44799e-05 [ForceFp32Comm]: 7.79983e-07 [remove_cast_before_assign_add]: 6.12007e-06 [full_micro_interleaved_order_control]: 1.12993e-06 [reorder_send_recv_between_fp_bp]: 1.03004e-06 [comm_op_add_attrs]: 2.44201e-05 [add_comm_op_reuse_tag]: 1.59e-06 [interleave_split_concat_branches]: 9.10019e-07 [interleave_parallel_branches]: 5.10016e-07 [overlap_opt_shard_in_pipeline]: 1.59e-06 [overlap_opt_shard_grad_in_pipeline]: 1.21002e-06 [control_data_broadcast_order]: 6.40051e-07 [grouped_pairwise_exchange_alltoall]: 7.05989e-06 [offloading_packed_experts]: 1.25007e-06 [overlap_recompute_and_grad_model_parallel]: 1.74996e-06 [overlap_grad_matmul_and_grad_allreduce]: 4.89992e-07 [overlap_recompute_allgather_and_fa_grad]: 5.68799e-05 [overlap_grad_ring_attention]: 1.13004e-06 [overlap_grad_flash_sp]: 1.254e-05 [begin_end_overlap_inline]: 4.69969e-07 [split_matmul_comm_elemetwise]: 1.40001e-06 [split_layernorm_comm]: 1.05007e-06 [handle_group_info]: 3.48e-06 [symbol_engine_optimizer]: 9.079e-05, [1] [Cycle 1]: 8.598e-05, [6] [build]: 4.81994e-06 [elim_shapecalc]: 1.347e-05 [elim_not_effective]: 1.62999e-05 [opt_reshape]: 9.49006e-06 [fold_const_symbol]: 1.423e-05 [renormalize]: 3.50061e-07 [pipeline_parallel_scheduler]: 9.00007e-07 [auto_monad_reorder]: 2.542e-05 [get_jit_bprop_graph]: 3.39933e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00050204 [distribtued_split]: 3.32301e-05 [validate]: 3.096e-05 [task_emit]: 0.0704468 [execute]: 7.92998e-06 Sums bootstrap : 0.000335s : 0.43% type_inference : 0.002370s : 3.08% auto_monad : 0.000100s : 0.13% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000002s : 0.00% pre_auto_parallel : 0.000020s : 0.03% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000001s : 0.00% optimize.py_interpret_to_execute : 0.000012s : 0.02% optimize.rewriter_before_opt_a : 0.000030s : 0.04% optimize.opt_a.expand_dump_flag : 0.000003s : 0.00% optimize.opt_a.switch_simplify : 0.000034s : 0.04% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000522s : 0.68% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000013s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000011s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.01% optimize.opt_a.parameter_eliminate : 0.000003s : 0.00% optimize.opt_a.a_2 : 0.000220s : 0.29% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000002s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000021s : 0.03% optimize.opt_a.parallel : 0.000008s : 0.01% optimize.opt_a.flash_sp : 0.000010s : 0.01% optimize.opt_a.merge_comm : 0.000013s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000016s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000019s : 0.02% optimize.opt_a.virtual_dataset : 0.000016s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000009s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000003s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000031s : 0.04% optimize.opt_a.before_grad : 0.000026s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000010s : 0.01% optimize.opt_a.flash_sp_send_recv_attached : 0.000004s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000046s : 0.06% optimize.opt_a.a_after_grad : 0.000025s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000395s : 0.51% optimize.opt_a.add_forward_monad_depend : 0.000003s : 0.00% optimize.opt_a.auto_monad_grad : 0.000002s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000040s : 0.05% optimize.opt_a.cse : 0.000044s : 0.06% optimize.opt_a.a_3 : 0.000105s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000001s : 0.00% optimize.rewriter_after_opt_a : 0.000126s : 0.16% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000005s : 0.01% optimize.opt_b.b_1 : 0.000162s : 0.21% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000004s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000018s : 0.02% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000015s : 0.02% optimize.loop_unroll : 0.000480s : 0.62% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000002s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000007s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.cse : 0.000020s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000010s : 0.01% optimize.tuple_transform.d_1 : 0.000054s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000001s : 0.00% optimize.add_cache_embedding : 0.000011s : 0.01% optimize.add_recomputation : 0.000055s : 0.07% optimize.cse_after_recomputation.cse : 0.000017s : 0.02% optimize.environ_conv : 0.000006s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000007s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000001s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000001s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000001s : 0.00% optimize.micro_interleaved_order_control : 0.000001s : 0.00% optimize.assign_add_opt : 0.000024s : 0.03% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000006s : 0.01% optimize.full_micro_interleaved_order_control : 0.000001s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000001s : 0.00% optimize.comm_op_add_attrs : 0.000024s : 0.03% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000001s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000007s : 0.01% optimize.offloading_packed_experts : 0.000001s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000000s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000057s : 0.07% optimize.overlap_grad_ring_attention : 0.000001s : 0.00% optimize.overlap_grad_flash_sp : 0.000013s : 0.02% optimize.begin_end_overlap_inline : 0.000000s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000001s : 0.00% optimize.split_layernorm_comm : 0.000001s : 0.00% optimize.handle_group_info : 0.000003s : 0.00% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000013s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000016s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000009s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000001s : 0.00% auto_monad_reorder : 0.000025s : 0.03% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000502s : 0.65% distribtued_split : 0.000033s : 0.04% validate : 0.000031s : 0.04% task_emit : 0.070447s : 91.40% execute : 0.000008s : 0.01% Time group info: ------[substitution.] 0.000130 63 4.98% : 0.000006s : 2: substitution.depend_value_elim 1.86% : 0.000002s : 5: substitution.elim_not_effective 1.73% : 0.000002s : 5: substitution.fold_const_symbol 5.44% : 0.000007s : 6: substitution.graph_param_transform 50.56% : 0.000066s : 1: substitution.inline 4.09% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.24% : 0.000004s : 6: substitution.load_eliminater 2.63% : 0.000003s : 2: substitution.reduce_all_const_elim 5.95% : 0.000008s : 10: substitution.remove_not_recompute_node 2.72% : 0.000004s : 2: substitution.replace_old_param 8.46% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 8.33% : 0.000011s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002396 2 89.19% : 0.002137s : 1: type_inference.infer 10.81% : 0.000259s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000064 1 100.00% : 0.000064s : 1: match.inline ------[predicate.] 0.000229 1420 0.77% : 0.000002s : 13: predicate.accumulaten_eliminater 0.99% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.73% : 0.000002s : 12: predicate.addn_check_dump 0.77% : 0.000002s : 13: predicate.addn_zero_filter 0.81% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.12% : 0.000005s : 25: predicate.arithmetic_simplify 0.90% : 0.000002s : 13: predicate.cast_eliminate 0.83% : 0.000002s : 12: predicate.check_bprop_eliminate 0.72% : 0.000002s : 12: predicate.compare_switch_simplify 0.23% : 0.000001s : 6: predicate.const_output_eliminate 0.40% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.49% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.82% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.88% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.87% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.30% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.15% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.13% : 0.000003s : 19: predicate.environ_get_add_eliminate 1.11% : 0.000003s : 19: predicate.environ_get_depend_swap 2.01% : 0.000005s : 31: predicate.environ_get_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.81% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.30% : 0.000003s : 14: predicate.float_depend_g_call 0.81% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.82% : 0.000002s : 12: predicate.get_grad_eliminate 0.37% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.75% : 0.000013s : 63: predicate.inline 0.98% : 0.000002s : 12: predicate.inline_without_move 0.40% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.02% : 0.000002s : 12: predicate.less_batch_normalization 1.71% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.38% : 0.000005s : 38: predicate.load_eliminater 1.30% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.32% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.76% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.77% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.79% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.77% : 0.000002s : 13: predicate.minmaximum_grad 0.77% : 0.000002s : 6: predicate.mutable_eliminate 0.52% : 0.000001s : 6: predicate.opt_reshape 0.49% : 0.000001s : 6: predicate.parallel_virtual_node 1.07% : 0.000002s : 14: predicate.partial_defer_inline 1.32% : 0.000003s : 19: predicate.partial_eliminate 0.79% : 0.000002s : 13: predicate.print_const_string_wrapper 0.87% : 0.000002s : 12: predicate.reduce_all_const_elim 1.03% : 0.000002s : 13: predicate.reduce_eliminate 0.70% : 0.000002s : 12: predicate.remove_not_recompute_node 1.15% : 0.000003s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.99% : 0.000002s : 13: predicate.reshape_eliminate 0.80% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.46% : 0.000001s : 6: predicate.row_tensor_eliminate 0.97% : 0.000002s : 12: predicate.same_eliminate 0.48% : 0.000001s : 12: predicate.set_cell_output_no_recompute 1.00% : 0.000002s : 12: predicate.shard_identity_eliminate 1.47% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.10% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 1.01% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.29% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.91% : 0.000002s : 14: predicate.switch_defer_inline 1.68% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.31% : 0.000010s : 43: predicate.switch_simplify 0.78% : 0.000002s : 13: predicate.tile_eliminate 0.92% : 0.000002s : 13: predicate.transpose_eliminate 1.73% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.72% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.56% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.73% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.54% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.72% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.40% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.32% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.51% : 0.000001s : 6: predicate.value_based_eliminate 0.85% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.80% : 0.000002s : 12: predicate.virtual_output_eliminate 0.45% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000136 4 9.66% : 0.000013s : 1: func_graph_cloner_run.FuncGraphClonerGraph 90.34% : 0.000123s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090042 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000016s : 1: add_cache_embedding 0.01% : 0.000010s : 1: add_comm_op_reuse_tag 0.07% : 0.000066s : 1: add_recomputation 0.04% : 0.000032s : 1: assign_add_opt 0.16% : 0.000141s : 1: auto_monad 0.04% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.42% : 0.000381s : 1: bootstrap 0.03% : 0.000023s : 1: cconv 0.04% : 0.000032s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.01% : 0.000012s : 1: convert_after_rewriter 0.03% : 0.000030s : 1: cse_after_recomputation 0.01% : 0.000007s : 1: dataset_repeat_opt 0.05% : 0.000049s : 1: distribtued_split 0.57% : 0.000514s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000019s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.55% : 0.000495s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.27% : 0.001141s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000152s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000048s : 4: opt.transform.symbol_engine_opt 5.91% : 0.005318s : 1: opt_a 0.15% : 0.000137s : 1: opt_after_cconv 0.27% : 0.000244s : 1: opt_b 7.93% : 0.007141s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000018s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000075s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000004s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.25% : 0.000228s : 1: renormalize.infer 0.20% : 0.000184s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.17% : 0.000149s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000090s : 1: symbol_engine_optimizer 77.92% : 0.070163s : 1: task_emit 0.08% : 0.000072s : 1: tuple_transform 2.71% : 0.002444s : 1: type_inference 0.08% : 0.000068s : 1: validate Time group info: ------[substitution.] 0.000128 63 5.21% : 0.000007s : 2: substitution.depend_value_elim 1.88% : 0.000002s : 5: substitution.elim_not_effective 1.91% : 0.000002s : 5: substitution.fold_const_symbol 5.34% : 0.000007s : 6: substitution.graph_param_transform 49.94% : 0.000064s : 1: substitution.inline 4.08% : 0.000005s : 10: substitution.j_node_and_user_rematch 3.16% : 0.000004s : 6: substitution.load_eliminater 2.78% : 0.000004s : 2: substitution.reduce_all_const_elim 6.34% : 0.000008s : 10: substitution.remove_not_recompute_node 2.62% : 0.000003s : 2: substitution.replace_old_param 8.88% : 0.000011s : 6: substitution.updatestate_pure_node_eliminater 7.85% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002390 2 88.82% : 0.002123s : 1: type_inference.infer 11.18% : 0.000267s : 1: type_inference.specialize ------[replace.] 0.000012 1 100.00% : 0.000012s : 1: replace.inline ------[match.] 0.000063 1 100.00% : 0.000063s : 1: match.inline ------[predicate.] 0.000226 1420 0.78% : 0.000002s : 13: predicate.accumulaten_eliminater 1.19% : 0.000003s : 6: predicate.ad_related_special_op_eliminate 0.70% : 0.000002s : 12: predicate.addn_check_dump 0.76% : 0.000002s : 13: predicate.addn_zero_filter 0.75% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.22% : 0.000005s : 25: predicate.arithmetic_simplify 0.84% : 0.000002s : 13: predicate.cast_eliminate 0.82% : 0.000002s : 12: predicate.check_bprop_eliminate 0.76% : 0.000002s : 12: predicate.compare_switch_simplify 0.21% : 0.000000s : 6: predicate.const_output_eliminate 0.51% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.45% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.83% : 0.000002s : 12: predicate.depend_value_elim 0.88% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.86% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.88% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.29% : 0.000001s : 6: predicate.elim_not_effective 0.60% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.09% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000002s : 19: predicate.environ_get_depend_swap 1.94% : 0.000004s : 31: predicate.environ_get_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.84% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.29% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.23% : 0.000001s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.34% : 0.000001s : 6: predicate.graph_param_transform 0.78% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.72% : 0.000013s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.42% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.00% : 0.000002s : 12: predicate.less_batch_normalization 1.75% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.46% : 0.000006s : 38: predicate.load_eliminater 1.20% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.24% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.82% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.75% : 0.000002s : 12: predicate.merge_addn 0.78% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.77% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.75% : 0.000002s : 13: predicate.minmaximum_grad 0.76% : 0.000002s : 6: predicate.mutable_eliminate 0.52% : 0.000001s : 6: predicate.opt_reshape 0.46% : 0.000001s : 6: predicate.parallel_virtual_node 1.07% : 0.000002s : 14: predicate.partial_defer_inline 1.23% : 0.000003s : 19: predicate.partial_eliminate 0.76% : 0.000002s : 13: predicate.print_const_string_wrapper 0.89% : 0.000002s : 12: predicate.reduce_all_const_elim 1.08% : 0.000002s : 13: predicate.reduce_eliminate 0.68% : 0.000002s : 12: predicate.remove_not_recompute_node 1.11% : 0.000003s : 25: predicate.replace_applicator 0.50% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.92% : 0.000002s : 13: predicate.reshape_eliminate 0.82% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.49% : 0.000001s : 6: predicate.row_tensor_eliminate 1.04% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.92% : 0.000002s : 12: predicate.shard_identity_eliminate 1.30% : 0.000003s : 18: predicate.special_op_eliminate 0.98% : 0.000002s : 12: predicate.specialize_transform 1.07% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.99% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.28% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.95% : 0.000002s : 14: predicate.switch_defer_inline 1.66% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.47% : 0.000010s : 43: predicate.switch_simplify 0.75% : 0.000002s : 13: predicate.tile_eliminate 0.81% : 0.000002s : 13: predicate.transpose_eliminate 1.75% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.67% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.44% : 0.000003s : 25: predicate.tuple_list_get_item_depend_reorder 2.74% : 0.000006s : 37: predicate.tuple_list_get_item_eliminator 1.65% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.62% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.43% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.39% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.53% : 0.000001s : 6: predicate.value_based_eliminate 0.83% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.85% : 0.000002s : 12: predicate.virtual_output_eliminate 0.53% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000149 4 10.77% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.23% : 0.000133s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090040 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.07% : 0.000065s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.15% : 0.000135s : 1: auto_monad 0.04% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000006s : 1: bias_add_comm_swap 0.37% : 0.000333s : 1: bootstrap 0.03% : 0.000027s : 1: cconv 0.03% : 0.000031s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000048s : 1: distribtued_split 0.56% : 0.000508s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000020s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000013s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.54% : 0.000486s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001086s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000149s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.03% : 0.000031s : 3: opt.transform.special_op_eliminate 0.05% : 0.000047s : 4: opt.transform.symbol_engine_opt 5.82% : 0.005244s : 1: opt_a 0.15% : 0.000138s : 1: opt_after_cconv 0.27% : 0.000243s : 1: opt_b 7.84% : 0.007062s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000017s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.08% : 0.000076s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000006s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000030s : 1: pre_auto_parallel 0.02% : 0.000019s : 1: py_interpret_to_execute 0.02% : 0.000014s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000017s : 1: remove_dup_value 0.26% : 0.000236s : 1: renormalize.infer 0.21% : 0.000192s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.16% : 0.000147s : 1: rewriter_after_opt_a 0.04% : 0.000038s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000091s : 1: symbol_engine_optimizer 78.22% : 0.070431s : 1: task_emit 0.08% : 0.000071s : 1: tuple_transform 2.71% : 0.002437s : 1: type_inference 0.08% : 0.000069s : 1: validate Time group info: ------[substitution.] 0.000134 63 3.74% : 0.000005s : 2: substitution.depend_value_elim 1.61% : 0.000002s : 5: substitution.elim_not_effective 2.20% : 0.000003s : 5: substitution.fold_const_symbol 4.57% : 0.000006s : 6: substitution.graph_param_transform 38.96% : 0.000052s : 1: substitution.inline 3.97% : 0.000005s : 10: substitution.j_node_and_user_rematch 2.68% : 0.000004s : 6: substitution.load_eliminater 1.94% : 0.000003s : 2: substitution.reduce_all_const_elim 5.37% : 0.000007s : 10: substitution.remove_not_recompute_node 21.06% : 0.000028s : 2: substitution.replace_old_param 7.41% : 0.000010s : 6: substitution.updatestate_pure_node_eliminater 6.49% : 0.000009s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002345 2 90.52% : 0.002123s : 1: type_inference.infer 9.48% : 0.000222s : 1: type_inference.specialize ------[replace.] 0.000010 1 100.00% : 0.000010s : 1: replace.inline ------[match.] 0.000051 1 100.00% : 0.000051s : 1: match.inline ------[predicate.] 0.000229 1420 0.86% : 0.000002s : 13: predicate.accumulaten_eliminater 1.04% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.81% : 0.000002s : 13: predicate.addn_zero_filter 0.72% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.28% : 0.000005s : 25: predicate.arithmetic_simplify 0.78% : 0.000002s : 13: predicate.cast_eliminate 0.88% : 0.000002s : 12: predicate.check_bprop_eliminate 0.74% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000001s : 6: predicate.const_output_eliminate 0.47% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.38% : 0.000003s : 13: predicate.convert_tensor_eliminate 0.89% : 0.000002s : 12: predicate.depend_value_elim 0.78% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.83% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.84% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.24% : 0.000001s : 6: predicate.elim_not_effective 0.57% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.25% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.14% : 0.000003s : 19: predicate.environ_get_depend_swap 1.91% : 0.000004s : 31: predicate.environ_get_eliminate 1.07% : 0.000002s : 19: predicate.environ_get_set_eliminate 0.80% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.20% : 0.000003s : 14: predicate.float_depend_g_call 0.77% : 0.000002s : 12: predicate.float_environ_get_switch 1.06% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.22% : 0.000001s : 6: predicate.fold_const_symbol 0.91% : 0.000002s : 12: predicate.get_grad_eliminate 0.32% : 0.000001s : 6: predicate.graph_param_transform 0.79% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.40% : 0.000012s : 63: predicate.inline 1.05% : 0.000002s : 12: predicate.inline_without_move 0.43% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.03% : 0.000002s : 12: predicate.less_batch_normalization 1.62% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.46% : 0.000006s : 38: predicate.load_eliminater 1.27% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.26% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.78% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.77% : 0.000002s : 12: predicate.merge_addn 0.75% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.73% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.76% : 0.000002s : 13: predicate.minmaximum_grad 0.68% : 0.000002s : 6: predicate.mutable_eliminate 0.50% : 0.000001s : 6: predicate.opt_reshape 0.58% : 0.000001s : 6: predicate.parallel_virtual_node 1.18% : 0.000003s : 14: predicate.partial_defer_inline 1.24% : 0.000003s : 19: predicate.partial_eliminate 0.92% : 0.000002s : 13: predicate.print_const_string_wrapper 1.12% : 0.000003s : 12: predicate.reduce_all_const_elim 1.08% : 0.000002s : 13: predicate.reduce_eliminate 0.60% : 0.000001s : 12: predicate.remove_not_recompute_node 1.08% : 0.000002s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.23% : 0.000001s : 6: predicate.reset_defer_inline 0.90% : 0.000002s : 13: predicate.reshape_eliminate 0.79% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.54% : 0.000001s : 6: predicate.row_tensor_eliminate 1.05% : 0.000002s : 12: predicate.same_eliminate 0.46% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.95% : 0.000002s : 12: predicate.shard_identity_eliminate 1.41% : 0.000003s : 18: predicate.special_op_eliminate 0.99% : 0.000002s : 12: predicate.specialize_transform 1.10% : 0.000003s : 12: predicate.split_environ_get_set_with_tuple_value 0.92% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.33% : 0.000005s : 38: predicate.stopgrad_eliminater 0.44% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.95% : 0.000002s : 14: predicate.switch_defer_inline 1.57% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.26% : 0.000010s : 43: predicate.switch_simplify 0.87% : 0.000002s : 13: predicate.tile_eliminate 0.92% : 0.000002s : 13: predicate.transpose_eliminate 1.78% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.71% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 2.84% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.55% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.51% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.64% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.30% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.58% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.55% : 0.000001s : 6: predicate.value_based_eliminate 0.84% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.79% : 0.000002s : 12: predicate.virtual_output_eliminate 0.52% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000125 4 7.91% : 0.000010s : 1: func_graph_cloner_run.FuncGraphClonerGraph 92.09% : 0.000115s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.090147 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000015s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.07% : 0.000059s : 1: add_recomputation 0.03% : 0.000029s : 1: assign_add_opt 0.12% : 0.000112s : 1: auto_monad 0.03% : 0.000031s : 1: auto_monad_reorder 0.00% : 0.000003s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.40% : 0.000357s : 1: bootstrap 0.02% : 0.000019s : 1: cconv 0.03% : 0.000029s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000029s : 1: cse_after_recomputation 0.01% : 0.000005s : 1: dataset_repeat_opt 0.05% : 0.000041s : 1: distribtued_split 0.57% : 0.000515s : 1: eliminate_special_op_node 0.01% : 0.000010s : 1: environ_conv 0.02% : 0.000016s : 1: execute 0.00% : 0.000004s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000008s : 1: graph_reusing 0.01% : 0.000010s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000007s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000003s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000004s : 1: label_fine_grained_interleaved_index 0.00% : 0.000004s : 1: label_micro_interleaved_index 0.54% : 0.000489s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000014s : 1: opt.transform.loop_unroll_optimizer 1.22% : 0.001097s : 80: opt.transform.opt_a 0.06% : 0.000050s : 1: opt.transform.opt_after_cconv 0.17% : 0.000153s : 27: opt.transform.opt_b 0.06% : 0.000052s : 1: opt.transform.opt_trans_graph 0.03% : 0.000030s : 3: opt.transform.special_op_eliminate 0.06% : 0.000050s : 4: opt.transform.symbol_engine_opt 5.99% : 0.005403s : 1: opt_a 0.15% : 0.000132s : 1: opt_after_cconv 0.27% : 0.000245s : 1: opt_b 7.92% : 0.007142s : 1: optimize 0.01% : 0.000011s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000009s : 1: order_py_execute_after_rewriter 0.02% : 0.000016s : 1: overlap_grad_flash_sp 0.00% : 0.000003s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000005s : 1: overlap_grad_ring_attention 0.00% : 0.000004s : 1: overlap_opt_shard_grad_in_pipeline 0.01% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000004s : 1: overlap_param_gather 0.07% : 0.000062s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000006s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.00% : 0.000004s : 1: partial_unused_args_eliminate 0.01% : 0.000006s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.03% : 0.000026s : 1: pre_auto_parallel 0.02% : 0.000017s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000014s : 1: remove_dup_value 0.24% : 0.000213s : 1: renormalize.infer 0.20% : 0.000177s : 1: renormalize.specialize 0.00% : 0.000004s : 1: reorder_send_recv_between_fp_bp 0.01% : 0.000005s : 1: rewriter_after_jit_bprop_graph 0.15% : 0.000132s : 1: rewriter_after_opt_a 0.04% : 0.000035s : 1: rewriter_before_opt_a 0.01% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000004s : 1: split_matmul_comm_elemetwise 0.01% : 0.000010s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000094s : 1: symbol_engine_optimizer 78.17% : 0.070471s : 1: task_emit 0.08% : 0.000070s : 1: tuple_transform 2.65% : 0.002387s : 1: type_inference 0.07% : 0.000064s : 1: validate TotalTime = 0.0821467, [21] [bootstrap]: 0.00038997 [type_inference]: 0.0026024 [auto_monad]: 0.00013349 [graph_reusing]: 2.32004e-06 [inline]: 1.41002e-06 [parallel-infer-symbol]: 2.93995e-06 [pre_auto_parallel]: 2.62699e-05 [insert-virtual-dataset]: 3.01003e-06 [parallel-infer-symbol-second]: 4.29922e-07 [dataset_repeat_opt]: 1.49e-06 [pipeline_split]: 1.63005e-06 [optimize]: 0.00751342, [52] [py_interpret_to_execute]: 1.566e-05 [rewriter_before_opt_a]: 3.69999e-05 [opt_a]: 0.00561048, [2] [Cycle 1]: 0.00152373, [43] [expand_dump_flag]: 4.12995e-06 [switch_simplify]: 3.02399e-05 [loop_unroll]: 1.30699e-05 [a_1]: 0.00034229 [recompute_prepare]: 8.86002e-06 [updatestate_depend_eliminate]: 8.72998e-06 [updatestate_assign_eliminate]: 5.87001e-06 [updatestate_loads_eliminate]: 7.49005e-06 [parameter_eliminate]: 3.11004e-06 [a_2]: 0.00011573 [accelerated_algorithm]: 8.56002e-06 [shard]: 2.00002e-06 [meta_shard_fg_expand]: 3.50003e-06 [shard_inline]: 8.27992e-06 [auto_parallel]: 1.14799e-05 [parallel]: 8.19005e-06 [flash_sp]: 1.159e-05 [merge_comm]: 8.64e-06 [allreduce_fusion]: 5.12996e-06 [matmul_add_comm_reduction]: 1.102e-05 [allreduce_slice_to_reducescatter]: 4.50062e-07 [virtual_shard_identity]: 9.54e-06 [virtual_dataset]: 7.71997e-06 [get_grad_eliminate_]: 8.02998e-06 [virtual_output]: 7.81007e-06 [merge_forward]: 5.99993e-06 [cell_reuse_recompute_pass]: 1.96998e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.64801e-05 [before_grad]: 1.403e-05 [inplace_validation]: 4.77e-06 [meta_fg_expand]: 5.33997e-06 [inplace_validation_after_expand]: 6.70005e-06 [flash_sp_send_recv_attached]: 4.67e-06 [receive_attached]: 2.50002e-06 [after_resolve]: 1.192e-05 [a_after_grad]: 1.28699e-05 [special_op_eliminate]: 7.66001e-06 [renormalize]: 0.0004254 [add_forward_monad_depend]: 3.75998e-06 [auto_monad_grad]: 2.30002e-06 [auto_monad_eliminator]: 3.19099e-05 [cse]: 3.53401e-05 [a_3]: 5.774e-05 [Cycle 2]: 0.00080467, [43] [expand_dump_flag]: 1.12993e-06 [switch_simplify]: 1.01899e-05 [loop_unroll]: 8.27003e-06 [a_1]: 0.00020745 [recompute_prepare]: 7.96001e-06 [updatestate_depend_eliminate]: 5.72996e-06 [updatestate_assign_eliminate]: 4.44998e-06 [updatestate_loads_eliminate]: 5.11995e-06 [parameter_eliminate]: 1.46998e-06 [a_2]: 0.0001045 [accelerated_algorithm]: 8.42998e-06 [shard]: 1.15007e-06 [meta_shard_fg_expand]: 2.39001e-06 [shard_inline]: 7.70995e-06 [auto_parallel]: 1.07001e-05 [parallel]: 3.63006e-06 [flash_sp]: 3.65009e-06 [merge_comm]: 5.97001e-06 [allreduce_fusion]: 4.87e-06 [matmul_add_comm_reduction]: 7.62998e-06 [allreduce_slice_to_reducescatter]: 2.89991e-07 [virtual_shard_identity]: 8.68004e-06 [virtual_dataset]: 7.77992e-06 [get_grad_eliminate_]: 7.32008e-06 [virtual_output]: 6.97991e-06 [merge_forward]: 4.71994e-06 [cell_reuse_recompute_pass]: 1.81003e-06 [cell_reuse_handle_not_recompute_node_pass]: 1.51501e-05 [before_grad]: 1.25701e-05 [inplace_validation]: 4.07e-06 [meta_fg_expand]: 4.48991e-06 [inplace_validation_after_expand]: 4.99003e-06 [flash_sp_send_recv_attached]: 8.2003e-07 [receive_attached]: 7.39936e-07 [after_resolve]: 9.41008e-06 [a_after_grad]: 1.15001e-05 [special_op_eliminate]: 7.01996e-06 [renormalize]: 1.00001e-07 [add_forward_monad_depend]: 9.10019e-07 [auto_monad_grad]: 1.10001e-06 [auto_monad_eliminator]: 1.80299e-05 [cse]: 2.08899e-05 [a_3]: 4.869e-05 [py_interpret_to_execute_after_opt_a]: 8.74e-06 [slice_cell_reuse_recomputed_activation]: 2.50991e-06 [rewriter_after_opt_a]: 0.0001581 [convert_after_rewriter]: 1.062e-05 [order_py_execute_after_rewriter]: 6.17001e-06 [opt_b]: 0.00024002, [1] [Cycle 1]: 0.00023454, [7] [b_1]: 0.00015862 [b_2]: 9.75002e-06 [updatestate_depend_eliminate]: 5.19003e-06 [updatestate_assign_eliminate]: 4.61005e-06 [updatestate_loads_eliminate]: 5.09003e-06 [renormalize]: 3.00002e-07 [cse]: 1.98799e-05 [optimize_parallel_all_gather_comm]: 8.40996e-06 [overlap_param_gather]: 1.45996e-06 [cconv]: 2.345e-05 [loop_unroll]: 0.00049961 [opt_after_cconv]: 0.00013557, [1] [Cycle 1]: 0.00012927, [7] [c_1]: 5.22201e-05 [parameter_eliminate]: 2.56998e-06 [updatestate_depend_eliminate]: 7.90996e-06 [updatestate_assign_eliminate]: 4.55999e-06 [updatestate_loads_eliminate]: 5.59003e-06 [cse]: 2.323e-05 [renormalize]: 4.89992e-07 [remove_dup_value]: 1.42599e-05 [tuple_transform]: 6.92901e-05, [1] [Cycle 1]: 6.477e-05, [2] [d_1]: 5.572e-05 [renormalize]: 1.60071e-07 [partial_unused_args_eliminate]: 2.05997e-06 [add_cache_embedding]: 1.34001e-05 [add_recomputation]: 6.61401e-05 [cse_after_recomputation]: 2.782e-05, [1] [Cycle 1]: 2.275e-05, [1] [cse]: 1.758e-05 [environ_conv]: 7.57002e-06 [swap_dp_allreduce_reducescatter]: 7.50995e-06 [bias_add_comm_swap]: 2.23005e-06 [label_micro_interleaved_index]: 1.93005e-06 [label_fine_grained_interleaved_index]: 1.94996e-06 [merge_cast_opt]: 1.00001e-06 [slice_recompute_activation]: 2.02004e-06 [micro_interleaved_order_control]: 1.73994e-06 [assign_add_opt]: 2.869e-05 [ForceFp32Comm]: 8.69972e-07 [remove_cast_before_assign_add]: 7.18993e-06 [full_micro_interleaved_order_control]: 2.45008e-06 [reorder_send_recv_between_fp_bp]: 2.15997e-06 [comm_op_add_attrs]: 3.001e-05 [add_comm_op_reuse_tag]: 1.6999e-06 [interleave_split_concat_branches]: 1.19e-06 [interleave_parallel_branches]: 8.79983e-07 [overlap_opt_shard_in_pipeline]: 1.30001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.27999e-06 [control_data_broadcast_order]: 1.29e-06 [grouped_pairwise_exchange_alltoall]: 1.12701e-05 [offloading_packed_experts]: 1.94996e-06 [overlap_recompute_and_grad_model_parallel]: 1.96998e-06 [overlap_grad_matmul_and_grad_allreduce]: 8.39937e-07 [overlap_recompute_allgather_and_fa_grad]: 8.705e-05 [overlap_grad_ring_attention]: 2.50002e-06 [overlap_grad_flash_sp]: 1.691e-05 [begin_end_overlap_inline]: 7.49948e-07 [split_matmul_comm_elemetwise]: 2.11992e-06 [split_layernorm_comm]: 1.74996e-06 [handle_group_info]: 4.82006e-06 [symbol_engine_optimizer]: 9.216e-05, [1] [Cycle 1]: 8.667e-05, [6] [build]: 5.02996e-06 [elim_shapecalc]: 1.41599e-05 [elim_not_effective]: 1.76399e-05 [opt_reshape]: 8.46991e-06 [fold_const_symbol]: 1.412e-05 [renormalize]: 2.5006e-07 [pipeline_parallel_scheduler]: 1.60001e-06 [auto_monad_reorder]: 3.209e-05 [get_jit_bprop_graph]: 4.70085e-07 [rewriter_after_jit_bprop_graph]: 4.4005e-07 [eliminate_special_op_node]: 0.00052387 [distribtued_split]: 4.362e-05 [validate]: 3.46099e-05 [task_emit]: 0.0705716 [execute]: 1.341e-05 Sums bootstrap : 0.000390s : 0.50% type_inference : 0.002602s : 3.34% auto_monad : 0.000133s : 0.17% graph_reusing : 0.000002s : 0.00% inline : 0.000001s : 0.00% parallel-infer-symbol : 0.000003s : 0.00% pre_auto_parallel : 0.000026s : 0.03% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000000s : 0.00% dataset_repeat_opt : 0.000001s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000016s : 0.02% optimize.rewriter_before_opt_a : 0.000037s : 0.05% optimize.opt_a.expand_dump_flag : 0.000005s : 0.01% optimize.opt_a.switch_simplify : 0.000040s : 0.05% optimize.opt_a.loop_unroll : 0.000021s : 0.03% optimize.opt_a.a_1 : 0.000550s : 0.71% optimize.opt_a.recompute_prepare : 0.000017s : 0.02% optimize.opt_a.updatestate_depend_eliminate : 0.000014s : 0.02% optimize.opt_a.updatestate_assign_eliminate : 0.000010s : 0.01% optimize.opt_a.updatestate_loads_eliminate : 0.000013s : 0.02% optimize.opt_a.parameter_eliminate : 0.000005s : 0.01% optimize.opt_a.a_2 : 0.000220s : 0.28% optimize.opt_a.accelerated_algorithm : 0.000017s : 0.02% optimize.opt_a.shard : 0.000003s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000006s : 0.01% optimize.opt_a.shard_inline : 0.000016s : 0.02% optimize.opt_a.auto_parallel : 0.000022s : 0.03% optimize.opt_a.parallel : 0.000012s : 0.02% optimize.opt_a.flash_sp : 0.000015s : 0.02% optimize.opt_a.merge_comm : 0.000015s : 0.02% optimize.opt_a.allreduce_fusion : 0.000010s : 0.01% optimize.opt_a.matmul_add_comm_reduction : 0.000019s : 0.02% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000001s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000018s : 0.02% optimize.opt_a.virtual_dataset : 0.000015s : 0.02% optimize.opt_a.get_grad_eliminate_ : 0.000015s : 0.02% optimize.opt_a.virtual_output : 0.000015s : 0.02% optimize.opt_a.merge_forward : 0.000011s : 0.01% optimize.opt_a.cell_reuse_recompute_pass : 0.000004s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000032s : 0.04% optimize.opt_a.before_grad : 0.000027s : 0.03% optimize.opt_a.inplace_validation : 0.000009s : 0.01% optimize.opt_a.meta_fg_expand : 0.000010s : 0.01% optimize.opt_a.inplace_validation_after_expand : 0.000012s : 0.02% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.01% optimize.opt_a.receive_attached : 0.000003s : 0.00% optimize.opt_a.after_resolve : 0.000021s : 0.03% optimize.opt_a.a_after_grad : 0.000024s : 0.03% optimize.opt_a.special_op_eliminate : 0.000015s : 0.02% optimize.opt_a.renormalize : 0.000425s : 0.55% optimize.opt_a.add_forward_monad_depend : 0.000005s : 0.01% optimize.opt_a.auto_monad_grad : 0.000003s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000050s : 0.06% optimize.opt_a.cse : 0.000056s : 0.07% optimize.opt_a.a_3 : 0.000106s : 0.14% optimize.py_interpret_to_execute_after_opt_a : 0.000009s : 0.01% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000158s : 0.20% optimize.convert_after_rewriter : 0.000011s : 0.01% optimize.order_py_execute_after_rewriter : 0.000006s : 0.01% optimize.opt_b.b_1 : 0.000159s : 0.20% optimize.opt_b.b_2 : 0.000010s : 0.01% optimize.opt_b.updatestate_depend_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.01% optimize.opt_b.renormalize : 0.000000s : 0.00% optimize.opt_b.cse : 0.000020s : 0.03% optimize.optimize_parallel_all_gather_comm : 0.000008s : 0.01% optimize.overlap_param_gather : 0.000001s : 0.00% optimize.cconv : 0.000023s : 0.03% optimize.loop_unroll : 0.000500s : 0.64% optimize.opt_after_cconv.c_1 : 0.000052s : 0.07% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000008s : 0.01% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.01% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000006s : 0.01% optimize.opt_after_cconv.cse : 0.000023s : 0.03% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000014s : 0.02% optimize.tuple_transform.d_1 : 0.000056s : 0.07% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_cache_embedding : 0.000013s : 0.02% optimize.add_recomputation : 0.000066s : 0.08% optimize.cse_after_recomputation.cse : 0.000018s : 0.02% optimize.environ_conv : 0.000008s : 0.01% optimize.swap_dp_allreduce_reducescatter : 0.000008s : 0.01% optimize.bias_add_comm_swap : 0.000002s : 0.00% optimize.label_micro_interleaved_index : 0.000002s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000002s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000029s : 0.04% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000007s : 0.01% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000002s : 0.00% optimize.comm_op_add_attrs : 0.000030s : 0.04% optimize.add_comm_op_reuse_tag : 0.000002s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000001s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000011s : 0.01% optimize.offloading_packed_experts : 0.000002s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000002s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000087s : 0.11% optimize.overlap_grad_ring_attention : 0.000003s : 0.00% optimize.overlap_grad_flash_sp : 0.000017s : 0.02% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000005s : 0.01% optimize.symbol_engine_optimizer.build : 0.000005s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000014s : 0.02% optimize.symbol_engine_optimizer.elim_not_effective : 0.000018s : 0.02% optimize.symbol_engine_optimizer.opt_reshape : 0.000008s : 0.01% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000014s : 0.02% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000032s : 0.04% get_jit_bprop_graph : 0.000000s : 0.00% rewriter_after_jit_bprop_graph : 0.000000s : 0.00% eliminate_special_op_node : 0.000524s : 0.67% distribtued_split : 0.000044s : 0.06% validate : 0.000035s : 0.04% task_emit : 0.070572s : 90.66% execute : 0.000013s : 0.02% Time group info: ------[substitution.] 0.000135 63 4.89% : 0.000007s : 2: substitution.depend_value_elim 1.91% : 0.000003s : 5: substitution.elim_not_effective 1.90% : 0.000003s : 5: substitution.fold_const_symbol 5.59% : 0.000008s : 6: substitution.graph_param_transform 50.86% : 0.000069s : 1: substitution.inline 4.35% : 0.000006s : 10: substitution.j_node_and_user_rematch 3.00% : 0.000004s : 6: substitution.load_eliminater 2.83% : 0.000004s : 2: substitution.reduce_all_const_elim 5.59% : 0.000008s : 10: substitution.remove_not_recompute_node 2.67% : 0.000004s : 2: substitution.replace_old_param 8.85% : 0.000012s : 6: substitution.updatestate_pure_node_eliminater 7.54% : 0.000010s : 8: substitution.updatestate_useless_node_eliminater ------[type_inference.] 0.002573 2 89.30% : 0.002298s : 1: type_inference.infer 10.70% : 0.000275s : 1: type_inference.specialize ------[replace.] 0.000011 1 100.00% : 0.000011s : 1: replace.inline ------[match.] 0.000067 1 100.00% : 0.000067s : 1: match.inline ------[predicate.] 0.000229 1420 0.73% : 0.000002s : 13: predicate.accumulaten_eliminater 1.04% : 0.000002s : 6: predicate.ad_related_special_op_eliminate 0.71% : 0.000002s : 12: predicate.addn_check_dump 0.86% : 0.000002s : 13: predicate.addn_zero_filter 0.74% : 0.000002s : 13: predicate.adjust_all_reduce_mul_add 2.15% : 0.000005s : 25: predicate.arithmetic_simplify 0.87% : 0.000002s : 13: predicate.cast_eliminate 0.74% : 0.000002s : 12: predicate.check_bprop_eliminate 0.75% : 0.000002s : 12: predicate.compare_switch_simplify 0.22% : 0.000000s : 6: predicate.const_output_eliminate 0.44% : 0.000001s : 6: predicate.convert_tensor_all_eliminate 1.54% : 0.000004s : 13: predicate.convert_tensor_eliminate 0.82% : 0.000002s : 12: predicate.depend_value_elim 0.88% : 0.000002s : 13: predicate.dict_get_item_const_eliminator 0.90% : 0.000002s : 13: predicate.dict_get_item_eliminator 0.83% : 0.000002s : 13: predicate.dict_set_item_eliminator 0.34% : 0.000001s : 6: predicate.elim_not_effective 0.62% : 0.000001s : 6: predicate.elim_shapecalc_of_broadcastargs 1.34% : 0.000003s : 19: predicate.environ_add_const_eliminate 1.06% : 0.000002s : 19: predicate.environ_get_add_eliminate 1.10% : 0.000003s : 19: predicate.environ_get_depend_swap 1.96% : 0.000004s : 31: predicate.environ_get_eliminate 1.31% : 0.000003s : 19: predicate.environ_get_set_eliminate 0.79% : 0.000002s : 14: predicate.exchange_switch_depend_value 1.33% : 0.000003s : 14: predicate.float_depend_g_call 0.76% : 0.000002s : 12: predicate.float_environ_get_switch 1.07% : 0.000002s : 18: predicate.float_tuple_getitem_switch 0.21% : 0.000000s : 6: predicate.fold_const_symbol 0.87% : 0.000002s : 12: predicate.get_grad_eliminate 0.46% : 0.000001s : 6: predicate.graph_param_transform 0.77% : 0.000002s : 12: predicate.incorporate_call 0.68% : 0.000002s : 12: predicate.incorporate_call_switch 5.62% : 0.000013s : 63: predicate.inline 1.06% : 0.000002s : 12: predicate.inline_without_move 0.37% : 0.000001s : 12: predicate.j_node_and_user_rematch 1.06% : 0.000002s : 12: predicate.less_batch_normalization 1.75% : 0.000004s : 25: predicate.list_to_tuple_eliminator_ 2.36% : 0.000005s : 38: predicate.load_eliminater 1.16% : 0.000003s : 6: predicate.loop_unroll_after_grad 1.18% : 0.000003s : 17: predicate.loop_unroll_before_grad 1.85% : 0.000004s : 25: predicate.make_slice_get_slice_eliminator 0.73% : 0.000002s : 12: predicate.merge_addn 0.84% : 0.000002s : 12: predicate.micro_step_allgather_replace 0.82% : 0.000002s : 12: predicate.mini_step_allgather_replace 0.80% : 0.000002s : 13: predicate.minmaximum_grad 0.73% : 0.000002s : 6: predicate.mutable_eliminate 0.46% : 0.000001s : 6: predicate.opt_reshape 0.53% : 0.000001s : 6: predicate.parallel_virtual_node 1.16% : 0.000003s : 14: predicate.partial_defer_inline 1.25% : 0.000003s : 19: predicate.partial_eliminate 0.77% : 0.000002s : 13: predicate.print_const_string_wrapper 0.85% : 0.000002s : 12: predicate.reduce_all_const_elim 1.08% : 0.000002s : 13: predicate.reduce_eliminate 0.60% : 0.000001s : 12: predicate.remove_not_recompute_node 1.07% : 0.000002s : 25: predicate.replace_applicator 0.47% : 0.000001s : 12: predicate.replace_old_param 0.24% : 0.000001s : 6: predicate.reset_defer_inline 0.86% : 0.000002s : 13: predicate.reshape_eliminate 0.81% : 0.000002s : 12: predicate.row_tensor_add_zeros_like 0.45% : 0.000001s : 6: predicate.row_tensor_eliminate 1.03% : 0.000002s : 12: predicate.same_eliminate 0.47% : 0.000001s : 12: predicate.set_cell_output_no_recompute 0.89% : 0.000002s : 12: predicate.shard_identity_eliminate 1.34% : 0.000003s : 18: predicate.special_op_eliminate 0.95% : 0.000002s : 12: predicate.specialize_transform 1.04% : 0.000002s : 12: predicate.split_environ_get_set_with_tuple_value 0.92% : 0.000002s : 12: predicate.stack_unstack_eliminate 2.30% : 0.000005s : 38: predicate.stopgrad_eliminater 0.42% : 0.000001s : 6: predicate.switch_call_monad_eliminater 0.92% : 0.000002s : 14: predicate.switch_defer_inline 1.64% : 0.000004s : 26: predicate.switch_layer_defer_inline 4.42% : 0.000010s : 43: predicate.switch_simplify 0.77% : 0.000002s : 13: predicate.tile_eliminate 0.73% : 0.000002s : 13: predicate.transpose_eliminate 1.68% : 0.000004s : 25: predicate.tuple_list_convert_item_index_to_positive 1.70% : 0.000004s : 25: predicate.tuple_list_get_item_const_eliminator 1.54% : 0.000004s : 25: predicate.tuple_list_get_item_depend_reorder 3.14% : 0.000007s : 37: predicate.tuple_list_get_item_eliminator 1.60% : 0.000004s : 25: predicate.tuple_list_get_set_item_eliminator 2.56% : 0.000006s : 37: predicate.tuple_list_set_item_eliminator 1.66% : 0.000004s : 25: predicate.tuple_to_list_eliminator_ 2.36% : 0.000005s : 38: predicate.updatestate_pure_node_eliminater 3.46% : 0.000008s : 50: predicate.updatestate_useless_node_eliminater 0.57% : 0.000001s : 6: predicate.value_based_eliminate 0.78% : 0.000002s : 12: predicate.virtual_dataset_eliminate 0.78% : 0.000002s : 12: predicate.virtual_output_eliminate 0.48% : 0.000001s : 6: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.000150 4 10.97% : 0.000016s : 1: func_graph_cloner_run.FuncGraphClonerGraph 89.03% : 0.000134s : 3: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.091469 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.02% : 0.000017s : 1: add_cache_embedding 0.01% : 0.000005s : 1: add_comm_op_reuse_tag 0.08% : 0.000071s : 1: add_recomputation 0.04% : 0.000033s : 1: assign_add_opt 0.16% : 0.000147s : 1: auto_monad 0.04% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.01% : 0.000005s : 1: bias_add_comm_swap 0.45% : 0.000414s : 1: bootstrap 0.03% : 0.000028s : 1: cconv 0.04% : 0.000034s : 1: comm_op_add_attrs 0.00% : 0.000004s : 1: control_data_broadcast_order 0.02% : 0.000015s : 1: convert_after_rewriter 0.03% : 0.000031s : 1: cse_after_recomputation 0.01% : 0.000006s : 1: dataset_repeat_opt 0.06% : 0.000052s : 1: distribtued_split 0.59% : 0.000538s : 1: eliminate_special_op_node 0.01% : 0.000011s : 1: environ_conv 0.02% : 0.000022s : 1: execute 0.01% : 0.000005s : 1: full_micro_interleaved_order_control 0.01% : 0.000005s : 1: get_jit_bprop_graph 0.01% : 0.000009s : 1: graph_reusing 0.02% : 0.000015s : 1: grouped_pairwise_exchange_alltoall 0.01% : 0.000008s : 1: handle_group_info 0.01% : 0.000006s : 1: inline 0.01% : 0.000008s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.01% : 0.000005s : 1: label_fine_grained_interleaved_index 0.01% : 0.000005s : 1: label_micro_interleaved_index 0.56% : 0.000509s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.01% : 0.000005s : 1: micro_interleaved_order_control 0.01% : 0.000005s : 1: offloading_packed_experts 0.02% : 0.000015s : 1: opt.transform.loop_unroll_optimizer 1.21% : 0.001104s : 80: opt.transform.opt_a 0.06% : 0.000051s : 1: opt.transform.opt_after_cconv 0.16% : 0.000150s : 27: opt.transform.opt_b 0.06% : 0.000054s : 1: opt.transform.opt_trans_graph 0.03% : 0.000032s : 3: opt.transform.special_op_eliminate 0.05% : 0.000050s : 4: opt.transform.symbol_engine_opt 6.14% : 0.005614s : 1: opt_a 0.15% : 0.000139s : 1: opt_after_cconv 0.27% : 0.000243s : 1: opt_b 8.22% : 0.007522s : 1: optimize 0.01% : 0.000012s : 1: optimize_parallel_all_gather_comm 0.01% : 0.000010s : 1: order_py_execute_after_rewriter 0.02% : 0.000021s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.01% : 0.000006s : 1: overlap_grad_ring_attention 0.01% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.01% : 0.000005s : 1: overlap_param_gather 0.10% : 0.000093s : 1: overlap_recompute_allgather_and_fa_grad 0.01% : 0.000005s : 1: overlap_recompute_and_grad_model_parallel 0.01% : 0.000008s : 1: parallel-infer-symbol 0.01% : 0.000005s : 1: parallel-infer-symbol-second 0.01% : 0.000005s : 1: partial_unused_args_eliminate 0.01% : 0.000007s : 1: pipeline_parallel_scheduler 0.01% : 0.000006s : 1: pipeline_split 0.04% : 0.000032s : 1: pre_auto_parallel 0.02% : 0.000020s : 1: py_interpret_to_execute 0.01% : 0.000013s : 1: py_interpret_to_execute_after_opt_a 0.01% : 0.000010s : 1: remove_cast_before_assign_add 0.02% : 0.000019s : 1: remove_dup_value 0.26% : 0.000233s : 1: renormalize.infer 0.20% : 0.000186s : 1: renormalize.specialize 0.01% : 0.000005s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000004s : 1: rewriter_after_jit_bprop_graph 0.18% : 0.000164s : 1: rewriter_after_opt_a 0.05% : 0.000042s : 1: rewriter_before_opt_a 0.01% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.01% : 0.000005s : 1: slice_recompute_activation 0.01% : 0.000005s : 1: split_layernorm_comm 0.01% : 0.000005s : 1: split_matmul_comm_elemetwise 0.01% : 0.000011s : 1: swap_dp_allreduce_reducescatter 0.10% : 0.000096s : 1: symbol_engine_optimizer 77.19% : 0.070601s : 1: task_emit 0.08% : 0.000073s : 1: tuple_transform 2.87% : 0.002621s : 1: type_inference 0.08% : 0.000070s : 1: validate distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. distribute network parameter broadcast. ..... =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 . /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 . /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56=============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad")=============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54-- Docs: https://docs.pytest.org/en/latest/warnings.html/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d")=============================== warnings summary =============================== . /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") ================== 1 passed, 18 warnings in 60.16s (0:01:00) ===================/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 ================== 1 passed, 18 warnings in 60.00s (0:01:00) =================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel")/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html======================= 1 passed, 18 warnings in 59.83s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") ======================= 1 passed, 18 warnings in 58.99s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 ======================= 1 passed, 18 warnings in 59.29s ========================/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50/home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") ======================= 1 passed, 18 warnings in 59.48s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") ======================= 1 passed, 18 warnings in 59.15s ======================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ======================= 1 passed, 18 warnings in 59.66s ======================== [WARNING] DEVICE(169323,ffff82a65c10,python3.7):2025-02-07-15:54:40.245.378 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x28c6e7b0 is not exist. [WARNING] DEVICE(169251,ffff805f5c10,python3.7):2025-02-07-15:54:40.315.018 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x2fb66130 is not exist. [WARNING] DEVICE(169260,ffff90a16c10,python3.7):2025-02-07-15:54:40.464.976 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x3b35e7b0 is not exist. [WARNING] DEVICE(169309,ffff88eb9c10,python3.7):2025-02-07-15:54:42.403.937 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x416cc7b0 is not exist. [WARNING] DEVICE(169242,ffff96d80c10,python3.7):2025-02-07-15:54:42.416.814 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x48328400 is not exist. [WARNING] DEVICE(169297,ffff9ffe2c10,python3.7):2025-02-07-15:54:42.464.967 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x278d77b0 is not exist. [WARNING] DEVICE(169285,ffff97501c10,python3.7):2025-02-07-15:54:42.487.109 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x31e00d60 is not exist. [WARNING] DEVICE(169273,ffff95a23c10,python3.7):2025-02-07-15:54:42.506.503 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc:147] UnRegCallback] Unregister callback thread failed, stream : 0x1ec131c0 is not exist. . =============================== warnings summary =============================== /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/.local/lib/python3.7/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/latest/warnings.html ================== 1 passed, 18 warnings in 86.80s (0:01:26) ===================